summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c10
-rw-r--r--fs/9p/v9fs.c1
-rw-r--r--fs/9p/vfs_inode.c20
-rw-r--r--fs/9p/vfs_inode_dotl.c25
-rw-r--r--fs/affs/inode.c1
-rw-r--r--fs/affs/super.c1
-rw-r--r--fs/afs/inode.c8
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/rxrpc.c17
-rw-r--r--fs/aio.c6
-rw-r--r--fs/attr.c1
-rw-r--r--fs/autofs4/autofs_i.h1
-rw-r--r--fs/autofs4/dev-ioctl.c1
-rw-r--r--fs/autofs4/waitq.c1
-rw-r--r--fs/bad_inode.c4
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/binfmt_aout.c1
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/binfmt_elf_fdpic.c3
-rw-r--r--fs/binfmt_flat.c1
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c6
-rw-r--r--fs/btrfs/btrfs_inode.h31
-rw-r--r--fs/btrfs/compression.c43
-rw-r--r--fs/btrfs/compression.h30
-rw-r--r--fs/btrfs/ctree.c9
-rw-r--r--fs/btrfs/ctree.h40
-rw-r--r--fs/btrfs/delayed-inode.c2
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/dev-replace.h5
-rw-r--r--fs/btrfs/dir-item.c9
-rw-r--r--fs/btrfs/disk-io.c32
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c145
-rw-r--r--fs/btrfs/extent_io.c75
-rw-r--r--fs/btrfs/extent_io.h50
-rw-r--r--fs/btrfs/file-item.c40
-rw-r--r--fs/btrfs/file.c139
-rw-r--r--fs/btrfs/free-space-cache.c6
-rw-r--r--fs/btrfs/inode-map.c2
-rw-r--r--fs/btrfs/inode.c451
-rw-r--r--fs/btrfs/ioctl.c38
-rw-r--r--fs/btrfs/lzo.c12
-rw-r--r--fs/btrfs/ordered-data.c9
-rw-r--r--fs/btrfs/ordered-data.h7
-rw-r--r--fs/btrfs/relocation.c17
-rw-r--r--fs/btrfs/scrub.c11
-rw-r--r--fs/btrfs/send.c125
-rw-r--r--fs/btrfs/tests/inode-tests.c46
-rw-r--r--fs/btrfs/transaction.c6
-rw-r--r--fs/btrfs/tree-log.c97
-rw-r--r--fs/btrfs/volumes.c21
-rw-r--r--fs/btrfs/volumes.h12
-rw-r--r--fs/btrfs/zlib.c9
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/ceph/addr.c20
-rw-r--r--fs/ceph/cache.c2
-rw-r--r--fs/ceph/caps.c42
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/dir.c32
-rw-r--r--fs/ceph/export.c3
-rw-r--r--fs/ceph/file.c106
-rw-r--r--fs/ceph/inode.c178
-rw-r--r--fs/ceph/ioctl.c4
-rw-r--r--fs/ceph/mds_client.c175
-rw-r--r--fs/ceph/mds_client.h15
-rw-r--r--fs/ceph/super.c9
-rw-r--r--fs/ceph/super.h18
-rw-r--r--fs/cifs/cifs_dfs_ref.c4
-rw-r--r--fs/cifs/cifs_unicode.h6
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h5
-rw-r--r--fs/cifs/cifspdu.h16
-rw-r--r--fs/cifs/cifsproto.h9
-rw-r--r--fs/cifs/cifssmb.c119
-rw-r--r--fs/cifs/connect.c6
-rw-r--r--fs/cifs/dir.c13
-rw-r--r--fs/cifs/inode.c7
-rw-r--r--fs/cifs/misc.c105
-rw-r--r--fs/cifs/sess.c4
-rw-r--r--fs/cifs/smb1ops.c1
-rw-r--r--fs/cifs/smb2file.c3
-rw-r--r--fs/cifs/smb2ops.c160
-rw-r--r--fs/cifs/smb2pdu.c154
-rw-r--r--fs/cifs/smb2pdu.h8
-rw-r--r--fs/cifs/smb2proto.h5
-rw-r--r--fs/coda/coda_linux.h2
-rw-r--r--fs/coda/file.c2
-rw-r--r--fs/coda/inode.c7
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat.c1
-rw-r--r--fs/coredump.c4
-rw-r--r--fs/crypto/keyinfo.c2
-rw-r--r--fs/dax.c1
-rw-r--r--fs/dlm/user.c1
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/inode.c13
-rw-r--r--fs/ecryptfs/read_write.c2
-rw-r--r--fs/eventfd.c2
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c11
-rw-r--r--fs/exportfs/expfs.c4
-rw-r--r--fs/ext2/balloc.c1
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c6
-rw-r--r--fs/f2fs/checkpoint.c70
-rw-r--r--fs/f2fs/data.c192
-rw-r--r--fs/f2fs/debug.c31
-rw-r--r--fs/f2fs/dir.c38
-rw-r--r--fs/f2fs/extent_cache.c52
-rw-r--r--fs/f2fs/f2fs.h644
-rw-r--r--fs/f2fs/file.c42
-rw-r--r--fs/f2fs/gc.c79
-rw-r--r--fs/f2fs/inode.c4
-rw-r--r--fs/f2fs/namei.c18
-rw-r--r--fs/f2fs/node.c560
-rw-r--r--fs/f2fs/node.h33
-rw-r--r--fs/f2fs/recovery.c17
-rw-r--r--fs/f2fs/segment.c501
-rw-r--r--fs/f2fs/segment.h40
-rw-r--r--fs/f2fs/super.c138
-rw-r--r--fs/f2fs/xattr.c151
-rw-r--r--fs/f2fs/xattr.h7
-rw-r--r--fs/fat/fat.h4
-rw-r--r--fs/fat/file.c5
-rw-r--r--fs/fcntl.c1
-rw-r--r--fs/file.c2
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/fs_struct.c3
-rw-r--r--fs/fscache/object-list.c2
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/fuse/dir.c8
-rw-r--r--fs/fuse/file.c28
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/gfs2/inode.c12
-rw-r--r--fs/gfs2/lock_dlm.c1
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/gfs2/sys.c1
-rw-r--r--fs/hfs/dir.c2
-rw-r--r--fs/hfs/inode.c1
-rw-r--r--fs/hfsplus/inode.c1
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/ioctl.c2
-rw-r--r--fs/iomap.c2
-rw-r--r--fs/isofs/inode.c1
-rw-r--r--fs/jffs2/background.c2
-rw-r--r--fs/jffs2/fs.c1
-rw-r--r--fs/jffs2/nodemgmt.c2
-rw-r--r--fs/kernfs/file.c2
-rw-r--r--fs/kernfs/inode.c8
-rw-r--r--fs/kernfs/kernfs-internal.h4
-rw-r--r--fs/libfs.c13
-rw-r--r--fs/lockd/svc.c4
-rw-r--r--fs/minix/inode.c11
-rw-r--r--fs/minix/minix.h2
-rw-r--r--fs/namei.c251
-rw-r--r--fs/namespace.c3
-rw-r--r--fs/ncpfs/inode.c1
-rw-r--r--fs/ncpfs/ioctl.c1
-rw-r--r--fs/ncpfs/sock.c112
-rw-r--r--fs/nfs/cache_lib.c3
-rw-r--r--fs/nfs/callback.c1
-rw-r--r--fs/nfs/callback_xdr.c44
-rw-r--r--fs/nfs/dir.c36
-rw-r--r--fs/nfs/filelayout/filelayout.c6
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c60
-rw-r--r--fs/nfs/inode.c15
-rw-r--r--fs/nfs/namespace.c9
-rw-r--r--fs/nfs/nfs42proc.c69
-rw-r--r--fs/nfs/nfs4_fs.h15
-rw-r--r--fs/nfs/nfs4idmap.c2
-rw-r--r--fs/nfs/nfs4proc.c390
-rw-r--r--fs/nfs/nfs4renewd.c2
-rw-r--r--fs/nfs/nfs4session.h7
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/nfs4trace.h64
-rw-r--r--fs/nfs/nfs4xdr.c187
-rw-r--r--fs/nfs/super.c21
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nfsd/export.c1
-rw-r--r--fs/nfsd/nfs2acl.c1
-rw-r--r--fs/nfsd/nfs3acl.c1
-rw-r--r--fs/nfsd/nfs3proc.c8
-rw-r--r--fs/nfsd/nfs4callback.c19
-rw-r--r--fs/nfsd/nfs4idmap.c8
-rw-r--r--fs/nfsd/nfs4proc.c88
-rw-r--r--fs/nfsd/nfs4state.c12
-rw-r--r--fs/nfsd/nfs4xdr.c33
-rw-r--r--fs/nfsd/nfsctl.c70
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfsproc.c8
-rw-r--r--fs/nfsd/nfssvc.c18
-rw-r--r--fs/nfsd/state.h1
-rw-r--r--fs/nfsd/vfs.c104
-rw-r--r--fs/nfsd/vfs.h9
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/notify/fanotify/fanotify.c1
-rw-r--r--fs/notify/fanotify/fanotify_user.c1
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c1
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ocfs2/alloc.c1
-rw-r--r--fs/ocfs2/cluster/tcp.c1
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c1
-rw-r--r--fs/ocfs2/dlmfs/userdlm.c1
-rw-r--r--fs/ocfs2/dlmglue.c1
-rw-r--r--fs/ocfs2/file.c11
-rw-r--r--fs/ocfs2/file.h4
-rw-r--r--fs/ocfs2/super.c1
-rw-r--r--fs/omfs/inode.c1
-rw-r--r--fs/open.c14
-rw-r--r--fs/orangefs/inode.c13
-rw-r--r--fs/orangefs/orangefs-kernel.h7
-rw-r--r--fs/orangefs/super.c9
-rw-r--r--fs/overlayfs/copy_up.c100
-rw-r--r--fs/overlayfs/dir.c10
-rw-r--r--fs/overlayfs/inode.c8
-rw-r--r--fs/overlayfs/namei.c1
-rw-r--r--fs/overlayfs/overlayfs.h11
-rw-r--r--fs/overlayfs/ovl_entry.h3
-rw-r--r--fs/overlayfs/super.c40
-rw-r--r--fs/overlayfs/util.c32
-rw-r--r--fs/posix_acl.c1
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/base.c17
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/generic.c6
-rw-r--r--fs/proc/internal.h4
-rw-r--r--fs/proc/kcore.c1
-rw-r--r--fs/proc/loadavg.c2
-rw-r--r--fs/proc/proc_net.c7
-rw-r--r--fs/proc/proc_sysctl.c6
-rw-r--r--fs/proc/root.c8
-rw-r--r--fs/proc/stat.c3
-rw-r--r--fs/proc/task_mmu.c1
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/proc_namespace.c2
-rw-r--r--fs/quota/dquot.c1
-rw-r--r--fs/read_write.c133
-rw-r--r--fs/select.c4
-rw-r--r--fs/splice.c4
-rw-r--r--fs/stat.c215
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysv/itree.c7
-rw-r--r--fs/sysv/sysv.h2
-rw-r--r--fs/ubifs/dir.c6
-rw-r--r--fs/ubifs/ubifs.h4
-rw-r--r--fs/udf/symlink.c5
-rw-r--r--fs/userfaultfd.c3
-rw-r--r--fs/xfs/kmem.c1
-rw-r--r--fs/xfs/xfs_buf.c1
-rw-r--r--fs/xfs/xfs_ioctl.c1
-rw-r--r--fs/xfs/xfs_iops.c9
-rw-r--r--fs/xfs/xfs_linux.h2
258 files changed, 5004 insertions, 3137 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 60fb474..ed4f851 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -91,10 +91,10 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
* dentry names.
*/
static int build_path_from_dentry(struct v9fs_session_info *v9ses,
- struct dentry *dentry, char ***names)
+ struct dentry *dentry, const unsigned char ***names)
{
int n = 0, i;
- char **wnames;
+ const unsigned char **wnames;
struct dentry *ds;
for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent)
@@ -105,7 +105,7 @@ static int build_path_from_dentry(struct v9fs_session_info *v9ses,
goto err_out;
for (ds = dentry, i = (n-1); i >= 0; i--, ds = ds->d_parent)
- wnames[i] = (char *)ds->d_name.name;
+ wnames[i] = ds->d_name.name;
*names = wnames;
return n;
@@ -117,7 +117,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
kuid_t uid, int any)
{
struct dentry *ds;
- char **wnames, *uname;
+ const unsigned char **wnames, *uname;
int i, n, l, clone, access;
struct v9fs_session_info *v9ses;
struct p9_fid *fid, *old_fid = NULL;
@@ -137,7 +137,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
fid = v9fs_fid_find(ds, uid, any);
if (fid) {
/* Found the parent fid do a lookup with that */
- fid = p9_client_walk(fid, 1, (char **)&dentry->d_name.name, 1);
+ fid = p9_client_walk(fid, 1, &dentry->d_name.name, 1);
goto fid_out;
}
up_read(&v9ses->rename_sem);
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 072e759..a89f3cf 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -29,6 +29,7 @@
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/parser.h>
#include <linux/idr.h>
#include <linux/slab.h>
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index f4f4450..2a5de61 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -643,7 +643,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
struct dentry *dentry, char *extension, u32 perm, u8 mode)
{
int err;
- char *name;
+ const unsigned char *name;
struct p9_fid *dfid, *ofid, *fid;
struct inode *inode;
@@ -652,7 +652,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
err = 0;
ofid = NULL;
fid = NULL;
- name = (char *) dentry->d_name.name;
+ name = dentry->d_name.name;
dfid = v9fs_parent_fid(dentry);
if (IS_ERR(dfid)) {
err = PTR_ERR(dfid);
@@ -788,7 +788,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
struct v9fs_session_info *v9ses;
struct p9_fid *dfid, *fid;
struct inode *inode;
- char *name;
+ const unsigned char *name;
p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%pd) %p flags: %x\n",
dir, dentry, dentry, flags);
@@ -802,7 +802,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_ERR(dfid))
return ERR_CAST(dfid);
- name = (char *) dentry->d_name.name;
+ name = dentry->d_name.name;
fid = p9_client_walk(dfid, 1, &name, 1);
if (IS_ERR(fid)) {
if (fid == ERR_PTR(-ENOENT)) {
@@ -1012,7 +1012,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
v9fs_blank_wstat(&wstat);
wstat.muid = v9ses->uname;
- wstat.name = (char *) new_dentry->d_name.name;
+ wstat.name = new_dentry->d_name.name;
retval = p9_client_wstat(oldfid, &wstat);
clunk_newdir:
@@ -1047,16 +1047,18 @@ done:
/**
* v9fs_vfs_getattr - retrieve file metadata
- * @mnt: mount information
- * @dentry: file to get attributes on
+ * @path: Object to query
* @stat: metadata structure to populate
+ * @request_mask: Mask of STATX_xxx flags indicating the caller's interests
+ * @flags: AT_STATX_xxx setting
*
*/
static int
-v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
+ struct dentry *dentry = path->dentry;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
struct p9_wstat *st;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 5999bd0..70f9887 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -244,7 +244,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
int err = 0;
kgid_t gid;
umode_t mode;
- char *name = NULL;
+ const unsigned char *name = NULL;
struct p9_qid qid;
struct inode *inode;
struct p9_fid *fid = NULL;
@@ -269,7 +269,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
v9ses = v9fs_inode2v9ses(dir);
- name = (char *) dentry->d_name.name;
+ name = dentry->d_name.name;
p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n",
name, flags, omode);
@@ -385,7 +385,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
kgid_t gid;
- char *name;
+ const unsigned char *name;
umode_t mode;
struct inode *inode;
struct p9_qid qid;
@@ -416,7 +416,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
err);
goto error;
}
- name = (char *) dentry->d_name.name;
+ name = dentry->d_name.name;
err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
if (err < 0)
goto error;
@@ -468,9 +468,10 @@ error:
}
static int
-v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
+ struct dentry *dentry = path->dentry;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
struct p9_stat_dotl *st;
@@ -678,14 +679,14 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
{
int err;
kgid_t gid;
- char *name;
+ const unsigned char *name;
struct p9_qid qid;
struct inode *inode;
struct p9_fid *dfid;
struct p9_fid *fid = NULL;
struct v9fs_session_info *v9ses;
- name = (char *) dentry->d_name.name;
+ name = dentry->d_name.name;
p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname);
v9ses = v9fs_inode2v9ses(dir);
@@ -699,7 +700,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
gid = v9fs_get_fsgid_for_create(dir);
/* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
- err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
+ err = p9_client_symlink(dfid, name, symname, gid, &qid);
if (err < 0) {
p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
@@ -775,7 +776,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
if (IS_ERR(oldfid))
return PTR_ERR(oldfid);
- err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
+ err = p9_client_link(dfid, oldfid, dentry->d_name.name);
if (err < 0) {
p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
@@ -812,7 +813,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
{
int err;
kgid_t gid;
- char *name;
+ const unsigned char *name;
umode_t mode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
@@ -842,7 +843,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
err);
goto error;
}
- name = (char *) dentry->d_name.name;
+ name = dentry->d_name.name;
err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
if (err < 0)
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index a5e6097..abcc598 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -10,6 +10,7 @@
* (C) 1991 Linus Torvalds - minix filesystem
*/
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/gfp.h>
#include "affs.h"
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 37532538..c2c27a8 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -16,6 +16,7 @@
#include <linux/parser.h>
#include <linux/magic.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 86cc726..1e4897a 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -375,12 +375,10 @@ error_unlock:
/*
* read the attributes of an inode
*/
-int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+int afs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode;
-
- inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 8acf367..5dfa569 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -533,7 +533,7 @@ extern struct inode *afs_iget(struct super_block *, struct key *,
struct afs_callback *);
extern void afs_zap_data(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
-extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int afs_setattr(struct dentry *, struct iattr *);
extern void afs_evict_inode(struct inode *);
extern int afs_drop_inode(struct inode *);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 95f4287..419ef05 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -10,6 +10,8 @@
*/
#include <linux/slab.h>
+#include <linux/sched/signal.h>
+
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <rxrpc/packet.h>
@@ -260,8 +262,7 @@ void afs_flat_call_destructor(struct afs_call *call)
/*
* attach the data from a bunch of pages on an inode to a call
*/
-static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
- struct kvec *iov)
+static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
{
struct page *pages[8];
unsigned count, n, loop, offset, to;
@@ -284,20 +285,21 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
loop = 0;
do {
+ struct bio_vec bvec = {.bv_page = pages[loop],
+ .bv_offset = offset};
msg->msg_flags = 0;
to = PAGE_SIZE;
if (first + loop >= last)
to = call->last_to;
else
msg->msg_flags = MSG_MORE;
- iov->iov_base = kmap(pages[loop]) + offset;
- iov->iov_len = to - offset;
+ bvec.bv_len = to - offset;
offset = 0;
_debug("- range %u-%u%s",
offset, to, msg->msg_flags ? " [more]" : "");
- iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC,
- iov, 1, to - offset);
+ iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC,
+ &bvec, 1, to - offset);
/* have to change the state *before* sending the last
* packet as RxRPC might give us the reply before it
@@ -306,7 +308,6 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
msg, to - offset);
- kunmap(pages[loop]);
if (ret < 0)
break;
} while (++loop < count);
@@ -391,7 +392,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
goto error_do_abort;
if (call->send_pages) {
- ret = afs_send_pages(call, &msg, iov);
+ ret = afs_send_pages(call, &msg);
if (ret < 0)
goto error_do_abort;
}
diff --git a/fs/aio.c b/fs/aio.c
index 7e2ab9c..f52d925 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -20,7 +20,7 @@
#include <linux/backing-dev.h>
#include <linux/uio.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
@@ -1495,7 +1495,7 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret)
- ret = aio_ret(req, file->f_op->read_iter(req, &iter));
+ ret = aio_ret(req, call_read_iter(file, req, &iter));
kfree(iovec);
return ret;
}
@@ -1520,7 +1520,7 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
if (!ret) {
req->ki_flags |= IOCB_WRITE;
file_start_write(file);
- ret = aio_ret(req, file->f_op->write_iter(req, &iter));
+ ret = aio_ret(req, call_write_iter(file, req, &iter));
/*
* We release freeze protection in aio_complete(). Fool lockdep
* by telling it the lock got released so that it doesn't
diff --git a/fs/attr.c b/fs/attr.c
index c902b3d..1353041 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -9,6 +9,7 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/string.h>
+#include <linux/sched/signal.h>
#include <linux/capability.h>
#include <linux/fsnotify.h>
#include <linux/fcntl.h>
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c885daa..beef981 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -14,6 +14,7 @@
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/list.h>
+#include <linux/completion.h>
/* This is the range of ioctl() numbers we claim as ours */
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 806df74..734cbf8 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -17,6 +17,7 @@
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/magic.h>
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 79fbd85..24a58bf 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/signal.h>
+#include <linux/sched/signal.h>
#include <linux/file.h>
#include "autofs_i.h"
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 5f685c8..bb53728 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -89,8 +89,8 @@ static int bad_inode_permission(struct inode *inode, int mask)
return -EIO;
}
-static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int bad_inode_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
return -EIO;
}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 1940716..c500e95 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -18,6 +18,7 @@
#include <linux/parser.h>
#include <linux/namei.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/exportfs.h>
#include "befs.h"
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 2a59139..9be82c4 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -25,6 +25,7 @@
#include <linux/init.h>
#include <linux/coredump.h>
#include <linux/slab.h>
+#include <linux/sched/task_stack.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 443a6f5..5075fd5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -35,6 +35,10 @@
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/task_stack.h>
+#include <linux/sched/cputime.h>
+#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index ffca4bb..cf93a4f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -15,6 +15,9 @@
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/sched.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/task_stack.h>
+#include <linux/sched/cputime.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 9b2917a..2edcefc 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 9b4688a..bee1a36 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -12,7 +12,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/magic.h>
#include <linux/binfmts.h>
#include <linux/slab.h>
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 77c30f1..2eca00e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -870,6 +870,7 @@ static void init_once(void *foo)
#ifdef CONFIG_SYSFS
INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
+ bdev->bd_bdi = &noop_backing_dev_info;
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -884,8 +885,10 @@ static void bdev_evict_inode(struct inode *inode)
spin_lock(&bdev_lock);
list_del_init(&bdev->bd_list);
spin_unlock(&bdev_lock);
- if (bdev->bd_bdi != &noop_backing_dev_info)
+ if (bdev->bd_bdi != &noop_backing_dev_info) {
bdi_put(bdev->bd_bdi);
+ bdev->bd_bdi = &noop_backing_dev_info;
+ }
}
static const struct super_operations bdev_sops = {
@@ -988,7 +991,6 @@ struct block_device *bdget(dev_t dev)
bdev->bd_contains = NULL;
bdev->bd_super = NULL;
bdev->bd_inode = inode;
- bdev->bd_bdi = &noop_backing_dev_info;
bdev->bd_block_size = i_blocksize(inode);
bdev->bd_part_count = 0;
bdev->bd_invalidated = 0;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 819a6d2..0c6baab 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -237,20 +237,20 @@ static inline u64 btrfs_ino(struct btrfs_inode *inode)
return ino;
}
-static inline void btrfs_i_size_write(struct inode *inode, u64 size)
+static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
{
- i_size_write(inode, size);
- BTRFS_I(inode)->disk_i_size = size;
+ i_size_write(&inode->vfs_inode, size);
+ inode->disk_i_size = size;
}
-static inline bool btrfs_is_free_space_inode(struct inode *inode)
+static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
if (root == root->fs_info->tree_root &&
- btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID)
+ btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
return true;
- if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+ if (inode->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}
@@ -311,34 +311,33 @@ struct btrfs_dio_private {
* to grab i_mutex. It is used to avoid the endless truncate due to
* nonlocked dio read.
*/
-static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
+static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode)
{
- set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags);
+ set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
smp_mb();
}
-static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
+static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
{
smp_mb__before_atomic();
- clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
- &BTRFS_I(inode)->runtime_flags);
+ clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
}
-static inline void btrfs_print_data_csum_error(struct inode *inode,
+static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
u64 logical_start, u32 csum, u32 csum_expected, int mirror_num)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
/* Output minus objectid, which is more meaningful */
if (root->objectid >= BTRFS_LAST_FREE_OBJECTID)
btrfs_warn_rl(root->fs_info,
"csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d",
- root->objectid, btrfs_ino(BTRFS_I(inode)),
+ root->objectid, btrfs_ino(inode),
logical_start, csum, csum_expected, mirror_num);
else
btrfs_warn_rl(root->fs_info,
"csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d",
- root->objectid, btrfs_ino(BTRFS_I(inode)),
+ root->objectid, btrfs_ino(inode),
logical_start, csum, csum_expected, mirror_num);
}
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 903c32c..c7721a6 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -100,7 +100,7 @@ static struct bio *compressed_bio_alloc(struct block_device *bdev,
return btrfs_bio_alloc(bdev, first_byte >> 9, BIO_MAX_PAGES, gfp_flags);
}
-static int check_compressed_csum(struct inode *inode,
+static int check_compressed_csum(struct btrfs_inode *inode,
struct compressed_bio *cb,
u64 disk_start)
{
@@ -111,7 +111,7 @@ static int check_compressed_csum(struct inode *inode,
u32 csum;
u32 *cb_sum = &cb->sums;
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+ if (inode->flags & BTRFS_INODE_NODATASUM)
return 0;
for (i = 0; i < cb->nr_pages; i++) {
@@ -125,7 +125,7 @@ static int check_compressed_csum(struct inode *inode,
if (csum != *cb_sum) {
btrfs_print_data_csum_error(inode, disk_start, csum,
- *cb_sum, cb->mirror_num);
+ *cb_sum, cb->mirror_num);
ret = -EIO;
goto fail;
}
@@ -165,7 +165,7 @@ static void end_compressed_bio_read(struct bio *bio)
goto out;
inode = cb->inode;
- ret = check_compressed_csum(inode, cb,
+ ret = check_compressed_csum(BTRFS_I(inode), cb,
(u64)bio->bi_iter.bi_sector << 9);
if (ret)
goto csum_failed;
@@ -911,32 +911,28 @@ static void free_workspaces(void)
}
/*
- * given an address space and start/len, compress the bytes.
+ * Given an address space and start and length, compress the bytes into @pages
+ * that are allocated on demand.
*
- * pages are allocated to hold the compressed result and stored
- * in 'pages'
+ * @out_pages is an in/out parameter, holds maximum number of pages to allocate
+ * and returns number of actually allocated pages
*
- * out_pages is used to return the number of pages allocated. There
- * may be pages allocated even if we return an error
- *
- * total_in is used to return the number of bytes actually read. It
- * may be smaller then len if we had to exit early because we
+ * @total_in is used to return the number of bytes actually read. It
+ * may be smaller than the input length if we had to exit early because we
* ran out of room in the pages array or because we cross the
* max_out threshold.
*
- * total_out is used to return the total number of compressed bytes
+ * @total_out is an in/out parameter, must be set to the input length and will
+ * be also used to return the total number of compressed bytes
*
- * max_out tells us the max number of bytes that we're allowed to
+ * @max_out tells us the max number of bytes that we're allowed to
* stuff into pages
*/
int btrfs_compress_pages(int type, struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
+ u64 start, struct page **pages,
unsigned long *out_pages,
unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out)
+ unsigned long *total_out)
{
struct list_head *workspace;
int ret;
@@ -944,10 +940,9 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
workspace = find_workspace(type);
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
- start, len, pages,
- nr_dest_pages, out_pages,
- total_in, total_out,
- max_out);
+ start, pages,
+ out_pages,
+ total_in, total_out);
free_workspace(type, workspace);
return ret;
}
@@ -1015,7 +1010,7 @@ void btrfs_exit_compress(void)
*
* total_out is the last byte of the buffer
*/
-int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
unsigned long total_out, u64 disk_start,
struct bio *bio)
{
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 0987957..39ec43a 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,20 +19,32 @@
#ifndef __BTRFS_COMPRESSION_
#define __BTRFS_COMPRESSION_
+/*
+ * We want to make sure that amount of RAM required to uncompress an extent is
+ * reasonable, so we limit the total size in ram of a compressed extent to
+ * 128k. This is a crucial number because it also controls how easily we can
+ * spread reads across cpus for decompression.
+ *
+ * We also want to make sure the amount of IO required to do a random read is
+ * reasonably small, so we limit the size of a compressed extent to 128k.
+ */
+
+/* Maximum length of compressed data stored on disk */
+#define BTRFS_MAX_COMPRESSED (SZ_128K)
+/* Maximum size of data before compression */
+#define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
+
void btrfs_init_compress(void);
void btrfs_exit_compress(void);
int btrfs_compress_pages(int type, struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
+ u64 start, struct page **pages,
unsigned long *out_pages,
unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out);
+ unsigned long *total_out);
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
unsigned long start_byte, size_t srclen, size_t destlen);
-int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
unsigned long total_out, u64 disk_start,
struct bio *bio);
@@ -59,13 +71,11 @@ struct btrfs_compress_op {
int (*compress_pages)(struct list_head *workspace,
struct address_space *mapping,
- u64 start, unsigned long len,
+ u64 start,
struct page **pages,
- unsigned long nr_dest_pages,
unsigned long *out_pages,
unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out);
+ unsigned long *total_out);
int (*decompress_bio)(struct list_head *workspace,
struct page **pages_in,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1192bc7..7dc8844 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -453,8 +453,6 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
struct rb_node *parent = NULL;
struct tree_mod_elem *cur;
- BUG_ON(!tm);
-
tm->seq = btrfs_inc_tree_mod_seq(fs_info);
tm_root = &fs_info->tree_mod_log;
@@ -4159,6 +4157,9 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
/* try to push all the items before our slot into the next leaf */
slot = path->slots[0];
+ space_needed = data_size;
+ if (slot > 0)
+ space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]);
ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
if (ret < 0)
return ret;
@@ -4214,6 +4215,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
if (wret < 0)
return wret;
if (wret) {
+ space_needed = data_size;
+ if (slot > 0)
+ space_needed -= btrfs_leaf_free_space(fs_info,
+ l);
wret = push_leaf_left(trans, root, path, space_needed,
space_needed, 0, (u32)-1);
if (wret < 0)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 105d4d4..29b7fc2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -20,6 +20,7 @@
#define __BTRFS_CTREE__
#include <linux/mm.h>
+#include <linux/sched/signal.h>
#include <linux/highmem.h>
#include <linux/fs.h>
#include <linux/rwsem.h>
@@ -2687,7 +2688,7 @@ enum btrfs_flush_state {
};
int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len);
-int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes);
+int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
u64 len);
@@ -2695,16 +2696,16 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
- struct inode *inode);
-void btrfs_orphan_release_metadata(struct inode *inode);
+ struct btrfs_inode *inode);
+void btrfs_orphan_release_metadata(struct btrfs_inode *inode);
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems,
u64 *qgroup_reserved, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
-int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
-void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len);
void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
@@ -2982,7 +2983,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
const char *name, int name_len);
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *name,
- int name_len, struct inode *dir,
+ int name_len, struct btrfs_inode *dir,
struct btrfs_key *location, u8 type, u64 index);
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -3081,7 +3082,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
-void btrfs_extent_item_to_extent_map(struct inode *inode,
+void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
const bool new_inline,
@@ -3100,9 +3101,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
int delay_iput);
void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
-struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
- size_t pg_offset, u64 start, u64 len,
- int create);
+struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
+ struct page *page, size_t pg_offset, u64 start,
+ u64 len, int create);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes);
@@ -3123,13 +3124,13 @@ static inline void btrfs_force_ra(struct address_space *mapping,
}
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
-int btrfs_set_inode_index(struct inode *dir, u64 *index);
+int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *dir, struct btrfs_inode *inode,
const char *name, int name_len);
int btrfs_add_link(struct btrfs_trans_handle *trans,
- struct inode *parent_inode, struct inode *inode,
+ struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const char *name, int name_len, int add_backref, u64 index);
int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -3166,15 +3167,16 @@ void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *was_new);
-struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
- size_t pg_offset, u64 start, u64 end,
- int create);
+struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
+ struct page *page, size_t pg_offset,
+ u64 start, u64 end, int create);
int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
-int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
+int btrfs_orphan_add(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
@@ -3215,11 +3217,11 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
int btrfs_auto_defrag_init(void);
void btrfs_auto_defrag_exit(void);
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct inode *inode);
+ struct btrfs_inode *inode);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
-void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
int skip_pinned);
extern const struct file_operations btrfs_file_operations;
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
@@ -3233,7 +3235,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode, u64 start,
u64 end, int drop_cache);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 start, u64 end);
+ struct btrfs_inode *inode, u64 start, u64 end);
int btrfs_release_file(struct inode *inode, struct file *file);
int btrfs_dirty_pages(struct inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index f7a6ee5..1aff676 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1790,7 +1790,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
- btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
+ btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
inode->i_mode = btrfs_stack_inode_mode(inode_item);
set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5de280b9..e653921 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -304,8 +304,9 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
dev_replace->cursor_left_last_write_of_item;
}
-int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, char *tgtdev_name,
- u64 srcdevid, char *srcdev_name, int read_src)
+int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
+ const char *tgtdev_name, u64 srcdevid, const char *srcdev_name,
+ int read_src)
{
struct btrfs_root *root = fs_info->dev_root;
struct btrfs_trans_handle *trans;
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 54ea12b..f94a768 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -27,8 +27,9 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
-int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, char *tgtdev_name,
- u64 srcdevid, char *srcdev_name, int read_src);
+int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
+ const char *tgtdev_name, u64 srcdevid, const char *srcdev_name,
+ int read_src);
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 724504a..60a7506 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -80,7 +80,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
u32 data_size;
- BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info));
+ if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info))
+ return -ENOSPC;
key.objectid = objectid;
key.type = BTRFS_XATTR_ITEM_KEY;
@@ -120,7 +121,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
*/
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, const char *name, int name_len,
- struct inode *dir, struct btrfs_key *location,
+ struct btrfs_inode *dir, struct btrfs_key *location,
u8 type, u64 index)
{
int ret = 0;
@@ -133,7 +134,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
struct btrfs_disk_key disk_key;
u32 data_size;
- key.objectid = btrfs_ino(BTRFS_I(dir));
+ key.objectid = btrfs_ino(dir);
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
@@ -174,7 +175,7 @@ second_insert:
btrfs_release_path(path);
ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name,
- name_len, BTRFS_I(dir), &disk_key, type, index);
+ name_len, dir, &disk_key, type, index);
out_free:
btrfs_free_path(path);
if (ret)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 207db02..08b74da 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -219,12 +219,12 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
* extents on the btree inode are pretty simple, there's one extent
* that covers the entire device
*/
-static struct extent_map *btree_get_extent(struct inode *inode,
+static struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
int create)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
int ret;
@@ -265,7 +265,7 @@ out:
return em;
}
-u32 btrfs_csum_data(char *data, u32 seed, size_t len)
+u32 btrfs_csum_data(const char *data, u32 seed, size_t len)
{
return btrfs_crc32c(seed, data, len);
}
@@ -2205,11 +2205,9 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->delalloc_workers);
btrfs_destroy_workqueue(fs_info->workers);
btrfs_destroy_workqueue(fs_info->endio_workers);
- btrfs_destroy_workqueue(fs_info->endio_meta_workers);
btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
btrfs_destroy_workqueue(fs_info->endio_repair_workers);
btrfs_destroy_workqueue(fs_info->rmw_workers);
- btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
btrfs_destroy_workqueue(fs_info->submit_workers);
@@ -2219,6 +2217,13 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->flush_workers);
btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
btrfs_destroy_workqueue(fs_info->extent_workers);
+ /*
+ * Now that all other work queues are destroyed, we can safely destroy
+ * the queues used for metadata I/O, since tasks from those other work
+ * queues can do metadata I/O operations.
+ */
+ btrfs_destroy_workqueue(fs_info->endio_meta_workers);
+ btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
}
static void free_root_extent_buffers(struct btrfs_root *root)
@@ -3261,7 +3266,6 @@ fail_fsdev_sysfs:
fail_block_groups:
btrfs_put_block_group_cache(fs_info);
- btrfs_free_block_groups(fs_info);
fail_tree_roots:
free_root_pointers(fs_info, 1);
@@ -3269,6 +3273,7 @@ fail_tree_roots:
fail_sb_buffer:
btrfs_stop_all_workers(fs_info);
+ btrfs_free_block_groups(fs_info);
fail_alloc:
fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3448,7 +3453,7 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_set_super_bytenr(sb, bytenr);
crc = ~(u32)0;
- crc = btrfs_csum_data((char *)sb +
+ crc = btrfs_csum_data((const char *)sb +
BTRFS_CSUM_SIZE, crc,
BTRFS_SUPER_INFO_SIZE -
BTRFS_CSUM_SIZE);
@@ -3977,8 +3982,6 @@ void close_ctree(struct btrfs_fs_info *fs_info)
btrfs_put_block_group_cache(fs_info);
- btrfs_free_block_groups(fs_info);
-
/*
* we must make sure there is not any read request to
* submit after we stopping all workers.
@@ -3986,6 +3989,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);
+ btrfs_free_block_groups(fs_info);
+
clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
free_root_pointers(fs_info, 1);
@@ -4653,9 +4658,12 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
}
static const struct extent_io_ops btree_extent_io_ops = {
- .readpage_end_io_hook = btree_readpage_end_io_hook,
- .readpage_io_failed_hook = btree_io_failed_hook,
+ /* mandatory callbacks */
.submit_bio_hook = btree_submit_bio_hook,
+ .readpage_end_io_hook = btree_readpage_end_io_hook,
/* note we're sharing with inode.c for the merge bio hook */
.merge_bio_hook = btrfs_merge_bio_hook,
+ .readpage_io_failed_hook = btree_io_failed_hook,
+
+ /* optional callbacks */
};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 0be2d4f..2e0ec29 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -116,7 +116,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
-u32 btrfs_csum_data(char *data, u32 seed, size_t len);
+u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, u8 *result);
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c35b966..be54776 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -16,6 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
@@ -4135,10 +4136,10 @@ static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
(may_use_included ? s_info->bytes_may_use : 0);
}
-int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
+int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 used;
int ret = 0;
@@ -4281,7 +4282,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
round_down(start, fs_info->sectorsize);
start = round_down(start, fs_info->sectorsize);
- ret = btrfs_alloc_data_chunk_ondemand(inode, len);
+ ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
if (ret < 0)
return ret;
@@ -5742,10 +5743,10 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
/* Can only return 0 or -ENOSPC */
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
/*
* We always use trans->block_rsv here as we will have reserved space
* for our orphan when starting the transaction, using get_block_rsv()
@@ -5762,19 +5763,19 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
*/
u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
- trace_btrfs_space_reservation(fs_info, "orphan",
- btrfs_ino(BTRFS_I(inode)), num_bytes, 1);
+ trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
+ num_bytes, 1);
return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
}
-void btrfs_orphan_release_metadata(struct inode *inode)
+void btrfs_orphan_release_metadata(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
- trace_btrfs_space_reservation(fs_info, "orphan",
- btrfs_ino(BTRFS_I(inode)), num_bytes, 0);
+ trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
+ num_bytes, 0);
btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes);
}
@@ -5846,7 +5847,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
* reserved extents that need to be freed. This must be called with
* BTRFS_I(inode)->lock held.
*/
-static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
+static unsigned drop_outstanding_extent(struct btrfs_inode *inode,
+ u64 num_bytes)
{
unsigned drop_inode_space = 0;
unsigned dropped_extents = 0;
@@ -5854,25 +5856,23 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
num_extents = count_max_extents(num_bytes);
ASSERT(num_extents);
- ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
- BTRFS_I(inode)->outstanding_extents -= num_extents;
+ ASSERT(inode->outstanding_extents >= num_extents);
+ inode->outstanding_extents -= num_extents;
- if (BTRFS_I(inode)->outstanding_extents == 0 &&
+ if (inode->outstanding_extents == 0 &&
test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags))
+ &inode->runtime_flags))
drop_inode_space = 1;
/*
* If we have more or the same amount of outstanding extents than we have
* reserved then we need to leave the reserved extents count alone.
*/
- if (BTRFS_I(inode)->outstanding_extents >=
- BTRFS_I(inode)->reserved_extents)
+ if (inode->outstanding_extents >= inode->reserved_extents)
return drop_inode_space;
- dropped_extents = BTRFS_I(inode)->reserved_extents -
- BTRFS_I(inode)->outstanding_extents;
- BTRFS_I(inode)->reserved_extents -= dropped_extents;
+ dropped_extents = inode->reserved_extents - inode->outstanding_extents;
+ inode->reserved_extents -= dropped_extents;
return dropped_extents + drop_inode_space;
}
@@ -5894,24 +5894,21 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
*
* This must be called with BTRFS_I(inode)->lock held.
*/
-static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
+static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes,
int reserve)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
u64 old_csums, num_csums;
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
- BTRFS_I(inode)->csum_bytes == 0)
+ if (inode->flags & BTRFS_INODE_NODATASUM && inode->csum_bytes == 0)
return 0;
- old_csums = btrfs_csum_bytes_to_leaves(fs_info,
- BTRFS_I(inode)->csum_bytes);
+ old_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
if (reserve)
- BTRFS_I(inode)->csum_bytes += num_bytes;
+ inode->csum_bytes += num_bytes;
else
- BTRFS_I(inode)->csum_bytes -= num_bytes;
- num_csums = btrfs_csum_bytes_to_leaves(fs_info,
- BTRFS_I(inode)->csum_bytes);
+ inode->csum_bytes -= num_bytes;
+ num_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
/* No change, no need to reserve more */
if (old_csums == num_csums)
@@ -5924,10 +5921,10 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
return btrfs_calc_trans_metadata_size(fs_info, old_csums - num_csums);
}
-int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
u64 to_reserve = 0;
u64 csum_bytes;
@@ -5959,25 +5956,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
schedule_timeout(1);
if (delalloc_lock)
- mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
+ mutex_lock(&inode->delalloc_mutex);
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
- spin_lock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
nr_extents = count_max_extents(num_bytes);
- BTRFS_I(inode)->outstanding_extents += nr_extents;
+ inode->outstanding_extents += nr_extents;
nr_extents = 0;
- if (BTRFS_I(inode)->outstanding_extents >
- BTRFS_I(inode)->reserved_extents)
- nr_extents += BTRFS_I(inode)->outstanding_extents -
- BTRFS_I(inode)->reserved_extents;
+ if (inode->outstanding_extents > inode->reserved_extents)
+ nr_extents += inode->outstanding_extents -
+ inode->reserved_extents;
/* We always want to reserve a slot for updating the inode. */
to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1);
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
- csum_bytes = BTRFS_I(inode)->csum_bytes;
- spin_unlock(&BTRFS_I(inode)->lock);
+ csum_bytes = inode->csum_bytes;
+ spin_unlock(&inode->lock);
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
ret = btrfs_qgroup_reserve_meta(root,
@@ -5993,38 +5989,38 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
goto out_fail;
}
- spin_lock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags)) {
+ &inode->runtime_flags)) {
to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1);
release_extra = true;
}
- BTRFS_I(inode)->reserved_extents += nr_extents;
- spin_unlock(&BTRFS_I(inode)->lock);
+ inode->reserved_extents += nr_extents;
+ spin_unlock(&inode->lock);
if (delalloc_lock)
- mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+ mutex_unlock(&inode->delalloc_mutex);
if (to_reserve)
trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(BTRFS_I(inode)), to_reserve, 1);
+ btrfs_ino(inode), to_reserve, 1);
if (release_extra)
btrfs_block_rsv_release(fs_info, block_rsv,
btrfs_calc_trans_metadata_size(fs_info, 1));
return 0;
out_fail:
- spin_lock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
dropped = drop_outstanding_extent(inode, num_bytes);
/*
* If the inodes csum_bytes is the same as the original
* csum_bytes then we know we haven't raced with any free()ers
* so we can just reduce our inodes csum bytes and carry on.
*/
- if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
+ if (inode->csum_bytes == csum_bytes) {
calc_csum_metadata_size(inode, num_bytes, 0);
} else {
- u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
+ u64 orig_csum_bytes = inode->csum_bytes;
u64 bytes;
/*
@@ -6035,8 +6031,8 @@ out_fail:
* number of bytes that were freed while we were trying our
* reservation.
*/
- bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
- BTRFS_I(inode)->csum_bytes = csum_bytes;
+ bytes = csum_bytes - inode->csum_bytes;
+ inode->csum_bytes = csum_bytes;
to_free = calc_csum_metadata_size(inode, bytes, 0);
@@ -6045,7 +6041,7 @@ out_fail:
* been making this reservation and our ->csum_bytes were not
* artificially inflated.
*/
- BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
+ inode->csum_bytes = csum_bytes - num_bytes;
bytes = csum_bytes - orig_csum_bytes;
bytes = calc_csum_metadata_size(inode, bytes, 0);
@@ -6057,23 +6053,23 @@ out_fail:
* need to do anything, the other free-ers did the correct
* thing.
*/
- BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
+ inode->csum_bytes = orig_csum_bytes - num_bytes;
if (bytes > to_free)
to_free = bytes - to_free;
else
to_free = 0;
}
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_unlock(&inode->lock);
if (dropped)
to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
if (to_free) {
btrfs_block_rsv_release(fs_info, block_rsv, to_free);
trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(BTRFS_I(inode)), to_free, 0);
+ btrfs_ino(inode), to_free, 0);
}
if (delalloc_lock)
- mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+ mutex_unlock(&inode->delalloc_mutex);
return ret;
}
@@ -6086,27 +6082,27 @@ out_fail:
* once we complete IO for a given set of bytes to release their metadata
* reservations.
*/
-void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
u64 to_free = 0;
unsigned dropped;
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
- spin_lock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
dropped = drop_outstanding_extent(inode, num_bytes);
if (num_bytes)
to_free = calc_csum_metadata_size(inode, num_bytes, 0);
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_unlock(&inode->lock);
if (dropped > 0)
to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
if (btrfs_is_testing(fs_info))
return;
- trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(BTRFS_I(inode)), to_free, 0);
+ trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode),
+ to_free, 0);
btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
}
@@ -6141,7 +6137,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len)
ret = btrfs_check_data_free_space(inode, start, len);
if (ret < 0)
return ret;
- ret = btrfs_delalloc_reserve_metadata(inode, len);
+ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
if (ret < 0)
btrfs_free_reserved_data_space(inode, start, len);
return ret;
@@ -6164,7 +6160,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len)
*/
void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len)
{
- btrfs_delalloc_release_metadata(inode, len);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
btrfs_free_reserved_data_space(inode, start, len);
}
@@ -9740,6 +9736,11 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
}
}
+/*
+ * Must be called only after stopping all workers, since we could have block
+ * group caching kthreads running, and therefore they could race with us if we
+ * freed the block groups before stopping them.
+ */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
@@ -9779,9 +9780,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
list_del(&block_group->list);
up_write(&block_group->space_info->groups_sem);
- if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_block_group_cache_done(block_group);
-
/*
* We haven't cached this block group, which means we could
* possibly have excluded extents on this block group.
@@ -9791,6 +9789,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
free_excluded_extents(info, block_group);
btrfs_remove_free_space_cache(block_group);
+ ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
ASSERT(list_empty(&block_group->dirty_list));
ASSERT(list_empty(&block_group->io_list));
ASSERT(list_empty(&block_group->bg_list));
@@ -10342,7 +10341,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&trans->transaction->cache_write_mutex);
if (!IS_ERR(inode)) {
- ret = btrfs_orphan_add(trans, inode);
+ ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
goto out;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d15b5dd..28e8192 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -428,7 +428,8 @@ static void clear_state_cb(struct extent_io_tree *tree,
struct extent_state *state, unsigned *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
- tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
+ tree->ops->clear_bit_hook(BTRFS_I(tree->mapping->host),
+ state, bits);
}
static void set_state_bits(struct extent_io_tree *tree,
@@ -1959,11 +1960,11 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
SetPageUptodate(page);
}
-int free_io_failure(struct inode *inode, struct io_failure_record *rec)
+int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec)
{
int ret;
int err = 0;
- struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+ struct extent_io_tree *failure_tree = &inode->io_failure_tree;
set_state_failrec(failure_tree, rec->start, NULL);
ret = clear_extent_bits(failure_tree, rec->start,
@@ -1972,7 +1973,7 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec)
if (ret)
err = ret;
- ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
+ ret = clear_extent_bits(&inode->io_tree, rec->start,
rec->start + rec->len - 1,
EXTENT_DAMAGED);
if (ret && !err)
@@ -1992,10 +1993,11 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec)
* currently, there can be no more than two copies of every data bit. thus,
* exactly one rewrite is required.
*/
-int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
- struct page *page, unsigned int pg_offset, int mirror_num)
+int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
+ u64 logical, struct page *page,
+ unsigned int pg_offset, int mirror_num)
{
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio;
struct btrfs_device *dev;
u64 map_length = 0;
@@ -2054,7 +2056,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
btrfs_info_rl_in_rcu(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
- btrfs_ino(BTRFS_I(inode)), start,
+ btrfs_ino(inode), start,
rcu_str_deref(dev->name), sector);
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
@@ -2074,7 +2076,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
- ret = repair_io_failure(fs_info->btree_inode, start,
+ ret = repair_io_failure(BTRFS_I(fs_info->btree_inode), start,
PAGE_SIZE, start, p,
start - page_offset(p), mirror_num);
if (ret)
@@ -2089,23 +2091,23 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
* each time an IO finishes, we do a fast check in the IO failure tree
* to see if we need to process or clean up an io_failure_record
*/
-int clean_io_failure(struct inode *inode, u64 start, struct page *page,
+int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page,
unsigned int pg_offset)
{
u64 private;
struct io_failure_record *failrec;
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_state *state;
int num_copies;
int ret;
private = 0;
- ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
+ ret = count_range_bits(&inode->io_failure_tree, &private,
(u64)-1, 1, EXTENT_DIRTY, 0);
if (!ret)
return 0;
- ret = get_state_failrec(&BTRFS_I(inode)->io_failure_tree, start,
+ ret = get_state_failrec(&inode->io_failure_tree, start,
&failrec);
if (ret)
return 0;
@@ -2122,11 +2124,11 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
if (fs_info->sb->s_flags & MS_RDONLY)
goto out;
- spin_lock(&BTRFS_I(inode)->io_tree.lock);
- state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
+ spin_lock(&inode->io_tree.lock);
+ state = find_first_extent_bit_state(&inode->io_tree,
failrec->start,
EXTENT_LOCKED);
- spin_unlock(&BTRFS_I(inode)->io_tree.lock);
+ spin_unlock(&inode->io_tree.lock);
if (state && state->start <= failrec->start &&
state->end >= failrec->start + failrec->len - 1) {
@@ -2151,9 +2153,9 @@ out:
* - under ordered extent
* - the inode is freeing
*/
-void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
+void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
{
- struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+ struct extent_io_tree *failure_tree = &inode->io_failure_tree;
struct io_failure_record *failrec;
struct extent_state *state, *next;
@@ -2393,7 +2395,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
if (!ret) {
- free_io_failure(inode, failrec);
+ free_io_failure(BTRFS_I(inode), failrec);
return -EIO;
}
@@ -2406,7 +2408,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
(int)phy_offset, failed_bio->bi_end_io,
NULL);
if (!bio) {
- free_io_failure(inode, failrec);
+ free_io_failure(BTRFS_I(inode), failrec);
return -EIO;
}
bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
@@ -2418,7 +2420,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
failrec->bio_flags, 0);
if (ret) {
- free_io_failure(inode, failrec);
+ free_io_failure(BTRFS_I(inode), failrec);
bio_put(bio);
}
@@ -2435,12 +2437,9 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (tree->ops && tree->ops->writepage_end_io_hook) {
- ret = tree->ops->writepage_end_io_hook(page, start,
- end, NULL, uptodate);
- if (ret)
- uptodate = 0;
- }
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, start, end, NULL,
+ uptodate);
if (!uptodate) {
ClearPageUptodate(page);
@@ -2568,21 +2567,21 @@ static void end_bio_extent_readpage(struct bio *bio)
len = bvec->bv_len;
mirror = io_bio->mirror_num;
- if (likely(uptodate && tree->ops &&
- tree->ops->readpage_end_io_hook)) {
+ if (likely(uptodate && tree->ops)) {
ret = tree->ops->readpage_end_io_hook(io_bio, offset,
page, start, end,
mirror);
if (ret)
uptodate = 0;
else
- clean_io_failure(inode, start, page, 0);
+ clean_io_failure(BTRFS_I(inode), start,
+ page, 0);
}
if (likely(uptodate))
goto readpage_ok;
- if (tree->ops && tree->ops->readpage_io_failed_hook) {
+ if (tree->ops) {
ret = tree->ops->readpage_io_failed_hook(page, mirror);
if (!ret && !bio->bi_error)
uptodate = 1;
@@ -2731,7 +2730,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
bio->bi_private = NULL;
bio_get(bio);
- if (tree->ops && tree->ops->submit_bio_hook)
+ if (tree->ops)
ret = tree->ops->submit_bio_hook(page->mapping->host, bio,
mirror_num, bio_flags, start);
else
@@ -2746,7 +2745,7 @@ static int merge_bio(struct extent_io_tree *tree, struct page *page,
unsigned long bio_flags)
{
int ret = 0;
- if (tree->ops && tree->ops->merge_bio_hook)
+ if (tree->ops)
ret = tree->ops->merge_bio_hook(page, offset, size, bio,
bio_flags);
return ret;
@@ -2857,7 +2856,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
*em_cached = NULL;
}
- em = get_extent(inode, page, pg_offset, start, len, 0);
+ em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
if (em_cached && !IS_ERR_OR_NULL(em)) {
BUG_ON(*em_cached);
atomic_inc(&em->refs);
@@ -3101,7 +3100,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
inode = pages[0]->mapping->host;
while (1) {
lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_range(inode, start,
+ ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
end - start + 1);
if (!ordered)
break;
@@ -3173,7 +3172,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
while (1) {
lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_range(inode, start,
+ ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
PAGE_SIZE);
if (!ordered)
break;
@@ -3370,7 +3369,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
page_end, NULL, 1);
break;
}
- em = epd->get_extent(inode, page, pg_offset, cur,
+ em = epd->get_extent(BTRFS_I(inode), page, pg_offset, cur,
end - cur + 1, 1);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
@@ -4335,7 +4334,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
if (len == 0)
break;
len = ALIGN(len, sectorsize);
- em = get_extent(inode, NULL, 0, offset, len, 0);
+ em = get_extent(BTRFS_I(inode), NULL, 0, offset, len, 0);
if (IS_ERR_OR_NULL(em))
return em;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 270d03b..3e4fad4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -84,6 +84,7 @@ extern void le_bitmap_clear(u8 *map, unsigned int start, int len);
struct extent_state;
struct btrfs_root;
+struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;
@@ -91,24 +92,34 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset);
struct extent_io_ops {
- int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written);
- int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
+ /*
+ * The following callbacks must be allways defined, the function
+ * pointer will be called unconditionally.
+ */
extent_submit_bio_hook_t *submit_bio_hook;
+ int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
+ struct page *page, u64 start, u64 end,
+ int mirror);
int (*merge_bio_hook)(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
- int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
- struct page *page, u64 start, u64 end,
- int mirror);
- int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
+
+ /*
+ * Optional hooks, called if the pointer is not NULL
+ */
+ int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written);
+
+ int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
+ void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned *bits);
- void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
- unsigned *bits);
+ void (*clear_bit_hook)(struct btrfs_inode *inode,
+ struct extent_state *state,
+ unsigned *bits);
void (*merge_extent_hook)(struct inode *inode,
struct extent_state *new,
struct extent_state *other);
@@ -209,7 +220,7 @@ static inline int extent_compress_type(unsigned long bio_flags)
struct extent_map_tree;
-typedef struct extent_map *(get_extent_t)(struct inode *inode,
+typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
struct page *page,
size_t pg_offset,
u64 start, u64 len,
@@ -451,12 +462,13 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
struct btrfs_fs_info;
+struct btrfs_inode;
-int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
- struct page *page, unsigned int pg_offset,
- int mirror_num);
-int clean_io_failure(struct inode *inode, u64 start, struct page *page,
- unsigned int pg_offset);
+int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
+ u64 logical, struct page *page,
+ unsigned int pg_offset, int mirror_num);
+int clean_io_failure(struct btrfs_inode *inode, u64 start,
+ struct page *page, unsigned int pg_offset);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int mirror_num);
@@ -480,7 +492,9 @@ struct io_failure_record {
int in_validation;
};
-void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end);
+
+void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
+ u64 end);
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
struct io_failure_record **failrec_ret);
int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
@@ -489,7 +503,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func, void *data);
-int free_io_failure(struct inode *inode, struct io_failure_record *rec);
+int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
noinline u64 find_lock_delalloc_range(struct inode *inode,
struct extent_io_tree *tree,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f7b9a92..64fcb31 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -214,7 +214,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
* read from the commit root and sidestep a nasty deadlock
* between reading the free space cache and updating the csum tree.
*/
- if (btrfs_is_free_space_inode(inode)) {
+ if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
path->search_commit_root = 1;
path->skip_locking = 1;
}
@@ -643,7 +643,33 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
/* delete the entire item, it is inside our range */
if (key.offset >= bytenr && csum_end <= end_byte) {
- ret = btrfs_del_item(trans, root, path);
+ int del_nr = 1;
+
+ /*
+ * Check how many csum items preceding this one in this
+ * leaf correspond to our range and then delete them all
+ * at once.
+ */
+ if (key.offset > bytenr && path->slots[0] > 0) {
+ int slot = path->slots[0] - 1;
+
+ while (slot >= 0) {
+ struct btrfs_key pk;
+
+ btrfs_item_key_to_cpu(leaf, &pk, slot);
+ if (pk.offset < bytenr ||
+ pk.type != BTRFS_EXTENT_CSUM_KEY ||
+ pk.objectid !=
+ BTRFS_EXTENT_CSUM_OBJECTID)
+ break;
+ path->slots[0] = slot;
+ del_nr++;
+ key.offset = pk.offset;
+ slot--;
+ }
+ }
+ ret = btrfs_del_items(trans, root, path,
+ path->slots[0], del_nr);
if (ret)
goto out;
if (key.offset == bytenr)
@@ -904,14 +930,14 @@ fail_unlock:
goto out;
}
-void btrfs_extent_item_to_extent_map(struct inode *inode,
+void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
const bool new_inline,
struct extent_map *em)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
struct extent_buffer *leaf = path->nodes[0];
const int slot = path->slots[0];
struct btrfs_key key;
@@ -976,8 +1002,8 @@ void btrfs_extent_item_to_extent_map(struct inode *inode,
}
} else {
btrfs_err(fs_info,
- "unknown file extent item type %d, inode %llu, offset %llu, root %llu",
- type, btrfs_ino(BTRFS_I(inode)), extent_start,
+ "unknown file extent item type %d, inode %llu, offset %llu, "
+ "root %llu", type, btrfs_ino(inode), extent_start,
root->root_key.objectid);
}
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c1d2a07..520cb72 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -92,10 +92,10 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
* If an existing record is found the defrag item you
* pass in is freed
*/
-static int __btrfs_add_inode_defrag(struct inode *inode,
+static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
struct inode_defrag *defrag)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct inode_defrag *entry;
struct rb_node **p;
struct rb_node *parent = NULL;
@@ -123,7 +123,7 @@ static int __btrfs_add_inode_defrag(struct inode *inode,
return -EEXIST;
}
}
- set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
+ set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags);
rb_link_node(&defrag->rb_node, parent, p);
rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes);
return 0;
@@ -145,10 +145,10 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
* enabled
*/
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
struct inode_defrag *defrag;
u64 transid;
int ret;
@@ -156,24 +156,24 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
if (!__need_auto_defrag(fs_info))
return 0;
- if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
+ if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
return 0;
if (trans)
transid = trans->transid;
else
- transid = BTRFS_I(inode)->root->last_trans;
+ transid = inode->root->last_trans;
defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
if (!defrag)
return -ENOMEM;
- defrag->ino = btrfs_ino(BTRFS_I(inode));
+ defrag->ino = btrfs_ino(inode);
defrag->transid = transid;
defrag->root = root->root_key.objectid;
spin_lock(&fs_info->defrag_inodes_lock);
- if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
+ if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
/*
* If we set IN_DEFRAG flag and evict the inode from memory,
* and then re-read this inode, this new inode doesn't have
@@ -194,10 +194,10 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
* the same inode in the tree, we will merge them together (by
* __btrfs_add_inode_defrag()) and free the one that we want to requeue.
*/
-static void btrfs_requeue_inode_defrag(struct inode *inode,
+static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
struct inode_defrag *defrag)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
int ret;
if (!__need_auto_defrag(fs_info))
@@ -334,7 +334,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
*/
if (num_defrag == BTRFS_DEFRAG_BATCH) {
defrag->last_offset = range.start;
- btrfs_requeue_inode_defrag(inode, defrag);
+ btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
} else if (defrag->last_offset && !defrag->cycled) {
/*
* we didn't fill our defrag batch, but
@@ -343,7 +343,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
*/
defrag->last_offset = 0;
defrag->cycled = 1;
- btrfs_requeue_inode_defrag(inode, defrag);
+ btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
} else {
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
}
@@ -529,13 +529,13 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
* this drops all the extents in the cache that intersect the range
* [start, end]. Existing extents are split as required.
*/
-void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
int skip_pinned)
{
struct extent_map *em;
struct extent_map *split = NULL;
struct extent_map *split2 = NULL;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map_tree *em_tree = &inode->extent_tree;
u64 len = end - start + 1;
u64 gen;
int ret;
@@ -720,7 +720,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
int leafs_visited = 0;
if (drop_cache)
- btrfs_drop_extent_cache(inode, start, end - 1, 0);
+ btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0);
if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
modify_tree = 0;
@@ -1082,10 +1082,10 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
* two or three.
*/
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 start, u64 end)
+ struct btrfs_inode *inode, u64 start, u64 end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_file_extent_item *fi;
@@ -1102,7 +1102,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
int del_slot = 0;
int recow;
int ret;
- u64 ino = btrfs_ino(BTRFS_I(inode));
+ u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
if (!path)
@@ -1415,13 +1415,13 @@ fail:
* the other < 0 number - Something wrong happens
*/
static noinline int
-lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
+lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos,
size_t write_bytes,
u64 *lockstart, u64 *lockend,
struct extent_state **cached_state)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
u64 start_pos;
u64 last_pos;
int i;
@@ -1432,30 +1432,30 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
+ round_up(pos + write_bytes - start_pos,
fs_info->sectorsize) - 1;
- if (start_pos < inode->i_size) {
+ if (start_pos < inode->vfs_inode.i_size) {
struct btrfs_ordered_extent *ordered;
- lock_extent_bits(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos, cached_state);
+ lock_extent_bits(&inode->io_tree, start_pos, last_pos,
+ cached_state);
ordered = btrfs_lookup_ordered_range(inode, start_pos,
last_pos - start_pos + 1);
if (ordered &&
ordered->file_offset + ordered->len > start_pos &&
ordered->file_offset <= last_pos) {
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos,
- cached_state, GFP_NOFS);
+ unlock_extent_cached(&inode->io_tree, start_pos,
+ last_pos, cached_state, GFP_NOFS);
for (i = 0; i < num_pages; i++) {
unlock_page(pages[i]);
put_page(pages[i]);
}
- btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_start_ordered_extent(&inode->vfs_inode,
+ ordered, 1);
btrfs_put_ordered_extent(ordered);
return -EAGAIN;
}
if (ordered)
btrfs_put_ordered_extent(ordered);
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
+ clear_extent_bit(&inode->io_tree, start_pos,
last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, cached_state, GFP_NOFS);
@@ -1474,11 +1474,11 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
return ret;
}
-static noinline int check_can_nocow(struct inode *inode, loff_t pos,
+static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
struct btrfs_ordered_extent *ordered;
u64 lockstart, lockend;
u64 num_bytes;
@@ -1493,19 +1493,20 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
fs_info->sectorsize) - 1;
while (1) {
- lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
+ lock_extent(&inode->io_tree, lockstart, lockend);
ordered = btrfs_lookup_ordered_range(inode, lockstart,
lockend - lockstart + 1);
if (!ordered) {
break;
}
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
- btrfs_start_ordered_extent(inode, ordered, 1);
+ unlock_extent(&inode->io_tree, lockstart, lockend);
+ btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
num_bytes = lockend - lockstart + 1;
- ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
+ ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
+ NULL, NULL, NULL);
if (ret <= 0) {
ret = 0;
btrfs_end_write_no_snapshoting(root);
@@ -1514,7 +1515,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
num_bytes - pos + lockstart);
}
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
+ unlock_extent(&inode->io_tree, lockstart, lockend);
return ret;
}
@@ -1579,7 +1580,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (ret < 0) {
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) &&
- check_can_nocow(inode, pos, &write_bytes) > 0) {
+ check_can_nocow(BTRFS_I(inode), pos,
+ &write_bytes) > 0) {
/*
* For nodata cow case, no need to reserve
* data space.
@@ -1599,7 +1601,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
}
- ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
+ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
+ reserve_bytes);
if (ret) {
if (!only_release_metadata)
btrfs_free_reserved_data_space(inode, pos,
@@ -1623,9 +1626,9 @@ again:
if (ret)
break;
- ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
- pos, write_bytes, &lockstart,
- &lockend, &cached_state);
+ ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages,
+ num_pages, pos, write_bytes, &lockstart,
+ &lockend, &cached_state);
if (ret < 0) {
if (ret == -EAGAIN)
goto again;
@@ -1677,7 +1680,7 @@ again:
spin_unlock(&BTRFS_I(inode)->lock);
}
if (only_release_metadata) {
- btrfs_delalloc_release_metadata(inode,
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes);
} else {
u64 __pos;
@@ -1738,7 +1741,8 @@ again:
if (release_bytes) {
if (only_release_metadata) {
btrfs_end_write_no_snapshoting(root);
- btrfs_delalloc_release_metadata(inode, release_bytes);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ release_bytes);
} else {
btrfs_delalloc_release_space(inode,
round_down(pos, fs_info->sectorsize),
@@ -2193,7 +2197,7 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
return 0;
}
-static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
+static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf,
int slot, u64 start, u64 end)
{
struct btrfs_file_extent_item *fi;
@@ -2203,7 +2207,7 @@ static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
return 0;
btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
+ if (key.objectid != btrfs_ino(inode) ||
key.type != BTRFS_EXTENT_DATA_KEY)
return 0;
@@ -2222,22 +2226,23 @@ static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
return 0;
}
-static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
- struct btrfs_path *path, u64 offset, u64 end)
+static int fill_holes(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_path *path, u64 offset, u64 end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
struct extent_map *hole_em;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map_tree *em_tree = &inode->extent_tree;
struct btrfs_key key;
int ret;
if (btrfs_fs_incompat(fs_info, NO_HOLES))
goto out;
- key.objectid = btrfs_ino(BTRFS_I(inode));
+ key.objectid = btrfs_ino(inode);
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = offset;
@@ -2253,7 +2258,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
}
leaf = path->nodes[0];
- if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
+ if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) {
u64 num_bytes;
path->slots[0]--;
@@ -2285,7 +2290,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
}
btrfs_release_path(path);
- ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
+ ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0);
if (ret)
return ret;
@@ -2296,8 +2301,7 @@ out:
hole_em = alloc_extent_map();
if (!hole_em) {
btrfs_drop_extent_cache(inode, offset, end - 1, 0);
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
} else {
hole_em->start = offset;
hole_em->len = end - offset;
@@ -2320,7 +2324,7 @@ out:
free_extent_map(hole_em);
if (ret)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
}
return 0;
@@ -2337,7 +2341,7 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
struct extent_map *em;
int ret = 0;
- em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0);
if (IS_ERR_OR_NULL(em)) {
if (!em)
ret = -ENOMEM;
@@ -2550,8 +2554,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
trans->block_rsv = &fs_info->trans_block_rsv;
if (cur_offset < drop_end && cur_offset < ino_size) {
- ret = fill_holes(trans, inode, path, cur_offset,
- drop_end);
+ ret = fill_holes(trans, BTRFS_I(inode), path,
+ cur_offset, drop_end);
if (ret) {
/*
* If we failed then we didn't insert our hole
@@ -2622,7 +2626,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
* cur_offset == drop_end).
*/
if (cur_offset < ino_size && cur_offset < drop_end) {
- ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+ ret = fill_holes(trans, BTRFS_I(inode), path,
+ cur_offset, drop_end);
if (ret) {
/* Same comment as above. */
btrfs_abort_transaction(trans, ret);
@@ -2747,7 +2752,8 @@ static long btrfs_fallocate(struct file *file, int mode,
*
* For qgroup space, it will be checked later.
*/
- ret = btrfs_alloc_data_chunk_ondemand(inode, alloc_end - alloc_start);
+ ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
+ alloc_end - alloc_start);
if (ret < 0)
return ret;
@@ -2827,7 +2833,7 @@ static long btrfs_fallocate(struct file *file, int mode,
/* First, check if we exceed the qgroup limit */
INIT_LIST_HEAD(&reserve_list);
while (1) {
- em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
if (IS_ERR_OR_NULL(em)) {
if (!em)
@@ -2954,7 +2960,8 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
&cached_state);
while (start < inode->i_size) {
- em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
+ em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0,
+ start, len, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
em = NULL;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 1a131f7..da6841e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -18,6 +18,7 @@
#include <linux/pagemap.h>
#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
@@ -260,7 +261,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
btrfs_free_path(path);
}
- btrfs_i_size_write(inode, 0);
+ btrfs_i_size_write(BTRFS_I(inode), 0);
truncate_pagecache(inode, 0);
/*
@@ -3545,7 +3546,8 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
if (ret) {
if (release_metadata)
- btrfs_delalloc_release_metadata(inode, inode->i_size);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ inode->i_size);
#ifdef DEBUG
btrfs_err(fs_info,
"failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 3bbb8f0..5c6c20e 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -499,7 +499,7 @@ again:
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
- btrfs_delalloc_release_metadata(inode, prealloc);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc);
goto out_put;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f02823f..c40060c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -316,8 +316,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
}
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
- btrfs_delalloc_release_metadata(inode, end + 1 - start);
- btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start);
+ btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
out:
/*
* Don't forget to free the reserved space, as for inlined extent
@@ -389,12 +389,12 @@ static inline int inode_need_compress(struct inode *inode)
return 0;
}
-static inline void inode_should_defrag(struct inode *inode,
+static inline void inode_should_defrag(struct btrfs_inode *inode,
u64 start, u64 end, u64 num_bytes, u64 small_write)
{
/* If this is a small write inside eof, kick off a defrag */
if (num_bytes < small_write &&
- (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
+ (start > 0 || end + 1 < inode->disk_i_size))
btrfs_add_inode_defrag(NULL, inode);
}
@@ -430,23 +430,23 @@ static noinline void compress_file_range(struct inode *inode,
int ret = 0;
struct page **pages = NULL;
unsigned long nr_pages;
- unsigned long nr_pages_ret = 0;
unsigned long total_compressed = 0;
unsigned long total_in = 0;
- unsigned long max_compressed = SZ_128K;
- unsigned long max_uncompressed = SZ_128K;
int i;
int will_compress;
int compress_type = fs_info->compress_type;
int redirty = 0;
- inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
+ inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
+ SZ_16K);
actual_end = min_t(u64, isize, end + 1);
again:
will_compress = 0;
nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
- nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_SIZE);
+ BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0);
+ nr_pages = min_t(unsigned long, nr_pages,
+ BTRFS_MAX_COMPRESSED / PAGE_SIZE);
/*
* we don't want to send crud past the end of i_size through
@@ -471,17 +471,8 @@ again:
(start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
goto cleanup_and_bail_uncompressed;
- /* we want to make sure that amount of ram required to uncompress
- * an extent is reasonable, so we limit the total size in ram
- * of a compressed extent to 128k. This is a crucial number
- * because it also controls how easily we can spread reads across
- * cpus for decompression.
- *
- * We also want to make sure the amount of IO required to do
- * a random read is reasonably small, so we limit the size of
- * a compressed extent to 128k.
- */
- total_compressed = min(total_compressed, max_uncompressed);
+ total_compressed = min_t(unsigned long, total_compressed,
+ BTRFS_MAX_UNCOMPRESSED);
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
total_in = 0;
@@ -516,16 +507,15 @@ again:
redirty = 1;
ret = btrfs_compress_pages(compress_type,
inode->i_mapping, start,
- total_compressed, pages,
- nr_pages, &nr_pages_ret,
+ pages,
+ &nr_pages,
&total_in,
- &total_compressed,
- max_compressed);
+ &total_compressed);
if (!ret) {
unsigned long offset = total_compressed &
(PAGE_SIZE - 1);
- struct page *page = pages[nr_pages_ret - 1];
+ struct page *page = pages[nr_pages - 1];
char *kaddr;
/* zero the tail end of the last page, we might be
@@ -606,7 +596,7 @@ cont:
* will submit them to the elevator.
*/
add_async_extent(async_cow, start, num_bytes,
- total_compressed, pages, nr_pages_ret,
+ total_compressed, pages, nr_pages,
compress_type);
if (start + num_bytes < end) {
@@ -623,14 +613,14 @@ cont:
* the compression code ran but failed to make things smaller,
* free any pages it allocated and our page pointer array
*/
- for (i = 0; i < nr_pages_ret; i++) {
+ for (i = 0; i < nr_pages; i++) {
WARN_ON(pages[i]->mapping);
put_page(pages[i]);
}
kfree(pages);
pages = NULL;
total_compressed = 0;
- nr_pages_ret = 0;
+ nr_pages = 0;
/* flag the file so we don't compress in the future */
if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
@@ -659,7 +649,7 @@ cleanup_and_bail_uncompressed:
return;
free_pages_out:
- for (i = 0; i < nr_pages_ret; i++) {
+ for (i = 0; i < nr_pages; i++) {
WARN_ON(pages[i]->mapping);
put_page(pages[i]);
}
@@ -806,7 +796,8 @@ retry:
BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
if (ret) {
- btrfs_drop_extent_cache(inode, async_extent->start,
+ btrfs_drop_extent_cache(BTRFS_I(inode),
+ async_extent->start,
async_extent->start +
async_extent->ram_size - 1, 0);
goto out_free_reserve;
@@ -933,7 +924,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map *em;
int ret = 0;
- if (btrfs_is_free_space_inode(inode)) {
+ if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
WARN_ON_ONCE(1);
ret = -EINVAL;
goto out_unlock;
@@ -943,7 +934,7 @@ static noinline int cow_file_range(struct inode *inode,
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
- inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
+ inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
if (start == 0) {
/* lets try to make an inline extent */
@@ -971,7 +962,8 @@ static noinline int cow_file_range(struct inode *inode,
btrfs_super_total_bytes(fs_info->super_copy));
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
- btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
+ btrfs_drop_extent_cache(BTRFS_I(inode), start,
+ start + num_bytes - 1, 0);
while (disk_num_bytes > 0) {
unsigned long op;
@@ -1039,7 +1031,7 @@ out:
return ret;
out_drop_extent_cache:
- btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
+ btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0);
out_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
@@ -1231,7 +1223,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
return -ENOMEM;
}
- nolock = btrfs_is_free_space_inode(inode);
+ nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
cow_start = (u64)-1;
cur_offset = start;
@@ -1331,10 +1323,16 @@ next_slot:
* either valid or do not exist.
*/
if (csum_exist_in_range(fs_info, disk_bytenr,
- num_bytes))
+ num_bytes)) {
+ if (!nolock)
+ btrfs_end_write_no_snapshoting(root);
goto out_check;
- if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
+ }
+ if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
+ if (!nolock)
+ btrfs_end_write_no_snapshoting(root);
goto out_check;
+ }
nocow = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset +
@@ -1629,15 +1627,15 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
}
static void btrfs_del_delalloc_inode(struct btrfs_root *root,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
spin_lock(&root->delalloc_lock);
- if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
- list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+ if (!list_empty(&inode->delalloc_inodes)) {
+ list_del_init(&inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
root->nr_delalloc_inodes--;
if (!root->nr_delalloc_inodes) {
spin_lock(&fs_info->delalloc_root_lock);
@@ -1670,7 +1668,7 @@ static void btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
- bool do_list = !btrfs_is_free_space_inode(inode);
+ bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1700,18 +1698,18 @@ static void btrfs_set_bit_hook(struct inode *inode,
/*
* extent_io.c clear_bit_hook, see set_bit_hook for why
*/
-static void btrfs_clear_bit_hook(struct inode *inode,
+static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
struct extent_state *state,
unsigned *bits)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
u64 len = state->end + 1 - state->start;
u32 num_extents = count_max_extents(len);
- spin_lock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
- BTRFS_I(inode)->defrag_bytes -= len;
- spin_unlock(&BTRFS_I(inode)->lock);
+ inode->defrag_bytes -= len;
+ spin_unlock(&inode->lock);
/*
* set_bit and clear bit hooks normally require _irqsave/restore
@@ -1719,15 +1717,15 @@ static void btrfs_clear_bit_hook(struct inode *inode,
* bit, which is only set or cleared with irqs on
*/
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
bool do_list = !btrfs_is_free_space_inode(inode);
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents -= num_extents;
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
+ inode->outstanding_extents -= num_extents;
+ spin_unlock(&inode->lock);
}
/*
@@ -1747,18 +1745,19 @@ static void btrfs_clear_bit_hook(struct inode *inode,
&& do_list && !(state->state & EXTENT_NORESERVE)
&& (*bits & (EXTENT_DO_ACCOUNTING |
EXTENT_CLEAR_DATA_RESV)))
- btrfs_free_reserved_data_space_noquota(inode,
+ btrfs_free_reserved_data_space_noquota(
+ &inode->vfs_inode,
state->start, len);
__percpu_counter_add(&fs_info->delalloc_bytes, -len,
fs_info->delalloc_batch);
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->delalloc_bytes -= len;
- if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
+ spin_lock(&inode->lock);
+ inode->delalloc_bytes -= len;
+ if (do_list && inode->delalloc_bytes == 0 &&
test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &BTRFS_I(inode)->runtime_flags))
+ &inode->runtime_flags))
btrfs_del_delalloc_inode(root, inode);
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_unlock(&inode->lock);
}
}
@@ -1854,7 +1853,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- if (btrfs_is_free_space_inode(inode))
+ if (btrfs_is_free_space_inode(BTRFS_I(inode)))
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
if (bio_op(bio) != REQ_OP_WRITE) {
@@ -1963,7 +1962,7 @@ again:
if (PagePrivate2(page))
goto out;
- ordered = btrfs_lookup_ordered_range(inode, page_start,
+ ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
PAGE_SIZE);
if (ordered) {
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
@@ -2793,16 +2792,17 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
bool nolock;
bool truncated = false;
- nolock = btrfs_is_free_space_inode(inode);
+ nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
ret = -EIO;
goto out;
}
- btrfs_free_io_failure_record(inode, ordered_extent->file_offset,
- ordered_extent->file_offset +
- ordered_extent->len - 1);
+ btrfs_free_io_failure_record(BTRFS_I(inode),
+ ordered_extent->file_offset,
+ ordered_extent->file_offset +
+ ordered_extent->len - 1);
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
@@ -2873,7 +2873,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
compress_type = ordered_extent->compress_type;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
BUG_ON(compress_type);
- ret = btrfs_mark_extent_written(trans, inode,
+ ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
ordered_extent->file_offset,
ordered_extent->file_offset +
logical_len);
@@ -2914,7 +2914,8 @@ out_unlock:
ordered_extent->len - 1, &cached_state, GFP_NOFS);
out:
if (root != fs_info->tree_root)
- btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ ordered_extent->len);
if (trans)
btrfs_end_transaction(trans);
@@ -2929,7 +2930,7 @@ out:
clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
/* Drop the cache for the part of the extent we didn't write. */
- btrfs_drop_extent_cache(inode, start, end, 0);
+ btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
/*
* If the ordered extent had an IOERR or something else went
@@ -2977,7 +2978,7 @@ static void finish_ordered_fn(struct btrfs_work *work)
btrfs_finish_ordered_io(ordered_extent);
}
-static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
+static void btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate)
{
struct inode *inode = page->mapping->host;
@@ -2991,9 +2992,9 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
ClearPagePrivate2(page);
if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1, uptodate))
- return 0;
+ return;
- if (btrfs_is_free_space_inode(inode)) {
+ if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
wq = fs_info->endio_freespace_worker;
func = btrfs_freespace_write_helper;
} else {
@@ -3004,8 +3005,6 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
NULL);
btrfs_queue_work(wq, &ordered_extent->work);
-
- return 0;
}
static int __readpage_endio_check(struct inode *inode,
@@ -3028,7 +3027,7 @@ static int __readpage_endio_check(struct inode *inode,
kunmap_atomic(kaddr);
return 0;
zeroit:
- btrfs_print_data_csum_error(inode, start, csum, csum_expected,
+ btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
io_bio->mirror_num);
memset(kaddr + pgoff, 1, len);
flush_dcache_page(page);
@@ -3167,10 +3166,11 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
* NOTE: caller of this function should reserve 5 units of metadata for
* this function.
*/
-int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
+int btrfs_orphan_add(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = NULL;
int reserve = 0;
int insert = 0;
@@ -3192,7 +3192,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
}
if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags)) {
+ &inode->runtime_flags)) {
#if 0
/*
* For proper ENOSPC handling, we should do orphan
@@ -3209,7 +3209,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
}
if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags))
+ &inode->runtime_flags))
reserve = 1;
spin_unlock(&root->orphan_lock);
@@ -3220,28 +3220,27 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
if (ret) {
atomic_dec(&root->orphan_inodes);
clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
if (insert)
clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
return ret;
}
}
/* insert an orphan item to track this unlinked/truncated file */
if (insert >= 1) {
- ret = btrfs_insert_orphan_item(trans, root,
- btrfs_ino(BTRFS_I(inode)));
+ ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
if (ret) {
atomic_dec(&root->orphan_inodes);
if (reserve) {
clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
btrfs_orphan_release_metadata(inode);
}
if (ret != -EEXIST) {
clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -3266,20 +3265,20 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
* item for this particular inode.
*/
static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
int delete_item = 0;
int release_rsv = 0;
int ret = 0;
spin_lock(&root->orphan_lock);
if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags))
+ &inode->runtime_flags))
delete_item = 1;
if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags))
+ &inode->runtime_flags))
release_rsv = 1;
spin_unlock(&root->orphan_lock);
@@ -3287,7 +3286,7 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
atomic_dec(&root->orphan_inodes);
if (trans)
ret = btrfs_del_orphan_item(trans, root,
- btrfs_ino(BTRFS_I(inode)));
+ btrfs_ino(inode));
}
if (release_rsv)
@@ -3453,7 +3452,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
ret = PTR_ERR(trans);
goto out;
}
- ret = btrfs_orphan_add(trans, inode);
+ ret = btrfs_orphan_add(trans, BTRFS_I(inode));
btrfs_end_transaction(trans);
if (ret) {
iput(inode);
@@ -3462,7 +3461,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
ret = btrfs_truncate(inode);
if (ret)
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
} else {
nr_unlink++;
}
@@ -3617,7 +3616,7 @@ static int btrfs_read_locked_inode(struct inode *inode)
set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
- btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
+ btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));
inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
@@ -3865,7 +3864,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* The data relocation inode should also be directly updated
* without delay
*/
- if (!btrfs_is_free_space_inode(inode)
+ if (!btrfs_is_free_space_inode(BTRFS_I(inode))
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
btrfs_update_root_times(trans, root);
@@ -3988,8 +3987,7 @@ err:
if (ret)
goto out;
- btrfs_i_size_write(&dir->vfs_inode,
- dir->vfs_inode.i_size - name_len * 2);
+ btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
inode_inc_iversion(&inode->vfs_inode);
inode_inc_iversion(&dir->vfs_inode);
inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
@@ -4056,7 +4054,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
goto out;
if (inode->i_nlink == 0) {
- ret = btrfs_orphan_add(trans, inode);
+ ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret)
goto out;
}
@@ -4137,7 +4135,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
goto out;
}
- btrfs_i_size_write(dir, dir->i_size - name_len * 2);
+ btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
inode_inc_iversion(dir);
dir->i_mtime = dir->i_ctime = current_time(dir);
ret = btrfs_update_inode_fallback(trans, root, dir);
@@ -4173,7 +4171,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
goto out;
}
- err = btrfs_orphan_add(trans, inode);
+ err = btrfs_orphan_add(trans, BTRFS_I(inode));
if (err)
goto out;
@@ -4184,7 +4182,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
BTRFS_I(d_inode(dentry)), dentry->d_name.name,
dentry->d_name.len);
if (!err) {
- btrfs_i_size_write(inode, 0);
+ btrfs_i_size_write(BTRFS_I(inode), 0);
/*
* Propagate the last_unlink_trans value of the deleted dir to
* its parent directory. This is to prevent an unrecoverable
@@ -4320,7 +4318,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* for non-free space inodes and ref cows, we want to back off from
* time to time
*/
- if (!btrfs_is_free_space_inode(inode) &&
+ if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
test_bit(BTRFS_ROOT_REF_COWS, &root->state))
be_nice = 1;
@@ -4336,7 +4334,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
*/
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
root == fs_info->tree_root)
- btrfs_drop_extent_cache(inode, ALIGN(new_size,
+ btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size,
fs_info->sectorsize),
(u64)-1, 0);
@@ -4412,19 +4410,8 @@ search_again:
if (found_type > min_type) {
del_item = 1;
} else {
- if (item_end < new_size) {
- /*
- * With NO_HOLES mode, for the following mapping
- *
- * [0-4k][hole][8k-12k]
- *
- * if truncating isize down to 6k, it ends up
- * isize being 8k.
- */
- if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
- last_size = new_size;
+ if (item_end < new_size)
break;
- }
if (found_key.offset >= new_size)
del_item = 1;
else
@@ -4607,8 +4594,12 @@ out:
btrfs_abort_transaction(trans, ret);
}
error:
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
+ ASSERT(last_size >= new_size);
+ if (!err && last_size > new_size)
+ last_size = new_size;
btrfs_ordered_update_i_size(inode, last_size, NULL);
+ }
btrfs_free_path(path);
@@ -4835,7 +4826,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
lock_extent_bits(io_tree, hole_start, block_end - 1,
&cached_state);
- ordered = btrfs_lookup_ordered_range(inode, hole_start,
+ ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start,
block_end - hole_start);
if (!ordered)
break;
@@ -4847,7 +4838,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
cur_offset = hole_start;
while (1) {
- em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
block_end - cur_offset, 0);
if (IS_ERR(em)) {
err = PTR_ERR(em);
@@ -4864,7 +4855,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_size);
if (err)
break;
- btrfs_drop_extent_cache(inode, cur_offset,
+ btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
cur_offset + hole_size - 1, 0);
hole_em = alloc_extent_map();
if (!hole_em) {
@@ -4890,7 +4881,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
write_unlock(&em_tree->lock);
if (err != -EEXIST)
break;
- btrfs_drop_extent_cache(inode, cur_offset,
+ btrfs_drop_extent_cache(BTRFS_I(inode),
+ cur_offset,
cur_offset +
hole_size - 1, 0);
}
@@ -4987,7 +4979,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* so we need to guarantee from this point on that everything
* will be consistent.
*/
- ret = btrfs_orphan_add(trans, inode);
+ ret = btrfs_orphan_add(trans, BTRFS_I(inode));
btrfs_end_transaction(trans);
if (ret)
return ret;
@@ -4996,9 +4988,9 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
truncate_setsize(inode, newsize);
/* Disable nonlocked read DIO to avoid the end less truncate */
- btrfs_inode_block_unlocked_dio(inode);
+ btrfs_inode_block_unlocked_dio(BTRFS_I(inode));
inode_dio_wait(inode);
- btrfs_inode_resume_unlocked_dio(inode);
+ btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));
ret = btrfs_truncate(inode);
if (ret && inode->i_nlink) {
@@ -5007,7 +4999,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
/* To get a stable disk_i_size */
err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (err) {
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
return err;
}
@@ -5019,11 +5011,11 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
*/
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
return ret;
}
i_size_write(inode, BTRFS_I(inode)->disk_i_size);
- err = btrfs_orphan_del(trans, inode);
+ err = btrfs_orphan_del(trans, BTRFS_I(inode));
if (err)
btrfs_abort_transaction(trans, err);
btrfs_end_transaction(trans);
@@ -5181,18 +5173,18 @@ void btrfs_evict_inode(struct inode *inode)
if (inode->i_nlink &&
((btrfs_root_refs(&root->root_item) != 0 &&
root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
- btrfs_is_free_space_inode(inode)))
+ btrfs_is_free_space_inode(BTRFS_I(inode))))
goto no_delete;
if (is_bad_inode(inode)) {
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
goto no_delete;
}
/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
if (!special_file(inode->i_mode))
btrfs_wait_ordered_range(inode, 0, (u64)-1);
- btrfs_free_io_failure_record(inode, 0, (u64)-1);
+ btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
@@ -5208,20 +5200,20 @@ void btrfs_evict_inode(struct inode *inode)
ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
if (ret) {
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
goto no_delete;
}
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
if (!rsv) {
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
goto no_delete;
}
rsv->size = min_size;
rsv->failfast = 1;
global_rsv = &fs_info->global_block_rsv;
- btrfs_i_size_write(inode, 0);
+ btrfs_i_size_write(BTRFS_I(inode), 0);
/*
* This is a bit simpler than btrfs_truncate since we've already
@@ -5256,14 +5248,14 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_warn(fs_info,
"Could not get space for a delete, will truncate on mount %d",
ret);
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
btrfs_free_block_rsv(fs_info, rsv);
goto no_delete;
}
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
btrfs_free_block_rsv(fs_info, rsv);
goto no_delete;
}
@@ -5289,7 +5281,7 @@ void btrfs_evict_inode(struct inode *inode)
if (ret) {
ret = btrfs_commit_transaction(trans);
if (ret) {
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
btrfs_free_block_rsv(fs_info, rsv);
goto no_delete;
}
@@ -5318,9 +5310,9 @@ void btrfs_evict_inode(struct inode *inode)
*/
if (ret == 0) {
trans->block_rsv = root->orphan_block_rsv;
- btrfs_orphan_del(trans, inode);
+ btrfs_orphan_del(trans, BTRFS_I(inode));
} else {
- btrfs_orphan_del(NULL, inode);
+ btrfs_orphan_del(NULL, BTRFS_I(inode));
}
trans->block_rsv = &fs_info->trans_block_rsv;
@@ -5898,7 +5890,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
return 0;
- if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode))
+ if (btrfs_fs_closing(root->fs_info) &&
+ btrfs_is_free_space_inode(BTRFS_I(inode)))
nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -5978,15 +5971,15 @@ static int btrfs_update_time(struct inode *inode, struct timespec *now,
* and then set the in-memory index_cnt variable to reflect
* free sequence numbers
*/
-static int btrfs_set_inode_index_count(struct inode *inode)
+static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
struct btrfs_key key, found_key;
struct btrfs_path *path;
struct extent_buffer *leaf;
int ret;
- key.objectid = btrfs_ino(BTRFS_I(inode));
+ key.objectid = btrfs_ino(inode);
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = (u64)-1;
@@ -6009,7 +6002,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
* else has to start at 2
*/
if (path->slots[0] == 0) {
- BTRFS_I(inode)->index_cnt = 2;
+ inode->index_cnt = 2;
goto out;
}
@@ -6018,13 +6011,13 @@ static int btrfs_set_inode_index_count(struct inode *inode)
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
+ if (found_key.objectid != btrfs_ino(inode) ||
found_key.type != BTRFS_DIR_INDEX_KEY) {
- BTRFS_I(inode)->index_cnt = 2;
+ inode->index_cnt = 2;
goto out;
}
- BTRFS_I(inode)->index_cnt = found_key.offset + 1;
+ inode->index_cnt = found_key.offset + 1;
out:
btrfs_free_path(path);
return ret;
@@ -6034,12 +6027,12 @@ out:
* helper to find a free sequence number in a given directory. This current
* code is very simple, later versions will do smarter things in the btree
*/
-int btrfs_set_inode_index(struct inode *dir, u64 *index)
+int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
{
int ret = 0;
- if (BTRFS_I(dir)->index_cnt == (u64)-1) {
- ret = btrfs_inode_delayed_dir_index_count(BTRFS_I(dir));
+ if (dir->index_cnt == (u64)-1) {
+ ret = btrfs_inode_delayed_dir_index_count(dir);
if (ret) {
ret = btrfs_set_inode_index_count(dir);
if (ret)
@@ -6047,8 +6040,8 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
}
}
- *index = BTRFS_I(dir)->index_cnt;
- BTRFS_I(dir)->index_cnt++;
+ *index = dir->index_cnt;
+ dir->index_cnt++;
return ret;
}
@@ -6109,7 +6102,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (dir && name) {
trace_btrfs_inode_request(dir);
- ret = btrfs_set_inode_index(dir, index);
+ ret = btrfs_set_inode_index(BTRFS_I(dir), index);
if (ret) {
btrfs_free_path(path);
iput(inode);
@@ -6244,18 +6237,18 @@ static inline u8 btrfs_inode_type(struct inode *inode)
* inode to the parent directory.
*/
int btrfs_add_link(struct btrfs_trans_handle *trans,
- struct inode *parent_inode, struct inode *inode,
+ struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const char *name, int name_len, int add_backref, u64 index)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
int ret = 0;
struct btrfs_key key;
- struct btrfs_root *root = BTRFS_I(parent_inode)->root;
- u64 ino = btrfs_ino(BTRFS_I(inode));
- u64 parent_ino = btrfs_ino(BTRFS_I(parent_inode));
+ struct btrfs_root *root = parent_inode->root;
+ u64 ino = btrfs_ino(inode);
+ u64 parent_ino = btrfs_ino(parent_inode);
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
- memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
+ memcpy(&key, &inode->root->root_key, sizeof(key));
} else {
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
@@ -6277,7 +6270,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
ret = btrfs_insert_dir_item(trans, root, name, name_len,
parent_inode, &key,
- btrfs_inode_type(inode), index);
+ btrfs_inode_type(&inode->vfs_inode), index);
if (ret == -EEXIST || ret == -EOVERFLOW)
goto fail_dir_item;
else if (ret) {
@@ -6285,12 +6278,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
return ret;
}
- btrfs_i_size_write(parent_inode, parent_inode->i_size +
+ btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
name_len * 2);
- inode_inc_iversion(parent_inode);
- parent_inode->i_mtime = parent_inode->i_ctime =
- current_time(parent_inode);
- ret = btrfs_update_inode(trans, root, parent_inode);
+ inode_inc_iversion(&parent_inode->vfs_inode);
+ parent_inode->vfs_inode.i_mtime = parent_inode->vfs_inode.i_ctime =
+ current_time(&parent_inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
if (ret)
btrfs_abort_transaction(trans, ret);
return ret;
@@ -6314,8 +6307,8 @@ fail_dir_item:
}
static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
- struct inode *dir, struct dentry *dentry,
- struct inode *inode, int backref, u64 index)
+ struct btrfs_inode *dir, struct dentry *dentry,
+ struct btrfs_inode *inode, int backref, u64 index)
{
int err = btrfs_add_link(trans, dir, inode,
dentry->d_name.name, dentry->d_name.len,
@@ -6371,7 +6364,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (err)
goto out_unlock_inode;
- err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+ err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
+ 0, index);
if (err) {
goto out_unlock_inode;
} else {
@@ -6448,7 +6442,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (err)
goto out_unlock_inode;
- err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+ err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
+ 0, index);
if (err)
goto out_unlock_inode;
@@ -6490,7 +6485,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
if (inode->i_nlink >= BTRFS_LINK_MAX)
return -EMLINK;
- err = btrfs_set_inode_index(dir, &index);
+ err = btrfs_set_inode_index(BTRFS_I(dir), &index);
if (err)
goto fail;
@@ -6514,7 +6509,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
ihold(inode);
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
- err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
+ err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
+ 1, index);
if (err) {
drop_inode = 1;
@@ -6528,7 +6524,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
* If new hard link count is 1, it's a file created
* with open(2) O_TMPFILE flag.
*/
- err = btrfs_orphan_del(trans, inode);
+ err = btrfs_orphan_del(trans, BTRFS_I(inode));
if (err)
goto fail;
}
@@ -6589,13 +6585,14 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err)
goto out_fail_inode;
- btrfs_i_size_write(inode, 0);
+ btrfs_i_size_write(BTRFS_I(inode), 0);
err = btrfs_update_inode(trans, root, inode);
if (err)
goto out_fail_inode;
- err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
- dentry->d_name.len, 0, index);
+ err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
+ dentry->d_name.name,
+ dentry->d_name.len, 0, index);
if (err)
goto out_fail_inode;
@@ -6725,25 +6722,26 @@ static noinline int uncompress_inline(struct btrfs_path *path,
* This also copies inline extents directly into the page.
*/
-struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
- size_t pg_offset, u64 start, u64 len,
- int create)
+struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
+ struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
int ret;
int err = 0;
u64 extent_start = 0;
u64 extent_end = 0;
- u64 objectid = btrfs_ino(BTRFS_I(inode));
+ u64 objectid = btrfs_ino(inode);
u32 found_type;
struct btrfs_path *path = NULL;
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
struct btrfs_file_extent_item *item;
struct extent_buffer *leaf;
struct btrfs_key found_key;
struct extent_map *em = NULL;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct extent_map_tree *em_tree = &inode->extent_tree;
+ struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_trans_handle *trans = NULL;
const bool new_inline = !page || create;
@@ -6856,7 +6854,8 @@ next:
goto not_found_em;
}
- btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em);
+ btrfs_extent_item_to_extent_map(inode, path, item,
+ new_inline, em);
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -6992,7 +6991,7 @@ insert:
write_unlock(&em_tree->lock);
out:
- trace_btrfs_get_extent(root, BTRFS_I(inode), em);
+ trace_btrfs_get_extent(root, inode, em);
btrfs_free_path(path);
if (trans) {
@@ -7008,9 +7007,10 @@ out:
return em;
}
-struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
- size_t pg_offset, u64 start, u64 len,
- int create)
+struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
+ struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create)
{
struct extent_map *em;
struct extent_map *hole_em = NULL;
@@ -7047,7 +7047,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
em = NULL;
/* ok, we didn't find anything, lets look for delalloc */
- found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+ found = count_range_bits(&inode->io_tree, &range_start,
end, len, EXTENT_DELALLOC, 1);
found_end = range_start + found;
if (found_end < range_start)
@@ -7162,7 +7162,7 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
if (ret) {
if (em) {
free_extent_map(em);
- btrfs_drop_extent_cache(inode, start,
+ btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + len - 1, 0);
}
em = ERR_PTR(ret);
@@ -7423,7 +7423,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
* doing DIO to, so we need to make sure there's no ordered
* extents in this range.
*/
- ordered = btrfs_lookup_ordered_range(inode, lockstart,
+ ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
lockend - lockstart + 1);
/*
@@ -7529,7 +7529,7 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
}
do {
- btrfs_drop_extent_cache(inode, em->start,
+ btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
em->start + em->len - 1, 0);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 1);
@@ -7617,7 +7617,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
goto err;
}
- em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto unlock_err;
@@ -7854,7 +7854,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
failed_mirror);
if (!ret) {
- free_io_failure(inode, failrec);
+ free_io_failure(BTRFS_I(inode), failrec);
return -EIO;
}
@@ -7868,7 +7868,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
pgoff, isector, repair_endio, repair_arg);
if (!bio) {
- free_io_failure(inode, failrec);
+ free_io_failure(BTRFS_I(inode), failrec);
return -EIO;
}
bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
@@ -7879,7 +7879,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
if (ret) {
- free_io_failure(inode, failrec);
+ free_io_failure(BTRFS_I(inode), failrec);
bio_put(bio);
}
@@ -7909,7 +7909,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
done->uptodate = 1;
bio_for_each_segment_all(bvec, bio, i)
- clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
+ clean_io_failure(BTRFS_I(done->inode), done->start, bvec->bv_page, 0);
end:
complete(&done->done);
bio_put(bio);
@@ -7995,7 +7995,7 @@ static void btrfs_retry_endio(struct bio *bio)
bvec->bv_page, bvec->bv_offset,
done->start, bvec->bv_len);
if (!ret)
- clean_io_failure(done->inode, done->start,
+ clean_io_failure(BTRFS_I(done->inode), done->start,
bvec->bv_page, bvec->bv_offset);
else
uptodate = 0;
@@ -8796,7 +8796,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
lock_extent_bits(tree, page_start, page_end, &cached_state);
again:
start = page_start;
- ordered = btrfs_lookup_ordered_range(inode, start,
+ ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
page_end - start + 1);
if (ordered) {
end = min(page_end, ordered->file_offset + ordered->len - 1);
@@ -8962,7 +8962,8 @@ again:
* we can't set the delalloc bits if there are pending ordered
* extents. Drop our locks and wait for them to finish
*/
- ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
+ ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
+ PAGE_SIZE);
if (ordered) {
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
@@ -9160,7 +9161,7 @@ static int btrfs_truncate(struct inode *inode)
if (ret == 0 && inode->i_nlink > 0) {
trans->block_rsv = root->orphan_block_rsv;
- ret = btrfs_orphan_del(trans, inode);
+ ret = btrfs_orphan_del(trans, BTRFS_I(inode));
if (ret)
err = ret;
}
@@ -9205,7 +9206,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
inode->i_fop = &btrfs_dir_file_operations;
set_nlink(inode, 1);
- btrfs_i_size_write(inode, 0);
+ btrfs_i_size_write(BTRFS_I(inode), 0);
unlock_new_inode(inode);
err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
@@ -9278,7 +9279,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_destroy_inode(struct inode *inode)
{
- btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
+ btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
#endif
@@ -9333,7 +9334,7 @@ void btrfs_destroy_inode(struct inode *inode)
}
btrfs_qgroup_check_reserved_leak(inode);
inode_tree_del(inode);
- btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
+ btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
free:
call_rcu(&inode->i_rcu, btrfs_i_callback);
}
@@ -9412,11 +9413,11 @@ fail:
return -ENOMEM;
}
-static int btrfs_getattr(struct vfsmount *mnt,
- struct dentry *dentry, struct kstat *stat)
+static int btrfs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
u64 delalloc_bytes;
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
u32 blocksize = inode->i_sb->s_blocksize;
generic_fillattr(inode, stat);
@@ -9480,10 +9481,10 @@ static int btrfs_rename_exchange(struct inode *old_dir,
* We need to find a free sequence number both in the source and
* in the destination directory for the exchange.
*/
- ret = btrfs_set_inode_index(new_dir, &old_idx);
+ ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx);
if (ret)
goto out_fail;
- ret = btrfs_set_inode_index(old_dir, &new_idx);
+ ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx);
if (ret)
goto out_fail;
@@ -9581,7 +9582,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
goto out_fail;
}
- ret = btrfs_add_link(trans, new_dir, old_inode,
+ ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
new_dentry->d_name.name,
new_dentry->d_name.len, 0, old_idx);
if (ret) {
@@ -9589,7 +9590,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
goto out_fail;
}
- ret = btrfs_add_link(trans, old_dir, new_inode,
+ ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
old_dentry->d_name.name,
old_dentry->d_name.len, 0, new_idx);
if (ret) {
@@ -9691,8 +9692,8 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- ret = btrfs_add_nondir(trans, dir, dentry,
- inode, 0, index);
+ ret = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
+ BTRFS_I(inode), 0, index);
if (ret)
goto out;
@@ -9791,7 +9792,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (dest != root)
btrfs_record_root_in_trans(trans, dest);
- ret = btrfs_set_inode_index(new_dir, &index);
+ ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index);
if (ret)
goto out_fail;
@@ -9858,14 +9859,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry->d_name.len);
}
if (!ret && new_inode->i_nlink == 0)
- ret = btrfs_orphan_add(trans, d_inode(new_dentry));
+ ret = btrfs_orphan_add(trans,
+ BTRFS_I(d_inode(new_dentry)));
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}
}
- ret = btrfs_add_link(trans, new_dir, old_inode,
+ ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
new_dentry->d_name.name,
new_dentry->d_name.len, 0, index);
if (ret) {
@@ -10232,7 +10234,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_symlink_aops;
inode_set_bytes(inode, name_len);
- btrfs_i_size_write(inode, name_len);
+ btrfs_i_size_write(BTRFS_I(inode), name_len);
err = btrfs_update_inode(trans, root, inode);
/*
* Last step, add directory indexes for our symlink inode. This is the
@@ -10240,7 +10242,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
* elsewhere above.
*/
if (!err)
- err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+ err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
+ BTRFS_I(inode), 0, index);
if (err) {
drop_inode = 1;
goto out_unlock_inode;
@@ -10326,7 +10329,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
break;
}
- btrfs_drop_extent_cache(inode, cur_offset,
+ btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
cur_offset + ins.offset -1, 0);
em = alloc_extent_map();
@@ -10353,7 +10356,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
write_unlock(&em_tree->lock);
if (ret != -EEXIST)
break;
- btrfs_drop_extent_cache(inode, cur_offset,
+ btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
cur_offset + ins.offset - 1,
0);
}
@@ -10475,7 +10478,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = btrfs_update_inode(trans, root, inode);
if (ret)
goto out_inode;
- ret = btrfs_orphan_add(trans, inode);
+ ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret)
goto out_inode;
@@ -10505,6 +10508,12 @@ out_inode:
}
+__attribute__((const))
+static int dummy_readpage_io_failed_hook(struct page *page, int failed_mirror)
+{
+ return 0;
+}
+
static const struct inode_operations btrfs_dir_inode_operations = {
.getattr = btrfs_getattr,
.lookup = btrfs_lookup,
@@ -10543,10 +10552,14 @@ static const struct file_operations btrfs_dir_file_operations = {
};
static const struct extent_io_ops btrfs_extent_io_ops = {
- .fill_delalloc = run_delalloc_range,
+ /* mandatory callbacks */
.submit_bio_hook = btrfs_submit_bio_hook,
- .merge_bio_hook = btrfs_merge_bio_hook,
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
+ .merge_bio_hook = btrfs_merge_bio_hook,
+ .readpage_io_failed_hook = dummy_readpage_io_failed_hook,
+
+ /* optional callbacks */
+ .fill_delalloc = run_delalloc_range,
.writepage_end_io_hook = btrfs_writepage_end_io_hook,
.writepage_start_hook = btrfs_writepage_start_hook,
.set_bit_hook = btrfs_set_bit_hook,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d853997..dabfc7a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -434,7 +434,7 @@ int btrfs_is_empty_uuid(u8 *uuid)
static noinline int create_subvol(struct inode *dir,
struct dentry *dentry,
- char *name, int namelen,
+ const char *name, int namelen,
u64 *async_transid,
struct btrfs_qgroup_inherit *inherit)
{
@@ -580,21 +580,21 @@ static noinline int create_subvol(struct inode *dir,
/*
* insert the directory item
*/
- ret = btrfs_set_inode_index(dir, &index);
+ ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_insert_dir_item(trans, root,
- name, namelen, dir, &key,
+ name, namelen, BTRFS_I(dir), &key,
BTRFS_FT_DIR, index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
- btrfs_i_size_write(dir, dir->i_size + namelen * 2);
+ btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
@@ -832,7 +832,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
* inside this filesystem so it's quite a bit simpler.
*/
static noinline int btrfs_mksubvol(const struct path *parent,
- char *name, int namelen,
+ const char *name, int namelen,
struct btrfs_root *snap_src,
u64 *async_transid, bool readonly,
struct btrfs_qgroup_inherit *inherit)
@@ -1009,7 +1009,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
/* get the big lock and read metadata off disk */
lock_extent_bits(io_tree, start, end, &cached);
- em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS);
if (IS_ERR(em))
@@ -1625,7 +1625,7 @@ out:
}
static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
- char *name, unsigned long fd, int subvol,
+ const char *name, unsigned long fd, int subvol,
u64 *transid, bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
@@ -3298,7 +3298,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
if (endoff > destoff + olen)
endoff = destoff + olen;
if (endoff > inode->i_size)
- btrfs_i_size_write(inode, endoff);
+ btrfs_i_size_write(BTRFS_I(inode), endoff);
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
@@ -3311,20 +3311,19 @@ out:
return ret;
}
-static void clone_update_extent_map(struct inode *inode,
+static void clone_update_extent_map(struct btrfs_inode *inode,
const struct btrfs_trans_handle *trans,
const struct btrfs_path *path,
const u64 hole_offset,
const u64 hole_len)
{
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
int ret;
em = alloc_extent_map();
if (!em) {
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
return;
}
@@ -3338,7 +3337,7 @@ static void clone_update_extent_map(struct inode *inode,
if (btrfs_file_extent_type(path->nodes[0], fi) ==
BTRFS_FILE_EXTENT_INLINE)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
} else {
em->start = hole_offset;
em->len = hole_len;
@@ -3364,8 +3363,7 @@ static void clone_update_extent_map(struct inode *inode,
}
if (ret)
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
}
/*
@@ -3791,11 +3789,12 @@ process_slot:
/* If we have an implicit hole (NO_HOLES feature). */
if (drop_start < new_key.offset)
- clone_update_extent_map(inode, trans,
+ clone_update_extent_map(BTRFS_I(inode), trans,
NULL, drop_start,
new_key.offset - drop_start);
- clone_update_extent_map(inode, trans, path, 0, 0);
+ clone_update_extent_map(BTRFS_I(inode), trans,
+ path, 0, 0);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
@@ -3845,8 +3844,9 @@ process_slot:
btrfs_end_transaction(trans);
goto out;
}
- clone_update_extent_map(inode, trans, NULL, last_dest_end,
- destoff + len - last_dest_end);
+ clone_update_extent_map(BTRFS_I(inode), trans, NULL,
+ last_dest_end,
+ destoff + len - last_dest_end);
ret = clone_finish_inode_update(trans, inode, destoff + len,
destoff, olen, no_time_update);
}
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 45d2698..f48c8c1 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -76,7 +76,7 @@ static inline void write_compress_length(char *buf, size_t len)
memcpy(buf, &dlen, LZO_LEN);
}
-static inline size_t read_compress_length(char *buf)
+static inline size_t read_compress_length(const char *buf)
{
__le32 dlen;
@@ -86,13 +86,11 @@ static inline size_t read_compress_length(char *buf)
static int lzo_compress_pages(struct list_head *ws,
struct address_space *mapping,
- u64 start, unsigned long len,
+ u64 start,
struct page **pages,
- unsigned long nr_dest_pages,
unsigned long *out_pages,
unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out)
+ unsigned long *total_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
@@ -102,7 +100,9 @@ static int lzo_compress_pages(struct list_head *ws,
struct page *in_page = NULL;
struct page *out_page = NULL;
unsigned long bytes_left;
-
+ unsigned long len = *total_out;
+ unsigned long nr_dest_pages = *out_pages;
+ const unsigned long max_out = nr_dest_pages * PAGE_SIZE;
size_t in_len;
size_t out_len;
char *buf;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index bc2aba8..9a46878 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -879,15 +879,14 @@ out:
/* Since the DIO code tries to lock a wide area we need to look for any ordered
* extents that exist in the range, rather than just the start of the range.
*/
-struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
- u64 file_offset,
- u64 len)
+struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
+ struct btrfs_inode *inode, u64 file_offset, u64 len)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
- tree = &BTRFS_I(inode)->ordered_tree;
+ tree = &inode->ordered_tree;
spin_lock_irq(&tree->lock);
node = tree_search(tree, file_offset);
if (!node) {
@@ -923,7 +922,7 @@ bool btrfs_have_ordered_extents_in_range(struct inode *inode,
{
struct btrfs_ordered_extent *oe;
- oe = btrfs_lookup_ordered_range(inode, file_offset, len);
+ oe = btrfs_lookup_ordered_range(BTRFS_I(inode), file_offset, len);
if (oe) {
btrfs_put_ordered_extent(oe);
return true;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index a8cb8ef..195c93b 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -189,9 +189,10 @@ void btrfs_start_ordered_extent(struct inode *inode,
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
-struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
- u64 file_offset,
- u64 len);
+struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
+ struct btrfs_inode *inode,
+ u64 file_offset,
+ u64 len);
bool btrfs_have_ordered_extents_in_range(struct inode *inode,
u64 file_offset,
u64 len);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ddbde0f..d60df51 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1714,8 +1714,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
if (!ret)
continue;
- btrfs_drop_extent_cache(inode, key.offset, end,
- 1);
+ btrfs_drop_extent_cache(BTRFS_I(inode),
+ key.offset, end, 1);
unlock_extent(&BTRFS_I(inode)->io_tree,
key.offset, end);
}
@@ -2130,7 +2130,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
/* the lock_extent waits for readpage to complete */
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
- btrfs_drop_extent_cache(inode, start, end, 1);
+ btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
}
return 0;
@@ -3161,7 +3161,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
free_extent_map(em);
break;
}
- btrfs_drop_extent_cache(inode, start, end, 0);
+ btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
}
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
return ret;
@@ -3203,7 +3203,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
index = (cluster->start - offset) >> PAGE_SHIFT;
last_index = (cluster->end - offset) >> PAGE_SHIFT;
while (index <= last_index) {
- ret = btrfs_delalloc_reserve_metadata(inode, PAGE_SIZE);
+ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
+ PAGE_SIZE);
if (ret)
goto out;
@@ -3215,7 +3216,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
page = find_or_create_page(inode->i_mapping, index,
mask);
if (!page) {
- btrfs_delalloc_release_metadata(inode,
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE);
ret = -ENOMEM;
goto out;
@@ -3234,7 +3235,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (!PageUptodate(page)) {
unlock_page(page);
put_page(page);
- btrfs_delalloc_release_metadata(inode,
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE);
ret = -EIO;
goto out;
@@ -4245,7 +4246,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
BTRFS_I(inode)->index_cnt = group->key.objectid;
- err = btrfs_orphan_add(trans, inode);
+ err = btrfs_orphan_add(trans, BTRFS_I(inode));
out:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ff9a11c..b0251eb 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -731,7 +731,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
ret = -EIO;
goto out;
}
- ret = repair_io_failure(inode, offset, PAGE_SIZE,
+ ret = repair_io_failure(BTRFS_I(inode), offset, PAGE_SIZE,
fixup->logical, page,
offset - page_offset(page),
fixup->mirror_num);
@@ -4236,7 +4236,7 @@ out:
scrub_pending_trans_workers_dec(sctx);
}
-static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
+static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
u64 logical)
{
struct extent_state *cached_state = NULL;
@@ -4246,7 +4246,7 @@ static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
u64 lockstart = start, lockend = start + len - 1;
int ret = 0;
- io_tree = &BTRFS_I(inode)->io_tree;
+ io_tree = &inode->io_tree;
lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
@@ -4325,7 +4325,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
io_tree = &BTRFS_I(inode)->io_tree;
nocow_ctx_logical = nocow_ctx->logical;
- ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
+ ret = check_extent_to_block(BTRFS_I(inode), offset, len,
+ nocow_ctx_logical);
if (ret) {
ret = ret > 0 ? 0 : ret;
goto out;
@@ -4372,7 +4373,7 @@ again:
}
}
- ret = check_extent_to_block(inode, offset, len,
+ ret = check_extent_to_block(BTRFS_I(inode), offset, len,
nocow_ctx_logical);
if (ret) {
ret = ret > 0 ? 0 : ret;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d145ce8..456c890 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1681,6 +1681,9 @@ static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
{
int ret;
+ if (ino == BTRFS_FIRST_FREE_OBJECTID)
+ return 1;
+
ret = get_cur_inode_state(sctx, ino, gen);
if (ret < 0)
goto out;
@@ -1866,7 +1869,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
* not deleted and then re-created, if it was then we have no overwrite
* and we can just unlink this entry.
*/
- if (sctx->parent_root) {
+ if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
NULL, NULL, NULL);
if (ret < 0 && ret != -ENOENT)
@@ -1934,6 +1937,19 @@ static int did_overwrite_ref(struct send_ctx *sctx,
if (ret <= 0)
goto out;
+ if (dir != BTRFS_FIRST_FREE_OBJECTID) {
+ ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL,
+ NULL, NULL, NULL);
+ if (ret < 0 && ret != -ENOENT)
+ goto out;
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ if (gen != dir_gen)
+ goto out;
+ }
+
/* check if the ref was overwritten by another ref */
ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
&ow_inode, &other_type);
@@ -3556,6 +3572,7 @@ static int wait_for_parent_move(struct send_ctx *sctx,
{
int ret = 0;
u64 ino = parent_ref->dir;
+ u64 ino_gen = parent_ref->dir_gen;
u64 parent_ino_before, parent_ino_after;
struct fs_path *path_before = NULL;
struct fs_path *path_after = NULL;
@@ -3576,6 +3593,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
* at get_cur_path()).
*/
while (ino > BTRFS_FIRST_FREE_OBJECTID) {
+ u64 parent_ino_after_gen;
+
if (is_waiting_for_move(sctx, ino)) {
/*
* If the current inode is an ancestor of ino in the
@@ -3598,7 +3617,7 @@ static int wait_for_parent_move(struct send_ctx *sctx,
fs_path_reset(path_after);
ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
- NULL, path_after);
+ &parent_ino_after_gen, path_after);
if (ret < 0)
goto out;
ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
@@ -3615,10 +3634,20 @@ static int wait_for_parent_move(struct send_ctx *sctx,
if (ino > sctx->cur_ino &&
(parent_ino_before != parent_ino_after || len1 != len2 ||
memcmp(path_before->start, path_after->start, len1))) {
- ret = 1;
- break;
+ u64 parent_ino_gen;
+
+ ret = get_inode_info(sctx->parent_root, ino, NULL,
+ &parent_ino_gen, NULL, NULL, NULL,
+ NULL);
+ if (ret < 0)
+ goto out;
+ if (ino_gen == parent_ino_gen) {
+ ret = 1;
+ break;
+ }
}
ino = parent_ino_after;
+ ino_gen = parent_ino_after_gen;
}
out:
@@ -5277,6 +5306,81 @@ out:
return ret;
}
+static int range_is_hole_in_parent(struct send_ctx *sctx,
+ const u64 start,
+ const u64 end)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_root *root = sctx->parent_root;
+ u64 search_start = start;
+ int ret;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = search_start;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0 && path->slots[0] > 0)
+ path->slots[0]--;
+
+ while (search_start < end) {
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ struct btrfs_file_extent_item *fi;
+ u64 extent_end;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid < sctx->cur_ino ||
+ key.type < BTRFS_EXTENT_DATA_KEY)
+ goto next;
+ if (key.objectid > sctx->cur_ino ||
+ key.type > BTRFS_EXTENT_DATA_KEY ||
+ key.offset >= end)
+ break;
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(leaf, fi) ==
+ BTRFS_FILE_EXTENT_INLINE) {
+ u64 size = btrfs_file_extent_inline_len(leaf, slot, fi);
+
+ extent_end = ALIGN(key.offset + size,
+ root->fs_info->sectorsize);
+ } else {
+ extent_end = key.offset +
+ btrfs_file_extent_num_bytes(leaf, fi);
+ }
+ if (extent_end <= start)
+ goto next;
+ if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
+ search_start = extent_end;
+ goto next;
+ }
+ ret = 0;
+ goto out;
+next:
+ path->slots[0]++;
+ }
+ ret = 1;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
struct btrfs_key *key)
{
@@ -5321,8 +5425,17 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
return ret;
}
- if (sctx->cur_inode_last_extent < key->offset)
- ret = send_hole(sctx, key->offset);
+ if (sctx->cur_inode_last_extent < key->offset) {
+ ret = range_is_hole_in_parent(sctx,
+ sctx->cur_inode_last_extent,
+ key->offset);
+ if (ret < 0)
+ return ret;
+ else if (ret == 0)
+ ret = send_hole(sctx, key->offset);
+ else
+ ret = 0;
+ }
sctx->cur_inode_last_extent = extent_end;
return ret;
}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 4d0f038..8c91d03 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -278,7 +278,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* First with no extents */
BTRFS_I(inode)->root = root;
- em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize, 0);
if (IS_ERR(em)) {
em = NULL;
test_msg("Got an error when we shouldn't have\n");
@@ -293,7 +293,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
free_extent_map(em);
- btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
+ btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
/*
* All of the magic numbers are based on the mapping setup in
@@ -302,7 +302,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
*/
setup_file_extents(root, sectorsize);
- em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -323,7 +323,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -350,7 +350,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -372,7 +372,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Regular extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -399,7 +399,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* The next 3 are split extents */
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -428,7 +428,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -450,7 +450,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -484,7 +484,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Prealloc extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -513,7 +513,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* The next 3 are a half written prealloc extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -543,7 +543,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -576,7 +576,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -611,7 +611,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Now for the compressed extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -645,7 +645,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Split compressed extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -680,7 +680,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -707,7 +707,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -742,7 +742,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* A hole between regular extents but no hole extent */
- em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6,
+ sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -769,7 +770,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, 4096 * 1024, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -802,7 +803,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -885,7 +886,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
insert_inode_item_key(root);
insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize,
sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
- em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -907,7 +908,8 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
}
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize,
+ 2 * sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 6b3e0fc..61b807d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1505,7 +1505,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/*
* insert the directory item
*/
- ret = btrfs_set_inode_index(parent_inode, &index);
+ ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index);
BUG_ON(ret); /* -ENOMEM */
/* check if there is a file/dir which has the same name. */
@@ -1644,7 +1644,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_insert_dir_item(trans, parent_root,
dentry->d_name.name, dentry->d_name.len,
- parent_inode, &key,
+ BTRFS_I(parent_inode), &key,
BTRFS_FT_DIR, index);
/* We have check then name at the beginning, so it is impossible. */
BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
@@ -1653,7 +1653,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto fail;
}
- btrfs_i_size_write(parent_inode, parent_inode->i_size +
+ btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
dentry->d_name.len * 2);
parent_inode->i_mtime = parent_inode->i_ctime =
current_time(parent_inode);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 3806853..a59674c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -673,6 +673,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
unsigned long dest_offset;
struct btrfs_key ins;
+ if (btrfs_file_extent_disk_bytenr(eb, item) == 0 &&
+ btrfs_fs_incompat(fs_info, NO_HOLES))
+ goto update_inode;
+
ret = btrfs_insert_empty_item(trans, root, path, key,
sizeof(*item));
if (ret)
@@ -825,6 +829,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
}
inode_add_bytes(inode, nbytes);
+update_inode:
ret = btrfs_update_inode(trans, root, inode);
out:
if (inode)
@@ -1322,8 +1327,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
/* insert our name */
- ret = btrfs_add_link(trans, dir, inode, name, namelen,
- 0, ref_index);
+ ret = btrfs_add_link(trans, BTRFS_I(dir),
+ BTRFS_I(inode),
+ name, namelen, 0, ref_index);
if (ret)
goto out;
@@ -1641,7 +1647,8 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
return -EIO;
}
- ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
+ ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
+ name_len, 1, index);
/* FIXME, put inode into FIXUP list */
@@ -1780,7 +1787,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
out:
btrfs_release_path(path);
if (!ret && update_size) {
- btrfs_i_size_write(dir, dir->i_size + name_len * 2);
+ btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2);
ret = btrfs_update_inode(trans, root, dir);
}
kfree(name);
@@ -5045,14 +5052,14 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
* a full commit is required.
*/
static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct dentry *parent,
struct super_block *sb,
u64 last_committed)
{
int ret = 0;
struct dentry *old_parent = NULL;
- struct inode *orig_inode = inode;
+ struct btrfs_inode *orig_inode = inode;
/*
* for regular files, if its inode is already on disk, we don't
@@ -5060,15 +5067,15 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
* we can use the last_unlink_trans field to record renames
* and other fun in this file.
*/
- if (S_ISREG(inode->i_mode) &&
- BTRFS_I(inode)->generation <= last_committed &&
- BTRFS_I(inode)->last_unlink_trans <= last_committed)
- goto out;
+ if (S_ISREG(inode->vfs_inode.i_mode) &&
+ inode->generation <= last_committed &&
+ inode->last_unlink_trans <= last_committed)
+ goto out;
- if (!S_ISDIR(inode->i_mode)) {
+ if (!S_ISDIR(inode->vfs_inode.i_mode)) {
if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
goto out;
- inode = d_inode(parent);
+ inode = BTRFS_I(d_inode(parent));
}
while (1) {
@@ -5079,10 +5086,10 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
* think this inode has already been logged.
*/
if (inode != orig_inode)
- BTRFS_I(inode)->logged_trans = trans->transid;
+ inode->logged_trans = trans->transid;
smp_mb();
- if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) {
+ if (btrfs_must_commit_transaction(trans, inode)) {
ret = 1;
break;
}
@@ -5091,8 +5098,8 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
break;
if (IS_ROOT(parent)) {
- inode = d_inode(parent);
- if (btrfs_must_commit_transaction(trans, BTRFS_I(inode)))
+ inode = BTRFS_I(d_inode(parent));
+ if (btrfs_must_commit_transaction(trans, inode))
ret = 1;
break;
}
@@ -5100,7 +5107,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
parent = dget_parent(parent);
dput(old_parent);
old_parent = parent;
- inode = d_inode(parent);
+ inode = BTRFS_I(d_inode(parent));
}
dput(old_parent);
@@ -5287,15 +5294,15 @@ next_dir_inode:
}
static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
int ret;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- const u64 ino = btrfs_ino(BTRFS_I(inode));
+ struct btrfs_root *root = inode->root;
+ const u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
if (!path)
@@ -5390,7 +5397,8 @@ out:
* the last committed transaction
*/
static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root,
+ struct btrfs_inode *inode,
struct dentry *parent,
const loff_t start,
const loff_t end,
@@ -5404,9 +5412,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
int ret = 0;
u64 last_committed = fs_info->last_trans_committed;
bool log_dentries = false;
- struct inode *orig_inode = inode;
+ struct btrfs_inode *orig_inode = inode;
- sb = inode->i_sb;
+ sb = inode->vfs_inode.i_sb;
if (btrfs_test_opt(fs_info, NOTREELOG)) {
ret = 1;
@@ -5423,18 +5431,17 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_no_trans;
}
- if (root != BTRFS_I(inode)->root ||
- btrfs_root_refs(&root->root_item) == 0) {
+ if (root != inode->root || btrfs_root_refs(&root->root_item) == 0) {
ret = 1;
goto end_no_trans;
}
- ret = check_parent_dirs_for_sync(trans, inode, parent,
- sb, last_committed);
+ ret = check_parent_dirs_for_sync(trans, inode, parent, sb,
+ last_committed);
if (ret)
goto end_no_trans;
- if (btrfs_inode_in_log(BTRFS_I(inode), trans->transid)) {
+ if (btrfs_inode_in_log(inode, trans->transid)) {
ret = BTRFS_NO_LOG_SYNC;
goto end_no_trans;
}
@@ -5443,8 +5450,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
- ret = btrfs_log_inode(trans, root, BTRFS_I(inode), inode_only,
- start, end, ctx);
+ ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx);
if (ret)
goto end_trans;
@@ -5454,14 +5460,14 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
* we can use the last_unlink_trans field to record renames
* and other fun in this file.
*/
- if (S_ISREG(inode->i_mode) &&
- BTRFS_I(inode)->generation <= last_committed &&
- BTRFS_I(inode)->last_unlink_trans <= last_committed) {
+ if (S_ISREG(inode->vfs_inode.i_mode) &&
+ inode->generation <= last_committed &&
+ inode->last_unlink_trans <= last_committed) {
ret = 0;
goto end_trans;
}
- if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
+ if (S_ISDIR(inode->vfs_inode.i_mode) && ctx && ctx->log_new_dentries)
log_dentries = true;
/*
@@ -5505,7 +5511,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
* but the file inode does not have a matching BTRFS_INODE_REF_KEY item
* and has a link count of 2.
*/
- if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
+ if (inode->last_unlink_trans > last_committed) {
ret = btrfs_log_all_parents(trans, orig_inode, ctx);
if (ret)
goto end_trans;
@@ -5515,14 +5521,13 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
break;
- inode = d_inode(parent);
- if (root != BTRFS_I(inode)->root)
+ inode = BTRFS_I(d_inode(parent));
+ if (root != inode->root)
break;
- if (BTRFS_I(inode)->generation > last_committed) {
- ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
- LOG_INODE_EXISTS,
- 0, LLONG_MAX, ctx);
+ if (inode->generation > last_committed) {
+ ret = btrfs_log_inode(trans, root, inode,
+ LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
if (ret)
goto end_trans;
}
@@ -5534,7 +5539,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
old_parent = parent;
}
if (log_dentries)
- ret = log_new_dir_dentries(trans, root, BTRFS_I(orig_inode), ctx);
+ ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
else
ret = 0;
end_trans:
@@ -5566,8 +5571,8 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct dentry *parent = dget_parent(dentry);
int ret;
- ret = btrfs_log_inode_parent(trans, root, d_inode(dentry), parent,
- start, end, 0, ctx);
+ ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)),
+ parent, start, end, 0, ctx);
dput(parent);
return ret;
@@ -5829,7 +5834,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
(!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
return 0;
- return btrfs_log_inode_parent(trans, root, &inode->vfs_inode, parent, 0,
+ return btrfs_log_inode_parent(trans, root, inode, parent, 0,
LLONG_MAX, 1, NULL);
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 13e55d1..73d56ee 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1725,7 +1725,7 @@ out:
* Function to update ctime/mtime for a given device path.
* Mainly used for ctime/mtime based probe like libblkid.
*/
-static void update_dev_time(char *path_name)
+static void update_dev_time(const char *path_name)
{
struct file *filp;
@@ -1851,7 +1851,8 @@ void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
fs_info->fs_devices->latest_bdev = next_device->bdev;
}
-int btrfs_rm_device(struct btrfs_fs_info *fs_info, char *device_path, u64 devid)
+int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
+ u64 devid)
{
struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
@@ -2091,7 +2092,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
}
static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info,
- char *device_path,
+ const char *device_path,
struct btrfs_device **device)
{
int ret = 0;
@@ -2118,7 +2119,7 @@ static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info,
}
int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
- char *device_path,
+ const char *device_path,
struct btrfs_device **device)
{
*device = NULL;
@@ -2151,7 +2152,8 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
* Lookup a device given by device id, or the path if the id is 0.
*/
int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid,
- char *devpath, struct btrfs_device **device)
+ const char *devpath,
+ struct btrfs_device **device)
{
int ret;
@@ -2307,7 +2309,7 @@ error:
return ret;
}
-int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *device_path)
+int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
{
struct btrfs_root *root = fs_info->dev_root;
struct request_queue *q;
@@ -2515,7 +2517,7 @@ error:
}
int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- char *device_path,
+ const char *device_path,
struct btrfs_device *srcdev,
struct btrfs_device **device_out)
{
@@ -6954,7 +6956,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
key.offset = device->devid;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
if (ret < 0) {
btrfs_warn_in_rcu(fs_info,
@@ -7102,7 +7105,7 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
return 0;
}
-void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path)
+void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
{
struct buffer_head *bh;
struct btrfs_super_block *disk_super;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 24ba6bc..59be812 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -422,16 +422,16 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step);
void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
struct btrfs_device *device, struct btrfs_device *this_dev);
int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
- char *device_path,
+ const char *device_path,
struct btrfs_device **device);
int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid,
- char *devpath,
+ const char *devpath,
struct btrfs_device **device);
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
const u8 *uuid);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
- char *device_path, u64 devid);
+ const char *device_path, u64 devid);
void btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
@@ -439,9 +439,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
u8 *uuid, u8 *fsid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
-int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *path);
+int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- char *device_path,
+ const char *device_path,
struct btrfs_device *srcdev,
struct btrfs_device **device_out);
int btrfs_balance(struct btrfs_balance_control *bctl,
@@ -474,7 +474,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev);
void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev);
-void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path);
+void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
u64 logical, u64 len, int mirror_num);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index da497f1..135b108 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -73,13 +73,11 @@ fail:
static int zlib_compress_pages(struct list_head *ws,
struct address_space *mapping,
- u64 start, unsigned long len,
+ u64 start,
struct page **pages,
- unsigned long nr_dest_pages,
unsigned long *out_pages,
unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out)
+ unsigned long *total_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret;
@@ -89,6 +87,9 @@ static int zlib_compress_pages(struct list_head *ws,
struct page *in_page = NULL;
struct page *out_page = NULL;
unsigned long bytes_left;
+ unsigned long len = *total_out;
+ unsigned long nr_dest_pages = *out_pages;
+ const unsigned long max_out = nr_dest_pages * PAGE_SIZE;
*out_pages = 0;
*total_out = 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index 28484b3..9196f2a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -19,6 +19,7 @@
*/
#include <linux/kernel.h>
+#include <linux/sched/signal.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/iomap.h>
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index cd1effe..9bf90bc 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -19,6 +19,7 @@
#include <linux/fscache-cache.h>
#include <linux/timer.h>
#include <linux/wait.h>
+#include <linux/cred.h>
#include <linux/workqueue.h>
#include <linux/security.h>
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 7ce35ae..1a3e1b4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
+#include <linux/signal.h>
#include "super.h"
#include "mds_client.h"
@@ -391,6 +392,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
nr_pages = i;
if (nr_pages > 0) {
len = nr_pages << PAGE_SHIFT;
+ osd_req_op_extent_update(req, 0, len);
break;
}
goto out_pages;
@@ -771,7 +773,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
- if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
@@ -1017,8 +1019,7 @@ new_request:
&ci->i_layout, vino,
offset, &len, 0, num_ops,
CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
+ CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq,
truncate_size, false);
if (IS_ERR(req)) {
@@ -1028,8 +1029,7 @@ new_request:
min(num_ops,
CEPH_OSD_SLAB_OPS),
CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
+ CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq,
truncate_size, true);
BUG_ON(IS_ERR(req));
@@ -1194,7 +1194,7 @@ static int ceph_update_writeable_page(struct file *file,
int r;
struct ceph_snap_context *snapc, *oldest;
- if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
unlock_page(page);
return -EIO;
@@ -1681,8 +1681,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 0, 1,
- CEPH_OSD_OP_CREATE,
- CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE,
NULL, 0, 0, false);
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -1699,8 +1698,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 1, 3,
- CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
NULL, ci->i_truncate_seq,
ci->i_truncate_size, false);
if (IS_ERR(req)) {
@@ -1873,7 +1871,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
goto out_unlock;
}
- wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK;
+ wr_req->r_flags = CEPH_OSD_FLAG_WRITE;
osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc);
ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 5bc5d37..4e7421c 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -234,7 +234,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
inode);
if (fscache_cookie_enabled(ci->fscache)) {
- dout("fscache_file_set_cookie %p %p enabing cache\n",
+ dout("fscache_file_set_cookie %p %p enabling cache\n",
inode, filp);
}
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 94fd76d..68c78be 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2,7 +2,7 @@
#include <linux/fs.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
@@ -867,7 +867,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
/*
* Return caps we have registered with the MDS(s) as 'wanted'.
*/
-int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
+int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
{
struct ceph_cap *cap;
struct rb_node *p;
@@ -875,7 +875,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
cap = rb_entry(p, struct ceph_cap, ci_node);
- if (!__cap_is_valid(cap))
+ if (check && !__cap_is_valid(cap))
continue;
if (cap == ci->i_auth_cap)
mds_wanted |= cap->mds_wanted;
@@ -1184,6 +1184,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
delayed = 1;
}
ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
+ if (want & ~cap->mds_wanted) {
+ /* user space may open/close single file frequently.
+ * This avoids droping mds_wanted immediately after
+ * requesting new mds_wanted.
+ */
+ __cap_set_timeouts(mdsc, ci);
+ }
cap->issued &= retain; /* drop bits we don't want */
if (cap->implemented & ~cap->issued) {
@@ -2084,8 +2091,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
- ceph_sync_write_wait(inode);
-
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret < 0)
goto out;
@@ -2477,23 +2482,22 @@ again:
if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
int mds_wanted;
- if (ACCESS_ONCE(mdsc->fsc->mount_state) ==
+ if (READ_ONCE(mdsc->fsc->mount_state) ==
CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
*err = -EIO;
ret = 1;
goto out_unlock;
}
- mds_wanted = __ceph_caps_mds_wanted(ci);
- if ((mds_wanted & need) != need) {
+ mds_wanted = __ceph_caps_mds_wanted(ci, false);
+ if (need & ~(mds_wanted & need)) {
dout("get_cap_refs %p caps were dropped"
" (session killed?)\n", inode);
*err = -ESTALE;
ret = 1;
goto out_unlock;
}
- if ((mds_wanted & file_wanted) ==
- (file_wanted & (CEPH_CAP_FILE_RD|CEPH_CAP_FILE_WR)))
+ if (!(file_wanted & ~mds_wanted))
ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
}
@@ -3404,6 +3408,7 @@ retry:
tcap->implemented |= issued;
if (cap == ci->i_auth_cap)
ci->i_auth_cap = tcap;
+
if (!list_empty(&ci->i_cap_flush_list) &&
ci->i_auth_cap == tcap) {
spin_lock(&mdsc->cap_dirty_lock);
@@ -3417,9 +3422,18 @@ retry:
} else if (tsession) {
/* add placeholder for the export tagert */
int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
+ tcap = new_cap;
ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
+ if (!list_empty(&ci->i_cap_flush_list) &&
+ ci->i_auth_cap == tcap) {
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_move_tail(&ci->i_flushing_item,
+ &tcap->session->s_cap_flushing);
+ spin_unlock(&mdsc->cap_dirty_lock);
+ }
+
__ceph_remove_cap(cap, false);
goto out_unlock;
}
@@ -3924,9 +3938,10 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
}
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
+ struct inode *dir,
int mds, int drop, int unless)
{
- struct inode *dir = d_inode(dentry->d_parent);
+ struct dentry *parent = NULL;
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int force = 0;
@@ -3941,9 +3956,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
+ if (!dir) {
+ parent = dget(dentry->d_parent);
+ dir = d_inode(parent);
+ }
spin_unlock(&dentry->d_lock);
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
+ dput(parent);
spin_lock(&dentry->d_lock);
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 39ff678..f2ae393 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -70,7 +70,7 @@ static int mdsc_show(struct seq_file *s, void *p)
seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
seq_puts(s, "\t(unsafe)");
else
seq_puts(s, "\t");
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8ab1fdf..3e9ad50 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -371,7 +371,7 @@ more:
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
req->r_direct_hash = ceph_frag_value(frag);
- req->r_direct_is_hash = true;
+ __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
if (fi->last_name) {
req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
if (!req->r_path2) {
@@ -417,7 +417,7 @@ more:
fi->frag = frag;
fi->last_readdir = req;
- if (req->r_did_prepopulate) {
+ if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
fi->readdir_cache_idx = req->r_readdir_cache_idx;
if (fi->readdir_cache_idx < 0) {
/* preclude from marking dir ordered */
@@ -752,7 +752,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.getattr.mask = cpu_to_le32(mask);
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc, NULL, req);
err = ceph_handle_snapdir(req, dentry, err);
dentry = ceph_finish_lookup(req, dentry, err);
@@ -813,7 +814,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mknod.mode = cpu_to_le32(mode);
req->r_args.mknod.rdev = cpu_to_le32(rdev);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -864,7 +866,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
ceph_mdsc_put_request(req);
goto out;
}
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -913,7 +916,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mkdir.mode = cpu_to_le32(mode);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -957,7 +961,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_SHARED on source inode (mds will lock it) */
@@ -1023,7 +1028,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_inode_drop = drop_caps_for_unlink(inode);
@@ -1066,7 +1072,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
req->r_old_dentry_dir = old_dir;
- req->r_locked_dir = new_dir;
+ req->r_parent = new_dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -1194,7 +1201,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
struct inode *dir;
if (flags & LOOKUP_RCU) {
- parent = ACCESS_ONCE(dentry->d_parent);
+ parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
return -ECHILD;
@@ -1237,11 +1244,12 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
op = ceph_snap(dir) == CEPH_SNAPDIR ?
- CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR;
+ CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
if (!IS_ERR(req)) {
req->r_dentry = dget(dentry);
- req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2;
+ req->r_num_caps = 2;
+ req->r_parent = dir;
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
if (ceph_security_xattr_wanted(dir))
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 180bbef..e8f11fa 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -207,7 +207,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
req->r_inode = d_inode(child);
ihold(d_inode(child));
req->r_ino2 = ceph_vino(d_inode(parent));
- req->r_locked_dir = d_inode(parent);
+ req->r_parent = d_inode(parent);
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 045d30d..26cc954 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -283,7 +283,7 @@ int ceph_open(struct inode *inode, struct file *file)
spin_lock(&ci->i_ceph_lock);
if (__ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
- int mds_wanted = __ceph_caps_mds_wanted(ci);
+ int mds_wanted = __ceph_caps_mds_wanted(ci, true);
int issued = __ceph_caps_issued(ci, NULL);
dout("open %p fmode %d want %s issued %s using existing\n",
@@ -379,7 +379,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.open.mask = cpu_to_le32(mask);
- req->r_locked_dir = dir; /* caller holds dir->i_mutex */
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
@@ -758,9 +759,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
goto out;
}
- req->r_flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE;
+ req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);
@@ -794,89 +793,6 @@ out:
kfree(aio_work);
}
-/*
- * Write commit request unsafe callback, called to tell us when a
- * request is unsafe (that is, in flight--has been handed to the
- * messenger to send to its target osd). It is called again when
- * we've received a response message indicating the request is
- * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
- * is completed early (and unsuccessfully) due to a timeout or
- * interrupt.
- *
- * This is used if we requested both an ACK and ONDISK commit reply
- * from the OSD.
- */
-static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
-{
- struct ceph_inode_info *ci = ceph_inode(req->r_inode);
-
- dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid,
- unsafe ? "un" : "");
- if (unsafe) {
- ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
- spin_lock(&ci->i_unsafe_lock);
- list_add_tail(&req->r_unsafe_item,
- &ci->i_unsafe_writes);
- spin_unlock(&ci->i_unsafe_lock);
-
- complete_all(&req->r_completion);
- } else {
- spin_lock(&ci->i_unsafe_lock);
- list_del_init(&req->r_unsafe_item);
- spin_unlock(&ci->i_unsafe_lock);
- ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
- }
-}
-
-/*
- * Wait on any unsafe replies for the given inode. First wait on the
- * newest request, and make that the upper bound. Then, if there are
- * more requests, keep waiting on the oldest as long as it is still older
- * than the original request.
- */
-void ceph_sync_write_wait(struct inode *inode)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- struct list_head *head = &ci->i_unsafe_writes;
- struct ceph_osd_request *req;
- u64 last_tid;
-
- if (!S_ISREG(inode->i_mode))
- return;
-
- spin_lock(&ci->i_unsafe_lock);
- if (list_empty(head))
- goto out;
-
- /* set upper bound as _last_ entry in chain */
-
- req = list_last_entry(head, struct ceph_osd_request,
- r_unsafe_item);
- last_tid = req->r_tid;
-
- do {
- ceph_osdc_get_request(req);
- spin_unlock(&ci->i_unsafe_lock);
-
- dout("sync_write_wait on tid %llu (until %llu)\n",
- req->r_tid, last_tid);
- wait_for_completion(&req->r_done_completion);
- ceph_osdc_put_request(req);
-
- spin_lock(&ci->i_unsafe_lock);
- /*
- * from here on look at first entry in chain, since we
- * only want to wait for anything older than last_tid
- */
- if (list_empty(head))
- break;
- req = list_first_entry(head, struct ceph_osd_request,
- r_unsafe_item);
- } while (req->r_tid < last_tid);
-out:
- spin_unlock(&ci->i_unsafe_lock);
-}
-
static ssize_t
ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
struct ceph_snap_context *snapc,
@@ -915,9 +831,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (ret2 < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret2);
- flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE;
+ flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
} else {
flags = CEPH_OSD_FLAG_READ;
}
@@ -1116,10 +1030,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
if (ret < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret);
- flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ACK;
+ flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
while ((len = iov_iter_count(from)) > 0) {
size_t left;
@@ -1165,8 +1076,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
goto out;
}
- /* get a second commit callback */
- req->r_unsafe_callback = ceph_sync_write_unsafe;
req->r_inode = inode;
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
@@ -1616,8 +1525,7 @@ static int ceph_zero_partial_object(struct inode *inode,
ceph_vino(inode),
offset, length,
0, 1, op,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
+ CEPH_OSD_FLAG_WRITE,
NULL, 0, 0, false);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5e659d0..d449e1c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -499,7 +499,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_rdcache_gen = 0;
ci->i_rdcache_revoking = 0;
- INIT_LIST_HEAD(&ci->i_unsafe_writes);
INIT_LIST_HEAD(&ci->i_unsafe_dirops);
INIT_LIST_HEAD(&ci->i_unsafe_iops);
spin_lock_init(&ci->i_unsafe_lock);
@@ -583,14 +582,6 @@ int ceph_drop_inode(struct inode *inode)
return 1;
}
-void ceph_evict_inode(struct inode *inode)
-{
- /* wait unsafe sync writes */
- ceph_sync_write_wait(inode);
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
-}
-
static inline blkcnt_t calc_inode_blocks(u64 size)
{
return (size + (1<<9) - 1) >> 9;
@@ -1016,7 +1007,9 @@ out:
static void update_dentry_lease(struct dentry *dentry,
struct ceph_mds_reply_lease *lease,
struct ceph_mds_session *session,
- unsigned long from_time)
+ unsigned long from_time,
+ struct ceph_vino *tgt_vino,
+ struct ceph_vino *dir_vino)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
long unsigned duration = le32_to_cpu(lease->duration_ms);
@@ -1024,13 +1017,27 @@ static void update_dentry_lease(struct dentry *dentry,
long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
struct inode *dir;
+ /*
+ * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
+ * we expect a negative dentry.
+ */
+ if (!tgt_vino && d_really_is_positive(dentry))
+ return;
+
+ if (tgt_vino && (d_really_is_negative(dentry) ||
+ !ceph_ino_compare(d_inode(dentry), tgt_vino)))
+ return;
+
spin_lock(&dentry->d_lock);
dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
dentry, duration, ttl);
- /* make lease_rdcache_gen match directory */
dir = d_inode(dentry->d_parent);
+ /* make sure parent matches dir_vino */
+ if (!ceph_ino_compare(dir, dir_vino))
+ goto out_unlock;
+
/* only track leases on regular dentries */
if (ceph_snap(dir) != CEPH_NOSNAP)
goto out_unlock;
@@ -1108,61 +1115,27 @@ out:
*
* Called with snap_rwsem (read).
*/
-int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
- struct ceph_mds_session *session)
+int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
{
+ struct ceph_mds_session *session = req->r_session;
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
struct inode *in = NULL;
- struct ceph_vino vino;
+ struct ceph_vino tvino, dvino;
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0;
dout("fill_trace %p is_dentry %d is_target %d\n", req,
rinfo->head->is_dentry, rinfo->head->is_target);
-#if 0
- /*
- * Debugging hook:
- *
- * If we resend completed ops to a recovering mds, we get no
- * trace. Since that is very rare, pretend this is the case
- * to ensure the 'no trace' handlers in the callers behave.
- *
- * Fill in inodes unconditionally to avoid breaking cap
- * invariants.
- */
- if (rinfo->head->op & CEPH_MDS_OP_WRITE) {
- pr_info("fill_trace faking empty trace on %lld %s\n",
- req->r_tid, ceph_mds_op_name(rinfo->head->op));
- if (rinfo->head->is_dentry) {
- rinfo->head->is_dentry = 0;
- err = fill_inode(req->r_locked_dir,
- &rinfo->diri, rinfo->dirfrag,
- session, req->r_request_started, -1);
- }
- if (rinfo->head->is_target) {
- rinfo->head->is_target = 0;
- ininfo = rinfo->targeti.in;
- vino.ino = le64_to_cpu(ininfo->ino);
- vino.snap = le64_to_cpu(ininfo->snapid);
- in = ceph_get_inode(sb, vino);
- err = fill_inode(in, &rinfo->targeti, NULL,
- session, req->r_request_started,
- req->r_fmode);
- iput(in);
- }
- }
-#endif
-
if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
dout("fill_trace reply is empty!\n");
- if (rinfo->head->result == 0 && req->r_locked_dir)
+ if (rinfo->head->result == 0 && req->r_parent)
ceph_invalidate_dir_request(req);
return 0;
}
if (rinfo->head->is_dentry) {
- struct inode *dir = req->r_locked_dir;
+ struct inode *dir = req->r_parent;
if (dir) {
err = fill_inode(dir, NULL,
@@ -1188,8 +1161,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
dname.name = rinfo->dname;
dname.len = rinfo->dname_len;
dname.hash = full_name_hash(parent, dname.name, dname.len);
- vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
- vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
retry_lookup:
dn = d_lookup(parent, &dname);
dout("d_lookup on parent=%p name=%.*s got %p\n",
@@ -1206,8 +1179,8 @@ retry_lookup:
}
err = 0;
} else if (d_really_is_positive(dn) &&
- (ceph_ino(d_inode(dn)) != vino.ino ||
- ceph_snap(d_inode(dn)) != vino.snap)) {
+ (ceph_ino(d_inode(dn)) != tvino.ino ||
+ ceph_snap(d_inode(dn)) != tvino.snap)) {
dout(" dn %p points to wrong inode %p\n",
dn, d_inode(dn));
d_delete(dn);
@@ -1221,10 +1194,10 @@ retry_lookup:
}
if (rinfo->head->is_target) {
- vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
- vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
- in = ceph_get_inode(sb, vino);
+ in = ceph_get_inode(sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
goto done;
@@ -1233,8 +1206,8 @@ retry_lookup:
err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
session, req->r_request_started,
- (!req->r_aborted && rinfo->head->result == 0) ?
- req->r_fmode : -1,
+ (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
+ rinfo->head->result == 0) ? req->r_fmode : -1,
&req->r_caps_reservation);
if (err < 0) {
pr_err("fill_inode badness %p %llx.%llx\n",
@@ -1247,8 +1220,9 @@ retry_lookup:
* ignore null lease/binding on snapdir ENOENT, or else we
* will have trouble splicing in the virtual snapdir later
*/
- if (rinfo->head->is_dentry && !req->r_aborted &&
- req->r_locked_dir &&
+ if (rinfo->head->is_dentry &&
+ !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
+ test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
(rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
fsc->mount_options->snapdir_name,
req->r_dentry->d_name.len))) {
@@ -1257,17 +1231,19 @@ retry_lookup:
* mknod symlink mkdir : null -> new inode
* unlink : linked -> null
*/
- struct inode *dir = req->r_locked_dir;
+ struct inode *dir = req->r_parent;
struct dentry *dn = req->r_dentry;
bool have_dir_cap, have_lease;
BUG_ON(!dn);
BUG_ON(!dir);
BUG_ON(d_inode(dn->d_parent) != dir);
- BUG_ON(ceph_ino(dir) !=
- le64_to_cpu(rinfo->diri.in->ino));
- BUG_ON(ceph_snap(dir) !=
- le64_to_cpu(rinfo->diri.in->snapid));
+
+ dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
+ dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
+
+ BUG_ON(ceph_ino(dir) != dvino.ino);
+ BUG_ON(ceph_snap(dir) != dvino.snap);
/* do we have a lease on the whole dir? */
have_dir_cap =
@@ -1319,12 +1295,13 @@ retry_lookup:
ceph_dir_clear_ordered(dir);
dout("d_delete %p\n", dn);
d_delete(dn);
- } else {
- if (have_lease && d_unhashed(dn))
+ } else if (have_lease) {
+ if (d_unhashed(dn))
d_add(dn, NULL);
update_dentry_lease(dn, rinfo->dlease,
session,
- req->r_request_started);
+ req->r_request_started,
+ NULL, &dvino);
}
goto done;
}
@@ -1347,15 +1324,19 @@ retry_lookup:
have_lease = false;
}
- if (have_lease)
+ if (have_lease) {
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
update_dentry_lease(dn, rinfo->dlease, session,
- req->r_request_started);
+ req->r_request_started,
+ &tvino, &dvino);
+ }
dout(" final dn %p\n", dn);
- } else if (!req->r_aborted &&
- (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
- req->r_op == CEPH_MDS_OP_MKSNAP)) {
+ } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+ req->r_op == CEPH_MDS_OP_MKSNAP) &&
+ !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
struct dentry *dn = req->r_dentry;
- struct inode *dir = req->r_locked_dir;
+ struct inode *dir = req->r_parent;
/* fill out a snapdir LOOKUPSNAP dentry */
BUG_ON(!dn);
@@ -1370,6 +1351,26 @@ retry_lookup:
goto done;
}
req->r_dentry = dn; /* may have spliced */
+ } else if (rinfo->head->is_dentry) {
+ struct ceph_vino *ptvino = NULL;
+
+ if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
+ le32_to_cpu(rinfo->dlease->duration_ms)) {
+ dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
+ dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
+
+ if (rinfo->head->is_target) {
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+ ptvino = &tvino;
+ }
+
+ update_dentry_lease(req->r_dentry, rinfo->dlease,
+ session, req->r_request_started, ptvino,
+ &dvino);
+ } else {
+ dout("%s: no dentry lease or dir cap\n", __func__);
+ }
}
done:
dout("fill_trace done err=%d\n", err);
@@ -1478,7 +1479,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {};
- if (req->r_aborted)
+ if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
return readdir_prepopulate_inodes_only(req, session);
if (rinfo->hash_order && req->r_path2) {
@@ -1523,14 +1524,14 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
/* FIXME: release caps/leases if error occurs */
for (i = 0; i < rinfo->dir_nr; i++) {
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
- struct ceph_vino vino;
+ struct ceph_vino tvino, dvino;
dname.name = rde->name;
dname.len = rde->name_len;
dname.hash = full_name_hash(parent, dname.name, dname.len);
- vino.ino = le64_to_cpu(rde->inode.in->ino);
- vino.snap = le64_to_cpu(rde->inode.in->snapid);
+ tvino.ino = le64_to_cpu(rde->inode.in->ino);
+ tvino.snap = le64_to_cpu(rde->inode.in->snapid);
if (rinfo->hash_order) {
u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
@@ -1559,8 +1560,8 @@ retry_lookup:
goto out;
}
} else if (d_really_is_positive(dn) &&
- (ceph_ino(d_inode(dn)) != vino.ino ||
- ceph_snap(d_inode(dn)) != vino.snap)) {
+ (ceph_ino(d_inode(dn)) != tvino.ino ||
+ ceph_snap(d_inode(dn)) != tvino.snap)) {
dout(" dn %p points to wrong inode %p\n",
dn, d_inode(dn));
d_delete(dn);
@@ -1572,7 +1573,7 @@ retry_lookup:
if (d_really_is_positive(dn)) {
in = d_inode(dn);
} else {
- in = ceph_get_inode(parent->d_sb, vino);
+ in = ceph_get_inode(parent->d_sb, tvino);
if (IS_ERR(in)) {
dout("new_inode badness\n");
d_drop(dn);
@@ -1617,8 +1618,9 @@ retry_lookup:
ceph_dentry(dn)->offset = rde->offset;
+ dvino = ceph_vino(d_inode(parent));
update_dentry_lease(dn, rde->lease, req->r_session,
- req->r_request_started);
+ req->r_request_started, &tvino, &dvino);
if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
ret = fill_readdir_cache(d_inode(parent), dn,
@@ -1632,7 +1634,7 @@ next_item:
}
out:
if (err == 0 && skipped == 0) {
- req->r_did_prepopulate = true;
+ set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags);
req->r_readdir_cache_idx = cache_ctl.index;
}
ceph_readdir_cache_release(&cache_ctl);
@@ -1720,7 +1722,7 @@ static void ceph_invalidate_work(struct work_struct *work)
mutex_lock(&ci->i_truncate_mutex);
- if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO);
@@ -2185,10 +2187,10 @@ int ceph_permission(struct inode *inode, int mask)
* Get all attributes. Hopefully somedata we'll have a statlite()
* and can limit the fields we require to be accurate.
*/
-int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+int ceph_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
int err;
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 7d752d5..4c9c72f 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -25,7 +25,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
l.stripe_count = ci->i_layout.stripe_count;
l.object_size = ci->i_layout.object_size;
l.data_pool = ci->i_layout.pool_id;
- l.preferred_osd = (s32)-1;
+ l.preferred_osd = -1;
if (copy_to_user(arg, &l, sizeof(l)))
return -EFAULT;
}
@@ -97,7 +97,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
nl.data_pool = ci->i_layout.pool_id;
/* this is obsolete, and always -1 */
- nl.preferred_osd = le64_to_cpu(-1);
+ nl.preferred_osd = -1;
err = __validate_layout(mdsc, &nl);
if (err)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c9d2e55..c681762 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -547,8 +547,8 @@ void ceph_mdsc_release_request(struct kref *kref)
ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
iput(req->r_inode);
}
- if (req->r_locked_dir)
- ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+ if (req->r_parent)
+ ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
@@ -628,6 +628,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
{
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+ /* Never leave an unregistered request on an unsafe list! */
+ list_del_init(&req->r_unsafe_item);
+
if (req->r_tid == mdsc->oldest_tid) {
struct rb_node *p = rb_next(&req->r_node);
mdsc->oldest_tid = 0;
@@ -644,13 +647,15 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
erase_request(&mdsc->request_tree, req);
- if (req->r_unsafe_dir && req->r_got_unsafe) {
+ if (req->r_unsafe_dir &&
+ test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_dir_item);
spin_unlock(&ci->i_unsafe_lock);
}
- if (req->r_target_inode && req->r_got_unsafe) {
+ if (req->r_target_inode &&
+ test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_target_item);
@@ -668,6 +673,28 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
}
/*
+ * Walk back up the dentry tree until we hit a dentry representing a
+ * non-snapshot inode. We do this using the rcu_read_lock (which must be held
+ * when calling this) to ensure that the objects won't disappear while we're
+ * working with them. Once we hit a candidate dentry, we attempt to take a
+ * reference to it, and return that as the result.
+ */
+static struct inode *get_nonsnap_parent(struct dentry *dentry)
+{
+ struct inode *inode = NULL;
+
+ while (dentry && !IS_ROOT(dentry)) {
+ inode = d_inode_rcu(dentry);
+ if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
+ break;
+ dentry = dentry->d_parent;
+ }
+ if (inode)
+ inode = igrab(inode);
+ return inode;
+}
+
+/*
* Choose mds to send request to next. If there is a hint set in the
* request (e.g., due to a prior forward hint from the mds), use that.
* Otherwise, consult frag tree and/or caps to identify the
@@ -675,19 +702,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
*
* Called under mdsc->mutex.
*/
-static struct dentry *get_nonsnap_parent(struct dentry *dentry)
-{
- /*
- * we don't need to worry about protecting the d_parent access
- * here because we never renaming inside the snapped namespace
- * except to resplice to another snapdir, and either the old or new
- * result is a valid result.
- */
- while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
- dentry = dentry->d_parent;
- return dentry;
-}
-
static int __choose_mds(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
@@ -697,7 +711,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
int mode = req->r_direct_mode;
int mds = -1;
u32 hash = req->r_direct_hash;
- bool is_hash = req->r_direct_is_hash;
+ bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
/*
* is there a specific mds we should try? ignore hint if we have
@@ -717,30 +731,39 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode = NULL;
if (req->r_inode) {
inode = req->r_inode;
+ ihold(inode);
} else if (req->r_dentry) {
/* ignore race with rename; old or new d_parent is okay */
- struct dentry *parent = req->r_dentry->d_parent;
- struct inode *dir = d_inode(parent);
+ struct dentry *parent;
+ struct inode *dir;
+
+ rcu_read_lock();
+ parent = req->r_dentry->d_parent;
+ dir = req->r_parent ? : d_inode_rcu(parent);
- if (dir->i_sb != mdsc->fsc->sb) {
- /* not this fs! */
+ if (!dir || dir->i_sb != mdsc->fsc->sb) {
+ /* not this fs or parent went negative */
inode = d_inode(req->r_dentry);
+ if (inode)
+ ihold(inode);
} else if (ceph_snap(dir) != CEPH_NOSNAP) {
/* direct snapped/virtual snapdir requests
* based on parent dir inode */
- struct dentry *dn = get_nonsnap_parent(parent);
- inode = d_inode(dn);
+ inode = get_nonsnap_parent(parent);
dout("__choose_mds using nonsnap parent %p\n", inode);
} else {
/* dentry target */
inode = d_inode(req->r_dentry);
if (!inode || mode == USE_AUTH_MDS) {
/* dir + name */
- inode = dir;
+ inode = igrab(dir);
hash = ceph_dentry_hash(dir, req->r_dentry);
is_hash = true;
+ } else {
+ ihold(inode);
}
}
+ rcu_read_unlock();
}
dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
@@ -769,7 +792,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
(int)r, frag.ndist);
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
CEPH_MDS_STATE_ACTIVE)
- return mds;
+ goto out;
}
/* since this file/dir wasn't known to be
@@ -784,7 +807,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode, ceph_vinop(inode), frag.frag, mds);
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
CEPH_MDS_STATE_ACTIVE)
- return mds;
+ goto out;
}
}
}
@@ -797,6 +820,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
+ iput(inode);
goto random;
}
mds = cap->session->s_mds;
@@ -804,6 +828,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode, ceph_vinop(inode), mds,
cap == ci->i_auth_cap ? "auth " : "", cap);
spin_unlock(&ci->i_ceph_lock);
+out:
+ iput(inode);
return mds;
random:
@@ -1036,7 +1062,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
while (!list_empty(&session->s_unsafe)) {
req = list_first_entry(&session->s_unsafe,
struct ceph_mds_request, r_unsafe_item);
- list_del_init(&req->r_unsafe_item);
pr_warn_ratelimited(" dropping unsafe request %llu\n",
req->r_tid);
__unregister_request(mdsc, req);
@@ -1146,7 +1171,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
if (ci->i_wrbuffer_ref > 0 &&
- ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
invalidate = true;
while (!list_empty(&ci->i_cap_flush_list)) {
@@ -1775,18 +1800,23 @@ retry:
return path;
}
-static int build_dentry_path(struct dentry *dentry,
+static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
int *pfreepath)
{
char *path;
- if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) {
- *pino = ceph_ino(d_inode(dentry->d_parent));
+ rcu_read_lock();
+ if (!dir)
+ dir = d_inode_rcu(dentry->d_parent);
+ if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
+ *pino = ceph_ino(dir);
+ rcu_read_unlock();
*ppath = dentry->d_name.name;
*ppathlen = dentry->d_name.len;
return 0;
}
+ rcu_read_unlock();
path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -1822,8 +1852,8 @@ static int build_inode_path(struct inode *inode,
* an explicit ino+path.
*/
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
- const char *rpath, u64 rino,
- const char **ppath, int *pathlen,
+ struct inode *rdiri, const char *rpath,
+ u64 rino, const char **ppath, int *pathlen,
u64 *ino, int *freepath)
{
int r = 0;
@@ -1833,7 +1863,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
ceph_snap(rinode));
} else if (rdentry) {
- r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
+ r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
+ freepath);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
} else if (rpath || rino) {
@@ -1866,7 +1897,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
int ret;
ret = set_request_path_attr(req->r_inode, req->r_dentry,
- req->r_path1, req->r_ino1.ino,
+ req->r_parent, req->r_path1, req->r_ino1.ino,
&path1, &pathlen1, &ino1, &freepath1);
if (ret < 0) {
msg = ERR_PTR(ret);
@@ -1874,6 +1905,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
}
ret = set_request_path_attr(NULL, req->r_old_dentry,
+ req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
&path2, &pathlen2, &ino2, &freepath2);
if (ret < 0) {
@@ -1927,10 +1959,13 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
mds, req->r_inode_drop, req->r_inode_unless, 0);
if (req->r_dentry_drop)
releases += ceph_encode_dentry_release(&p, req->r_dentry,
- mds, req->r_dentry_drop, req->r_dentry_unless);
+ req->r_parent, mds, req->r_dentry_drop,
+ req->r_dentry_unless);
if (req->r_old_dentry_drop)
releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
- mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
+ req->r_old_dentry_dir, mds,
+ req->r_old_dentry_drop,
+ req->r_old_dentry_unless);
if (req->r_old_inode_drop)
releases += ceph_encode_inode_release(&p,
d_inode(req->r_old_dentry),
@@ -2012,7 +2047,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
- if (req->r_got_unsafe) {
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
void *p;
/*
* Replay. Do not regenerate message (and rebuild
@@ -2061,16 +2096,16 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
rhead = msg->front.iov_base;
rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_REPLAY;
- if (req->r_locked_dir)
+ if (req->r_parent)
flags |= CEPH_MDS_FLAG_WANT_DENTRY;
rhead->flags = cpu_to_le32(flags);
rhead->num_fwd = req->r_num_fwd;
rhead->num_retry = req->r_attempts - 1;
rhead->ino = 0;
- dout(" r_locked_dir = %p\n", req->r_locked_dir);
+ dout(" r_parent = %p\n", req->r_parent);
return 0;
}
@@ -2084,8 +2119,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = 0;
- if (req->r_err || req->r_got_result) {
- if (req->r_aborted)
+ if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
+ if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
__unregister_request(mdsc, req);
goto out;
}
@@ -2096,12 +2131,12 @@ static int __do_request(struct ceph_mds_client *mdsc,
err = -EIO;
goto finish;
}
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
dout("do_request forced umount\n");
err = -EIO;
goto finish;
}
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
if (mdsc->mdsmap_err) {
err = mdsc->mdsmap_err;
dout("do_request mdsmap err %d\n", err);
@@ -2215,7 +2250,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
while (p) {
req = rb_entry(p, struct ceph_mds_request, r_node);
p = rb_next(p);
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
continue;
if (req->r_attempts > 0)
continue; /* only new requests */
@@ -2250,11 +2285,11 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
dout("do_request on %p\n", req);
- /* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */
+ /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
if (req->r_inode)
ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
- if (req->r_locked_dir)
- ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+ if (req->r_parent)
+ ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
if (req->r_old_dentry_dir)
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
@@ -2289,7 +2324,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
/* only abort if we didn't race with a real reply */
- if (req->r_got_result) {
+ if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
err = le32_to_cpu(req->r_reply_info.head->result);
} else if (err < 0) {
dout("aborted request %lld with %d\n", req->r_tid, err);
@@ -2301,10 +2336,10 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
*/
mutex_lock(&req->r_fill_mutex);
req->r_err = err;
- req->r_aborted = true;
+ set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
mutex_unlock(&req->r_fill_mutex);
- if (req->r_locked_dir &&
+ if (req->r_parent &&
(req->r_op & CEPH_MDS_OP_WRITE))
ceph_invalidate_dir_request(req);
} else {
@@ -2323,7 +2358,7 @@ out:
*/
void ceph_invalidate_dir_request(struct ceph_mds_request *req)
{
- struct inode *inode = req->r_locked_dir;
+ struct inode *inode = req->r_parent;
dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
@@ -2379,14 +2414,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
}
/* dup? */
- if ((req->r_got_unsafe && !head->safe) ||
- (req->r_got_safe && head->safe)) {
+ if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) ||
+ (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) {
pr_warn("got a dup %s reply on %llu from mds%d\n",
head->safe ? "safe" : "unsafe", tid, mds);
mutex_unlock(&mdsc->mutex);
goto out;
}
- if (req->r_got_safe) {
+ if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) {
pr_warn("got unsafe after safe on %llu from mds%d\n",
tid, mds);
mutex_unlock(&mdsc->mutex);
@@ -2425,10 +2460,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
- req->r_got_safe = true;
+ set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags);
__unregister_request(mdsc, req);
- if (req->r_got_unsafe) {
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
/*
* We already handled the unsafe response, now do the
* cleanup. No need to examine the response; the MDS
@@ -2437,7 +2472,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
* useful we could do with a revised return value.
*/
dout("got safe reply %llu, mds%d\n", tid, mds);
- list_del_init(&req->r_unsafe_item);
/* last unsafe request during umount? */
if (mdsc->stopping && !__get_oldest_req(mdsc))
@@ -2446,7 +2480,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
goto out;
}
} else {
- req->r_got_unsafe = true;
+ set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags);
list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
if (req->r_unsafe_dir) {
struct ceph_inode_info *ci =
@@ -2486,7 +2520,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* insert trace into our cache */
mutex_lock(&req->r_fill_mutex);
current->journal_info = req;
- err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
+ err = ceph_fill_trace(mdsc->fsc->sb, req);
if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
req->r_op == CEPH_MDS_OP_LSSNAP))
@@ -2500,7 +2534,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (realm)
ceph_put_snap_realm(mdsc, realm);
- if (err == 0 && req->r_got_unsafe && req->r_target_inode) {
+ if (err == 0 && req->r_target_inode &&
+ test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
spin_lock(&ci->i_unsafe_lock);
list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops);
@@ -2508,12 +2543,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
}
out_err:
mutex_lock(&mdsc->mutex);
- if (!req->r_aborted) {
+ if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
if (err) {
req->r_err = err;
} else {
req->r_reply = ceph_msg_get(msg);
- req->r_got_result = true;
+ set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags);
}
} else {
dout("reply arrived after request %lld was aborted\n", tid);
@@ -2557,7 +2592,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
goto out; /* dup reply? */
}
- if (req->r_aborted) {
+ if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
dout("forward tid %llu aborted, unregistering\n", tid);
__unregister_request(mdsc, req);
} else if (fwd_seq <= req->r_num_fwd) {
@@ -2567,7 +2602,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
/* resend. forward race not possible; mds would drop */
dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
BUG_ON(req->r_err);
- BUG_ON(req->r_got_result);
+ BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags));
req->r_attempts = 0;
req->r_num_fwd = fwd_seq;
req->r_resend_mds = next_mds;
@@ -2732,7 +2767,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
while (p) {
req = rb_entry(p, struct ceph_mds_request, r_node);
p = rb_next(p);
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
continue;
if (req->r_attempts == 0)
continue; /* only old requests */
@@ -3556,7 +3591,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
u64 want_tid, want_flush;
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return;
dout("sync\n");
@@ -3587,7 +3622,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
*/
static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
{
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return true;
return atomic_read(&mdsc->num_sessions) <= skipped;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 3c6f77b..ac0475a 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -202,9 +202,18 @@ struct ceph_mds_request {
char *r_path1, *r_path2;
struct ceph_vino r_ino1, r_ino2;
- struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */
+ struct inode *r_parent; /* parent dir inode */
struct inode *r_target_inode; /* resulting inode */
+#define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */
+#define CEPH_MDS_R_ABORTED (2) /* call was aborted */
+#define CEPH_MDS_R_GOT_UNSAFE (3) /* got an unsafe reply */
+#define CEPH_MDS_R_GOT_SAFE (4) /* got a safe reply */
+#define CEPH_MDS_R_GOT_RESULT (5) /* got a result */
+#define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */
+#define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */
+ unsigned long r_req_flags;
+
struct mutex r_fill_mutex;
union ceph_mds_request_args r_args;
@@ -216,7 +225,6 @@ struct ceph_mds_request {
/* for choosing which mds to send this request to */
int r_direct_mode;
u32 r_direct_hash; /* choose dir frag based on this dentry hash */
- bool r_direct_is_hash; /* true if r_direct_hash is valid */
/* data payload is used for xattr ops */
struct ceph_pagelist *r_pagelist;
@@ -234,7 +242,6 @@ struct ceph_mds_request {
struct ceph_mds_reply_info_parsed r_reply_info;
struct page *r_locked_page;
int r_err;
- bool r_aborted;
unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */
unsigned long r_started; /* start time to measure timeout against */
@@ -262,9 +269,7 @@ struct ceph_mds_request {
ceph_mds_request_callback_t r_callback;
ceph_mds_request_wait_callback_t r_wait_for_completion;
struct list_head r_unsafe_item; /* per-session unsafe list item */
- bool r_got_unsafe, r_got_safe, r_got_result;
- bool r_did_prepopulate;
long long r_dir_release_cnt;
long long r_dir_ordered_cnt;
int r_readdir_cache_idx;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 6bd20d7..0ec8d01 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -757,7 +757,6 @@ static const struct super_operations ceph_super_ops = {
.destroy_inode = ceph_destroy_inode,
.write_inode = ceph_write_inode,
.drop_inode = ceph_drop_inode,
- .evict_inode = ceph_evict_inode,
.sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
.show_options = ceph_show_options,
@@ -952,6 +951,14 @@ static int ceph_register_bdi(struct super_block *sb,
fsc->backing_dev_info.ra_pages =
VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+ if (fsc->mount_options->rsize > fsc->mount_options->rasize &&
+ fsc->mount_options->rsize >= PAGE_SIZE)
+ fsc->backing_dev_info.io_pages =
+ (fsc->mount_options->rsize + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
+ else if (fsc->mount_options->rsize == 0)
+ fsc->backing_dev_info.io_pages = ULONG_MAX;
+
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
atomic_long_inc_return(&bdi_seq));
if (!err)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3373b61..fe6b9cf 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -45,8 +45,8 @@
#define ceph_test_mount_opt(fsc, opt) \
(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
-#define CEPH_RSIZE_DEFAULT 0 /* max read size */
-#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */
+#define CEPH_RSIZE_DEFAULT (64*1024*1024) /* max read size */
+#define CEPH_RASIZE_DEFAULT (8192*1024) /* max readahead */
#define CEPH_MAX_READDIR_DEFAULT 1024
#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
@@ -343,7 +343,6 @@ struct ceph_inode_info {
u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
- struct list_head i_unsafe_writes; /* uncommitted sync writes */
struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
struct list_head i_unsafe_iops; /* uncommitted mds inode ops */
spinlock_t i_unsafe_lock;
@@ -602,7 +601,7 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
}
/* what the mds thinks we want */
-extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
+extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
extern void ceph_caps_init(struct ceph_mds_client *mdsc);
extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
@@ -753,7 +752,6 @@ extern const struct inode_operations ceph_file_iops;
extern struct inode *ceph_alloc_inode(struct super_block *sb);
extern void ceph_destroy_inode(struct inode *inode);
extern int ceph_drop_inode(struct inode *inode);
-extern void ceph_evict_inode(struct inode *inode);
extern struct inode *ceph_get_inode(struct super_block *sb,
struct ceph_vino vino);
@@ -764,8 +762,7 @@ extern void ceph_fill_file_time(struct inode *inode, int issued,
u64 time_warp_seq, struct timespec *ctime,
struct timespec *mtime, struct timespec *atime);
extern int ceph_fill_trace(struct super_block *sb,
- struct ceph_mds_request *req,
- struct ceph_mds_session *session);
+ struct ceph_mds_request *req);
extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct ceph_mds_session *session);
@@ -787,8 +784,8 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
extern int ceph_permission(struct inode *inode, int mask);
extern int __ceph_setattr(struct inode *inode, struct iattr *attr);
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
-extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat);
+extern int ceph_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags);
/* xattr.c */
int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int);
@@ -904,6 +901,7 @@ extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
extern int ceph_encode_inode_release(void **p, struct inode *inode,
int mds, int drop, int unless, int force);
extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
+ struct inode *dir,
int mds, int drop, int unless);
extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
@@ -933,7 +931,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
extern int ceph_release(struct inode *inode, struct file *filp);
extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
char *data, size_t len);
-extern void ceph_sync_write_wait(struct inode *inode);
+
/* dir.c */
extern const struct file_operations ceph_dir_fops;
extern const struct file_operations ceph_snapdir_fops;
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 9156be5..6b61df1 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -303,7 +303,9 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
* gives us the latter, so we must adjust the result.
*/
mnt = ERR_PTR(-ENOMEM);
- full_path = build_path_from_dentry(mntpt);
+
+ /* always use tree name prefix */
+ full_path = build_path_from_dentry_optional_prefix(mntpt, true);
if (full_path == NULL)
goto cdda_exit;
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 479bc0a..3d7298c 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -130,10 +130,10 @@ wchar_t cifs_toupper(wchar_t in);
* Returns:
* Address of the first string
*/
-static inline wchar_t *
-UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
+static inline __le16 *
+UniStrcat(__le16 *ucs1, const __le16 *ucs2)
{
- wchar_t *anchor = ucs1; /* save a pointer to start of ucs1 */
+ __le16 *anchor = ucs1; /* save a pointer to start of ucs1 */
while (*ucs1++) ; /* To end of first string */
ucs1--; /* Return to the null */
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index c9c00a8..da717fe 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -83,7 +83,7 @@ extern int cifs_revalidate_dentry(struct dentry *);
extern int cifs_invalidate_mapping(struct inode *inode);
extern int cifs_revalidate_mapping(struct inode *inode);
extern int cifs_zap_mapping(struct inode *inode);
-extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int cifs_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int cifs_setattr(struct dentry *, struct iattr *);
extern const struct inode_operations cifs_file_inode_ops;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1a90bb3..d42dd32 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -443,6 +443,9 @@ struct smb_version_operations {
int (*is_transform_hdr)(void *buf);
int (*receive_transform)(struct TCP_Server_Info *,
struct mid_q_entry **);
+ enum securityEnum (*select_sectype)(struct TCP_Server_Info *,
+ enum securityEnum);
+
};
struct smb_version_values {
@@ -822,7 +825,7 @@ struct cifs_ses {
int ses_count; /* reference counter */
enum statusEnum status;
unsigned overrideSecFlg; /* if non-zero override global sec flags */
- __u16 ipc_tid; /* special tid for connection to IPC share */
+ __u32 ipc_tid; /* special tid for connection to IPC share */
char *serverOS; /* name of operating system underlying server */
char *serverNOS; /* name of network operating system of server */
char *serverDomain; /* security realm of server */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index f5b8730..1ce733f 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2086,17 +2086,21 @@ typedef struct dfs_referral_level_3 { /* version 4 is same, + one flag bit */
__u8 ServiceSiteGuid[16]; /* MBZ, ignored */
} __attribute__((packed)) REFERRAL3;
-typedef struct smb_com_transaction_get_dfs_refer_rsp {
- struct smb_hdr hdr; /* wct = 10 */
- struct trans2_resp t2;
- __u16 ByteCount;
- __u8 Pad;
+struct get_dfs_referral_rsp {
__le16 PathConsumed;
__le16 NumberOfReferrals;
__le32 DFSFlags;
REFERRAL3 referrals[1]; /* array of level 3 dfs_referral structures */
/* followed by the strings pointed to by the referral structures */
-} __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_RSP;
+} __packed;
+
+typedef struct smb_com_transaction_get_dfs_refer_rsp {
+ struct smb_hdr hdr; /* wct = 10 */
+ struct trans2_resp t2;
+ __u16 ByteCount;
+ __u8 Pad;
+ struct get_dfs_referral_rsp dfs_data;
+} __packed TRANSACTION2_GET_DFS_REFER_RSP;
/* DFS Flags */
#define DFSREF_REFERRAL_SERVER 0x00000001 /* all targets are DFS roots */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 406d2c1..97e5d23 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -61,6 +61,8 @@ extern void exit_cifs_idmap(void);
extern int init_cifs_spnego(void);
extern void exit_cifs_spnego(void);
extern char *build_path_from_dentry(struct dentry *);
+extern char *build_path_from_dentry_optional_prefix(struct dentry *direntry,
+ bool prefix);
extern char *cifs_build_path_to_root(struct smb_vol *vol,
struct cifs_sb_info *cifs_sb,
struct cifs_tcon *tcon,
@@ -284,6 +286,11 @@ extern int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
const struct nls_table *nls_codepage,
unsigned int *num_referrals,
struct dfs_info3_param **referrals, int remap);
+extern int parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
+ unsigned int *num_of_nodes,
+ struct dfs_info3_param **target_nodes,
+ const struct nls_table *nls_codepage, int remap,
+ const char *searchName, bool is_unicode);
extern void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
struct smb_vol *vol);
@@ -526,4 +533,6 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
int __cifs_calc_signature(struct smb_rqst *rqst,
struct TCP_Server_Info *server, char *signature,
struct shash_desc *shash);
+enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
+ enum securityEnum);
#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f5099fb..0669506 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4786,117 +4786,6 @@ GetInodeNumOut:
return rc;
}
-/* parses DFS refferal V3 structure
- * caller is responsible for freeing target_nodes
- * returns:
- * on success - 0
- * on failure - errno
- */
-static int
-parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
- unsigned int *num_of_nodes,
- struct dfs_info3_param **target_nodes,
- const struct nls_table *nls_codepage, int remap,
- const char *searchName)
-{
- int i, rc = 0;
- char *data_end;
- bool is_unicode;
- struct dfs_referral_level_3 *ref;
-
- if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
- is_unicode = true;
- else
- is_unicode = false;
- *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals);
-
- if (*num_of_nodes < 1) {
- cifs_dbg(VFS, "num_referrals: must be at least > 0, but we get num_referrals = %d\n",
- *num_of_nodes);
- rc = -EINVAL;
- goto parse_DFS_referrals_exit;
- }
-
- ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals);
- if (ref->VersionNumber != cpu_to_le16(3)) {
- cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n",
- le16_to_cpu(ref->VersionNumber));
- rc = -EINVAL;
- goto parse_DFS_referrals_exit;
- }
-
- /* get the upper boundary of the resp buffer */
- data_end = (char *)(&(pSMBr->PathConsumed)) +
- le16_to_cpu(pSMBr->t2.DataCount);
-
- cifs_dbg(FYI, "num_referrals: %d dfs flags: 0x%x ...\n",
- *num_of_nodes, le32_to_cpu(pSMBr->DFSFlags));
-
- *target_nodes = kcalloc(*num_of_nodes, sizeof(struct dfs_info3_param),
- GFP_KERNEL);
- if (*target_nodes == NULL) {
- rc = -ENOMEM;
- goto parse_DFS_referrals_exit;
- }
-
- /* collect necessary data from referrals */
- for (i = 0; i < *num_of_nodes; i++) {
- char *temp;
- int max_len;
- struct dfs_info3_param *node = (*target_nodes)+i;
-
- node->flags = le32_to_cpu(pSMBr->DFSFlags);
- if (is_unicode) {
- __le16 *tmp = kmalloc(strlen(searchName)*2 + 2,
- GFP_KERNEL);
- if (tmp == NULL) {
- rc = -ENOMEM;
- goto parse_DFS_referrals_exit;
- }
- cifsConvertToUTF16((__le16 *) tmp, searchName,
- PATH_MAX, nls_codepage, remap);
- node->path_consumed = cifs_utf16_bytes(tmp,
- le16_to_cpu(pSMBr->PathConsumed),
- nls_codepage);
- kfree(tmp);
- } else
- node->path_consumed = le16_to_cpu(pSMBr->PathConsumed);
-
- node->server_type = le16_to_cpu(ref->ServerType);
- node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags);
-
- /* copy DfsPath */
- temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset);
- max_len = data_end - temp;
- node->path_name = cifs_strndup_from_utf16(temp, max_len,
- is_unicode, nls_codepage);
- if (!node->path_name) {
- rc = -ENOMEM;
- goto parse_DFS_referrals_exit;
- }
-
- /* copy link target UNC */
- temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset);
- max_len = data_end - temp;
- node->node_name = cifs_strndup_from_utf16(temp, max_len,
- is_unicode, nls_codepage);
- if (!node->node_name) {
- rc = -ENOMEM;
- goto parse_DFS_referrals_exit;
- }
-
- ref++;
- }
-
-parse_DFS_referrals_exit:
- if (rc) {
- free_dfs_info_array(*target_nodes, *num_of_nodes);
- *target_nodes = NULL;
- *num_of_nodes = 0;
- }
- return rc;
-}
-
int
CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
const char *search_name, struct dfs_info3_param **target_nodes,
@@ -4993,9 +4882,11 @@ getDFSRetry:
get_bcc(&pSMBr->hdr), le16_to_cpu(pSMBr->t2.DataOffset));
/* parse returned result into more usable form */
- rc = parse_DFS_referrals(pSMBr, num_of_nodes,
- target_nodes, nls_codepage, remap,
- search_name);
+ rc = parse_dfs_referrals(&pSMBr->dfs_data,
+ le16_to_cpu(pSMBr->t2.DataCount),
+ num_of_nodes, target_nodes, nls_codepage,
+ remap, search_name,
+ (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) != 0);
GetDFSRefExit:
cifs_buf_release(pSMB);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 777ad9f..9ae695a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -21,6 +21,7 @@
#include <linux/fs.h>
#include <linux/net.h>
#include <linux/string.h>
+#include <linux/sched/signal.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/slab.h>
@@ -2073,7 +2074,8 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
* that was specified, or "Unspecified" if that sectype was not
* compatible with the given NEGOTIATE request.
*/
- if (select_sectype(server, vol->sectype) == Unspecified)
+ if (server->ops->select_sectype(server, vol->sectype)
+ == Unspecified)
return false;
/*
@@ -2455,7 +2457,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
}
down_read(&key->sem);
- upayload = user_key_payload(key);
+ upayload = user_key_payload_locked(key);
if (IS_ERR_OR_NULL(upayload)) {
rc = upayload ? PTR_ERR(upayload) : -EINVAL;
goto out_key_put;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 2c227a9..56366e9 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -81,6 +81,17 @@ cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
char *
build_path_from_dentry(struct dentry *direntry)
{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ bool prefix = tcon->Flags & SMB_SHARE_IS_IN_DFS;
+
+ return build_path_from_dentry_optional_prefix(direntry,
+ prefix);
+}
+
+char *
+build_path_from_dentry_optional_prefix(struct dentry *direntry, bool prefix)
+{
struct dentry *temp;
int namelen;
int dfsplen;
@@ -92,7 +103,7 @@ build_path_from_dentry(struct dentry *direntry)
unsigned seq;
dirsep = CIFS_DIR_SEP(cifs_sb);
- if (tcon->Flags & SMB_SHARE_IS_IN_DFS)
+ if (prefix)
dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1);
else
dfsplen = 0;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7ab5be7..b261db3 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -23,6 +23,8 @@
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/freezer.h>
+#include <linux/sched/signal.h>
+
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
@@ -1990,9 +1992,10 @@ int cifs_revalidate_dentry(struct dentry *dentry)
return cifs_revalidate_mapping(inode);
}
-int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+int cifs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
+ struct dentry *dentry = path->dentry;
struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
struct inode *inode = d_inode(dentry);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index c672915..d3fb115 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -640,3 +640,108 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
cifs_add_pending_open_locked(fid, tlink, open);
spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
}
+
+/* parses DFS refferal V3 structure
+ * caller is responsible for freeing target_nodes
+ * returns:
+ * - on success - 0
+ * - on failure - errno
+ */
+int
+parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
+ unsigned int *num_of_nodes,
+ struct dfs_info3_param **target_nodes,
+ const struct nls_table *nls_codepage, int remap,
+ const char *searchName, bool is_unicode)
+{
+ int i, rc = 0;
+ char *data_end;
+ struct dfs_referral_level_3 *ref;
+
+ *num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals);
+
+ if (*num_of_nodes < 1) {
+ cifs_dbg(VFS, "num_referrals: must be at least > 0, but we get num_referrals = %d\n",
+ *num_of_nodes);
+ rc = -EINVAL;
+ goto parse_DFS_referrals_exit;
+ }
+
+ ref = (struct dfs_referral_level_3 *) &(rsp->referrals);
+ if (ref->VersionNumber != cpu_to_le16(3)) {
+ cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n",
+ le16_to_cpu(ref->VersionNumber));
+ rc = -EINVAL;
+ goto parse_DFS_referrals_exit;
+ }
+
+ /* get the upper boundary of the resp buffer */
+ data_end = (char *)rsp + rsp_size;
+
+ cifs_dbg(FYI, "num_referrals: %d dfs flags: 0x%x ...\n",
+ *num_of_nodes, le32_to_cpu(rsp->DFSFlags));
+
+ *target_nodes = kcalloc(*num_of_nodes, sizeof(struct dfs_info3_param),
+ GFP_KERNEL);
+ if (*target_nodes == NULL) {
+ rc = -ENOMEM;
+ goto parse_DFS_referrals_exit;
+ }
+
+ /* collect necessary data from referrals */
+ for (i = 0; i < *num_of_nodes; i++) {
+ char *temp;
+ int max_len;
+ struct dfs_info3_param *node = (*target_nodes)+i;
+
+ node->flags = le32_to_cpu(rsp->DFSFlags);
+ if (is_unicode) {
+ __le16 *tmp = kmalloc(strlen(searchName)*2 + 2,
+ GFP_KERNEL);
+ if (tmp == NULL) {
+ rc = -ENOMEM;
+ goto parse_DFS_referrals_exit;
+ }
+ cifsConvertToUTF16((__le16 *) tmp, searchName,
+ PATH_MAX, nls_codepage, remap);
+ node->path_consumed = cifs_utf16_bytes(tmp,
+ le16_to_cpu(rsp->PathConsumed),
+ nls_codepage);
+ kfree(tmp);
+ } else
+ node->path_consumed = le16_to_cpu(rsp->PathConsumed);
+
+ node->server_type = le16_to_cpu(ref->ServerType);
+ node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags);
+
+ /* copy DfsPath */
+ temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset);
+ max_len = data_end - temp;
+ node->path_name = cifs_strndup_from_utf16(temp, max_len,
+ is_unicode, nls_codepage);
+ if (!node->path_name) {
+ rc = -ENOMEM;
+ goto parse_DFS_referrals_exit;
+ }
+
+ /* copy link target UNC */
+ temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset);
+ max_len = data_end - temp;
+ node->node_name = cifs_strndup_from_utf16(temp, max_len,
+ is_unicode, nls_codepage);
+ if (!node->node_name) {
+ rc = -ENOMEM;
+ goto parse_DFS_referrals_exit;
+ }
+
+ ref++;
+ }
+
+parse_DFS_referrals_exit:
+ if (rc) {
+ free_dfs_info_array(*target_nodes, *num_of_nodes);
+ *target_nodes = NULL;
+ *num_of_nodes = 0;
+ }
+ return rc;
+}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index dcbcc92..8b0502c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -498,7 +498,7 @@ setup_ntlmv2_ret:
}
enum securityEnum
-select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
+cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
{
switch (server->negflavor) {
case CIFS_NEGFLAVOR_EXTENDED:
@@ -1391,7 +1391,7 @@ static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
{
int type;
- type = select_sectype(ses->server, ses->sectype);
+ type = cifs_select_sectype(ses->server, ses->sectype);
cifs_dbg(FYI, "sess setup type %d\n", type);
if (type == Unspecified) {
cifs_dbg(VFS,
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 67a987e..cc93ba4 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1087,6 +1087,7 @@ struct smb_version_operations smb1_operations = {
.is_read_op = cifs_is_read_op,
.wp_retry_size = cifs_wp_retry_size,
.dir_needs_close = cifs_dir_needs_close,
+ .select_sectype = cifs_select_sectype,
#ifdef CONFIG_CIFS_XATTR
.query_all_EAs = CIFSSMBQAllEAs,
.set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index b2aff0c..b4b1f03 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -73,7 +73,8 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */
nr_ioctl_req.Reserved = 0;
rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid,
- fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, true,
+ fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY,
+ true /* is_fsctl */, false /* use_ipc */,
(char *)&nr_ioctl_req, sizeof(nr_ioctl_req),
NULL, NULL /* no return info */);
if (rc == -EOPNOTSUPP) {
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index a44b4db..0231108 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -282,6 +282,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
+ false /* use_ipc */,
NULL /* no data input */, 0 /* no data input */,
(char **)&out_buf, &ret_data_len);
if (rc != 0)
@@ -571,6 +572,7 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */,
+ false /* use_ipc */,
NULL, 0 /* no input */,
(char **)&res_key, &ret_data_len);
@@ -635,7 +637,8 @@ smb2_clone_range(const unsigned int xid,
/* Request server copy to target from src identified by key */
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
- true /* is_fsctl */, (char *)pcchunk,
+ true /* is_fsctl */, false /* use_ipc */,
+ (char *)pcchunk,
sizeof(struct copychunk_ioctl), (char **)&retbuf,
&ret_data_len);
if (rc == 0) {
@@ -787,7 +790,8 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_SPARSE,
- true /* is_fctl */, &setsparse, 1, NULL, NULL);
+ true /* is_fctl */, false /* use_ipc */,
+ &setsparse, 1, NULL, NULL);
if (rc) {
tcon->broken_sparse_sup = true;
cifs_dbg(FYI, "set sparse rc = %d\n", rc);
@@ -857,7 +861,8 @@ smb2_duplicate_extents(const unsigned int xid,
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid,
FSCTL_DUPLICATE_EXTENTS_TO_FILE,
- true /* is_fsctl */, (char *)&dup_ext_buf,
+ true /* is_fsctl */, false /* use_ipc */,
+ (char *)&dup_ext_buf,
sizeof(struct duplicate_extents_to_file),
NULL,
&ret_data_len);
@@ -891,7 +896,8 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
FSCTL_SET_INTEGRITY_INFORMATION,
- true /* is_fsctl */, (char *)&integr_info,
+ true /* is_fsctl */, false /* use_ipc */,
+ (char *)&integr_info,
sizeof(struct fsctl_set_integrity_information_req),
NULL,
&ret_data_len);
@@ -910,7 +916,8 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
FSCTL_SRV_ENUMERATE_SNAPSHOTS,
- true /* is_fsctl */, NULL, 0 /* no input data */,
+ true /* is_fsctl */, false /* use_ipc */,
+ NULL, 0 /* no input data */,
(char **)&retbuf,
&ret_data_len);
cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n",
@@ -1097,6 +1104,103 @@ smb2_new_lease_key(struct cifs_fid *fid)
generate_random_uuid(fid->lease_key);
}
+static int
+smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
+ const char *search_name,
+ struct dfs_info3_param **target_nodes,
+ unsigned int *num_of_nodes,
+ const struct nls_table *nls_codepage, int remap)
+{
+ int rc;
+ __le16 *utf16_path = NULL;
+ int utf16_path_len = 0;
+ struct cifs_tcon *tcon;
+ struct fsctl_get_dfs_referral_req *dfs_req = NULL;
+ struct get_dfs_referral_rsp *dfs_rsp = NULL;
+ u32 dfs_req_size = 0, dfs_rsp_size = 0;
+
+ cifs_dbg(FYI, "smb2_get_dfs_refer path <%s>\n", search_name);
+
+ /*
+ * Use any tcon from the current session. Here, the first one.
+ */
+ spin_lock(&cifs_tcp_ses_lock);
+ tcon = list_first_entry_or_null(&ses->tcon_list, struct cifs_tcon,
+ tcon_list);
+ if (tcon)
+ tcon->tc_count++;
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ if (!tcon) {
+ cifs_dbg(VFS, "session %p has no tcon available for a dfs referral request\n",
+ ses);
+ rc = -ENOTCONN;
+ goto out;
+ }
+
+ utf16_path = cifs_strndup_to_utf16(search_name, PATH_MAX,
+ &utf16_path_len,
+ nls_codepage, remap);
+ if (!utf16_path) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ dfs_req_size = sizeof(*dfs_req) + utf16_path_len;
+ dfs_req = kzalloc(dfs_req_size, GFP_KERNEL);
+ if (!dfs_req) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Highest DFS referral version understood */
+ dfs_req->MaxReferralLevel = DFS_VERSION;
+
+ /* Path to resolve in an UTF-16 null-terminated string */
+ memcpy(dfs_req->RequestFileName, utf16_path, utf16_path_len);
+
+ do {
+ /* try first with IPC */
+ rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
+ FSCTL_DFS_GET_REFERRALS,
+ true /* is_fsctl */, true /* use_ipc */,
+ (char *)dfs_req, dfs_req_size,
+ (char **)&dfs_rsp, &dfs_rsp_size);
+ if (rc == -ENOTCONN) {
+ /* try with normal tcon */
+ rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
+ FSCTL_DFS_GET_REFERRALS,
+ true /* is_fsctl */, false /*use_ipc*/,
+ (char *)dfs_req, dfs_req_size,
+ (char **)&dfs_rsp, &dfs_rsp_size);
+ }
+ } while (rc == -EAGAIN);
+
+ if (rc) {
+ cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
+ goto out;
+ }
+
+ rc = parse_dfs_referrals(dfs_rsp, dfs_rsp_size,
+ num_of_nodes, target_nodes,
+ nls_codepage, remap, search_name,
+ true /* is_unicode */);
+ if (rc) {
+ cifs_dbg(VFS, "parse error in smb2_get_dfs_refer rc=%d\n", rc);
+ goto out;
+ }
+
+ out:
+ if (tcon) {
+ spin_lock(&cifs_tcp_ses_lock);
+ tcon->tc_count--;
+ spin_unlock(&cifs_tcp_ses_lock);
+ }
+ kfree(utf16_path);
+ kfree(dfs_req);
+ kfree(dfs_rsp);
+ return rc;
+}
#define SMB2_SYMLINK_STRUCT_SIZE \
(sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp))
@@ -1220,7 +1324,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
- true /* is_fctl */, (char *)&fsctl_buf,
+ true /* is_fctl */, false /* use_ipc */,
+ (char *)&fsctl_buf,
sizeof(struct file_zero_data_information), NULL, NULL);
free_xid(xid);
return rc;
@@ -1254,7 +1359,8 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
- true /* is_fctl */, (char *)&fsctl_buf,
+ true /* is_fctl */, false /* use_ipc */,
+ (char *)&fsctl_buf,
sizeof(struct file_zero_data_information), NULL, NULL);
free_xid(xid);
return rc;
@@ -1609,6 +1715,26 @@ static void cifs_crypt_complete(struct crypto_async_request *req, int err)
complete(&res->completion);
}
+static int
+smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
+{
+ struct cifs_ses *ses;
+ u8 *ses_enc_key;
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (ses->Suid != ses_id)
+ continue;
+ ses_enc_key = enc ? ses->smb3encryptionkey :
+ ses->smb3decryptionkey;
+ memcpy(key, ses_enc_key, SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ return 1;
+}
/*
* Encrypt or decrypt @rqst message. @rqst has the following format:
* iov[0] - transform header (associate data),
@@ -1622,10 +1748,10 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
struct smb2_transform_hdr *tr_hdr =
(struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
- struct cifs_ses *ses;
int rc = 0;
struct scatterlist *sg;
u8 sign[SMB2_SIGNATURE_SIZE] = {};
+ u8 key[SMB3_SIGN_KEY_SIZE];
struct aead_request *req;
char *iv;
unsigned int iv_len;
@@ -1635,9 +1761,10 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
init_completion(&result.completion);
- ses = smb2_find_smb_ses(server, tr_hdr->SessionId);
- if (!ses) {
- cifs_dbg(VFS, "%s: Could not find session\n", __func__);
+ rc = smb2_get_enc_key(server, tr_hdr->SessionId, enc, key);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not get %scryption key\n", __func__,
+ enc ? "en" : "de");
return 0;
}
@@ -1649,8 +1776,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
tfm = enc ? server->secmech.ccmaesencrypt :
server->secmech.ccmaesdecrypt;
- rc = crypto_aead_setkey(tfm, enc ? ses->smb3encryptionkey :
- ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+ rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc);
return rc;
@@ -2254,6 +2380,8 @@ struct smb_version_operations smb20_operations = {
.clone_range = smb2_clone_range,
.wp_retry_size = smb2_wp_retry_size,
.dir_needs_close = smb2_dir_needs_close,
+ .get_dfs_refer = smb2_get_dfs_refer,
+ .select_sectype = smb2_select_sectype,
};
struct smb_version_operations smb21_operations = {
@@ -2335,6 +2463,8 @@ struct smb_version_operations smb21_operations = {
.wp_retry_size = smb2_wp_retry_size,
.dir_needs_close = smb2_dir_needs_close,
.enum_snapshots = smb3_enum_snapshots,
+ .get_dfs_refer = smb2_get_dfs_refer,
+ .select_sectype = smb2_select_sectype,
};
struct smb_version_operations smb30_operations = {
@@ -2426,6 +2556,8 @@ struct smb_version_operations smb30_operations = {
.free_transform_rq = smb3_free_transform_rq,
.is_transform_hdr = smb3_is_transform_hdr,
.receive_transform = smb3_receive_transform,
+ .get_dfs_refer = smb2_get_dfs_refer,
+ .select_sectype = smb2_select_sectype,
};
#ifdef CONFIG_CIFS_SMB311
@@ -2518,6 +2650,8 @@ struct smb_version_operations smb311_operations = {
.free_transform_rq = smb3_free_transform_rq,
.is_transform_hdr = smb3_is_transform_hdr,
.receive_transform = smb3_receive_transform,
+ .get_dfs_refer = smb2_get_dfs_refer,
+ .select_sectype = smb2_select_sectype,
};
#endif /* CIFS_SMB311 */
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index ad83b3d..7446496 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -620,6 +620,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
+ false /* use_ipc */,
(char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
(char **)&pneg_rsp, &rsplen);
@@ -656,6 +657,28 @@ vneg_out:
return -EIO;
}
+enum securityEnum
+smb2_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
+{
+ switch (requested) {
+ case Kerberos:
+ case RawNTLMSSP:
+ return requested;
+ case NTLMv2:
+ return RawNTLMSSP;
+ case Unspecified:
+ if (server->sec_ntlmssp &&
+ (global_secflags & CIFSSEC_MAY_NTLMSSP))
+ return RawNTLMSSP;
+ if ((server->sec_kerberos || server->sec_mskerberos) &&
+ (global_secflags & CIFSSEC_MAY_KRB5))
+ return Kerberos;
+ /* Fallthrough */
+ default:
+ return Unspecified;
+ }
+}
+
struct SMB2_sess_data {
unsigned int xid;
struct cifs_ses *ses;
@@ -1008,10 +1031,17 @@ out:
static int
SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data)
{
- if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
- ses->sectype = RawNTLMSSP;
+ int type;
+
+ type = smb2_select_sectype(ses->server, ses->sectype);
+ cifs_dbg(FYI, "sess setup type %d\n", type);
+ if (type == Unspecified) {
+ cifs_dbg(VFS,
+ "Unable to select appropriate authentication method!");
+ return -EINVAL;
+ }
- switch (ses->sectype) {
+ switch (type) {
case Kerberos:
sess_data->func = SMB2_auth_kerberos;
break;
@@ -1019,7 +1049,7 @@ SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data)
sess_data->func = SMB2_sess_auth_rawntlmssp_negotiate;
break;
default:
- cifs_dbg(VFS, "secType %d not supported!\n", ses->sectype);
+ cifs_dbg(VFS, "secType %d not supported!\n", type);
return -EOPNOTSUPP;
}
@@ -1167,8 +1197,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
/* since no tcon, smb2_init can not do this, so do here */
req->hdr.sync_hdr.SessionId = ses->Suid;
- /* if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED)
- req->hdr.Flags |= SMB2_FLAGS_SIGNED; */
+ if (ses->server->sign)
+ req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
} else if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -1527,6 +1557,51 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec,
return 0;
}
+static int
+alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
+ const char *treename, const __le16 *path)
+{
+ int treename_len, path_len;
+ struct nls_table *cp;
+ const __le16 sep[] = {cpu_to_le16('\\'), cpu_to_le16(0x0000)};
+
+ /*
+ * skip leading "\\"
+ */
+ treename_len = strlen(treename);
+ if (treename_len < 2 || !(treename[0] == '\\' && treename[1] == '\\'))
+ return -EINVAL;
+
+ treename += 2;
+ treename_len -= 2;
+
+ path_len = UniStrnlen((wchar_t *)path, PATH_MAX);
+
+ /*
+ * make room for one path separator between the treename and
+ * path
+ */
+ *out_len = treename_len + 1 + path_len;
+
+ /*
+ * final path needs to be null-terminated UTF16 with a
+ * size aligned to 8
+ */
+
+ *out_size = roundup((*out_len+1)*2, 8);
+ *out_path = kzalloc(*out_size, GFP_KERNEL);
+ if (!*out_path)
+ return -ENOMEM;
+
+ cp = load_nls_default();
+ cifs_strtoUTF16(*out_path, treename, treename_len, cp);
+ UniStrcat(*out_path, sep);
+ UniStrcat(*out_path, path);
+ unload_nls(cp);
+
+ return 0;
+}
+
int
SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
__u8 *oplock, struct smb2_file_all_info *buf,
@@ -1575,30 +1650,49 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
req->ShareAccess = FILE_SHARE_ALL_LE;
req->CreateDisposition = cpu_to_le32(oparms->disposition);
req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK);
- uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
- /* do not count rfc1001 len field */
- req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4);
iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field */
iov[0].iov_len = get_rfc1002_length(req) + 4;
-
- /* MUST set path len (NameLength) to 0 opening root of share */
- req->NameLength = cpu_to_le16(uni_path_len - 2);
/* -1 since last byte is buf[0] which is sent below (path) */
iov[0].iov_len--;
- if (uni_path_len % 8 != 0) {
- copy_size = uni_path_len / 8 * 8;
- if (copy_size < uni_path_len)
- copy_size += 8;
-
- copy_path = kzalloc(copy_size, GFP_KERNEL);
- if (!copy_path)
- return -ENOMEM;
- memcpy((char *)copy_path, (const char *)path,
- uni_path_len);
+
+ req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4);
+
+ /* [MS-SMB2] 2.2.13 NameOffset:
+ * If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of
+ * the SMB2 header, the file name includes a prefix that will
+ * be processed during DFS name normalization as specified in
+ * section 3.3.5.9. Otherwise, the file name is relative to
+ * the share that is identified by the TreeId in the SMB2
+ * header.
+ */
+ if (tcon->share_flags & SHI1005_FLAGS_DFS) {
+ int name_len;
+
+ req->hdr.sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS;
+ rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
+ &name_len,
+ tcon->treeName, path);
+ if (rc)
+ return rc;
+ req->NameLength = cpu_to_le16(name_len * 2);
uni_path_len = copy_size;
path = copy_path;
+ } else {
+ uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
+ /* MUST set path len (NameLength) to 0 opening root of share */
+ req->NameLength = cpu_to_le16(uni_path_len - 2);
+ if (uni_path_len % 8 != 0) {
+ copy_size = roundup(uni_path_len, 8);
+ copy_path = kzalloc(copy_size, GFP_KERNEL);
+ if (!copy_path)
+ return -ENOMEM;
+ memcpy((char *)copy_path, (const char *)path,
+ uni_path_len);
+ uni_path_len = copy_size;
+ path = copy_path;
+ }
}
iov[1].iov_len = uni_path_len;
@@ -1683,8 +1777,9 @@ creat_exit:
*/
int
SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
- u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data,
- u32 indatalen, char **out_data, u32 *plen /* returned data len */)
+ u64 volatile_fid, u32 opcode, bool is_fsctl, bool use_ipc,
+ char *in_data, u32 indatalen,
+ char **out_data, u32 *plen /* returned data len */)
{
struct smb2_ioctl_req *req;
struct smb2_ioctl_rsp *rsp;
@@ -1721,6 +1816,16 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (rc)
return rc;
+ if (use_ipc) {
+ if (ses->ipc_tid == 0) {
+ cifs_small_buf_release(req);
+ return -ENOTCONN;
+ }
+
+ cifs_dbg(FYI, "replacing tid 0x%x with IPC tid 0x%x\n",
+ req->hdr.sync_hdr.TreeId, ses->ipc_tid);
+ req->hdr.sync_hdr.TreeId = ses->ipc_tid;
+ }
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -1843,6 +1948,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SET_COMPRESSION, true /* is_fsctl */,
+ false /* use_ipc */,
(char *)&fsctl_input /* data input */,
2 /* in data len */, &ret_data /* out data */, NULL);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index c03b252..18700fd 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -695,6 +695,14 @@ struct fsctl_get_integrity_information_rsp {
/* Integrity flags for above */
#define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001
+/* See MS-DFSC 2.2.2 */
+struct fsctl_get_dfs_referral_req {
+ __le16 MaxReferralLevel;
+ __u8 RequestFileName[];
+} __packed;
+
+/* DFS response is struct get_dfs_refer_rsp */
+
/* See MS-SMB2 2.2.31.3 */
struct network_resiliency_req {
__le32 Timeout;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 85fc7a7..69e3587 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -121,7 +121,8 @@ extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms,
struct smb2_err_rsp **err_buf);
extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
- bool is_fsctl, char *in_data, u32 indatalen,
+ bool is_fsctl, bool use_ipc,
+ char *in_data, u32 indatalen,
char **out_data, u32 *plen /* returned data len */);
extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
@@ -180,4 +181,6 @@ extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
__u8 *lease_key, const __le32 lease_state);
extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
+extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
+ enum securityEnum);
#endif /* _SMB2PROTO_H */
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index 5104d84..d3c3618 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -47,7 +47,7 @@ int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
int coda_permission(struct inode *inode, int mask);
int coda_revalidate_inode(struct inode *);
-int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+int coda_getattr(const struct path *, struct kstat *, u32, unsigned int);
int coda_setattr(struct dentry *, struct iattr *);
/* this file: heloers */
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 6e0154e..9d956cd 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -96,7 +96,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
cfi->cfi_mapcount++;
spin_unlock(&cii->c_lock);
- return host_file->f_op->mmap(host_file, vma);
+ return call_mmap(host_file, vma);
}
int coda_open(struct inode *coda_inode, struct file *coda_file)
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 71dbe7e..2dea594 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -255,11 +255,12 @@ static void coda_evict_inode(struct inode *inode)
coda_cache_clear_inode(inode);
}
-int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int coda_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
- int err = coda_revalidate_inode(d_inode(dentry));
+ int err = coda_revalidate_inode(d_inode(path->dentry));
if (!err)
- generic_fillattr(d_inode(dentry), stat);
+ generic_fillattr(d_inode(path->dentry), stat);
return err;
}
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 8226291..f40e395 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -22,7 +22,7 @@
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/time.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/ioport.h>
#include <linux/fcntl.h>
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index f6c6c8a..e82357c 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -15,7 +15,7 @@
*/
#include <linux/signal.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
diff --git a/fs/compat.c b/fs/compat.c
index e50a211..c61b506 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -21,6 +21,7 @@
#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/time.h>
+#include <linux/cred.h>
#include <linux/fs.h>
#include <linux/fcntl.h>
#include <linux/namei.h>
diff --git a/fs/coredump.c b/fs/coredump.c
index ae6b056..5926837 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -16,6 +16,9 @@
#include <linux/personality.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task_stack.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
@@ -33,7 +36,6 @@
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
-#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/path.h>
#include <linux/timekeeping.h>
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 02eb6b9..d5d896f 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -103,7 +103,7 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
goto out;
}
down_read(&keyring_key->sem);
- ukp = user_key_payload(keyring_key);
+ ukp = user_key_payload_locked(keyring_key);
if (ukp->datalen != sizeof(struct fscrypt_key)) {
res = -EINVAL;
up_read(&keyring_key->sem);
diff --git a/fs/dax.c b/fs/dax.c
index 7436c98..de622d4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -27,6 +27,7 @@
#include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 1ce908c..23488f5 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -17,6 +17,7 @@
#include <linux/dlm.h>
#include <linux/dlm_device.h>
#include <linux/slab.h>
+#include <linux/sched/signal.h>
#include "dlm_internal.h"
#include "lockspace.h"
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 599a292..95c1c8d 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -117,7 +117,7 @@ ecryptfs_get_key_payload_data(struct key *key)
auth_tok = ecryptfs_get_encrypted_key_payload_data(key);
if (!auth_tok)
- return (struct ecryptfs_auth_tok *)user_key_payload(key)->data;
+ return (struct ecryptfs_auth_tok *)user_key_payload_locked(key)->data;
else
return auth_tok;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e7413f8..efc2db4 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -959,9 +959,10 @@ out:
return rc;
}
-static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
+ struct dentry *dentry = path->dentry;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
int rc = 0;
@@ -983,13 +984,15 @@ static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
return rc;
}
-static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int ecryptfs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
+ struct dentry *dentry = path->dentry;
struct kstat lower_stat;
int rc;
- rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat);
+ rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat,
+ request_mask, flags);
if (!rc) {
fsstack_copy_attr_all(d_inode(dentry),
ecryptfs_inode_to_lower(d_inode(dentry)));
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 158a3a3..039e627 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -22,6 +22,8 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+
#include "ecryptfs_kernel.h"
/**
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 1231cd1..68b9fff 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -9,7 +9,7 @@
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5ec1631..3412514 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -13,7 +13,7 @@
#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/signal.h>
diff --git a/fs/exec.c b/fs/exec.c
index 698a860..65145a3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -32,6 +32,11 @@
#include <linux/swap.h>
#include <linux/string.h>
#include <linux/init.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/numa_balancing.h>
+#include <linux/sched/task.h>
#include <linux/pagemap.h>
#include <linux/perf_event.h>
#include <linux/highmem.h>
@@ -1088,7 +1093,7 @@ static int de_thread(struct task_struct *tsk)
struct task_struct *leader = tsk->group_leader;
for (;;) {
- threadgroup_change_begin(tsk);
+ cgroup_threadgroup_change_begin(tsk);
write_lock_irq(&tasklist_lock);
/*
* Do this under tasklist_lock to ensure that
@@ -1099,7 +1104,7 @@ static int de_thread(struct task_struct *tsk)
break;
__set_current_state(TASK_KILLABLE);
write_unlock_irq(&tasklist_lock);
- threadgroup_change_end(tsk);
+ cgroup_threadgroup_change_end(tsk);
schedule();
if (unlikely(__fatal_signal_pending(tsk)))
goto killed;
@@ -1157,7 +1162,7 @@ static int de_thread(struct task_struct *tsk)
if (unlikely(leader->ptrace))
__wake_up_parent(leader, leader->parent);
write_unlock_irq(&tasklist_lock);
- threadgroup_change_end(tsk);
+ cgroup_threadgroup_change_end(tsk);
release_task(leader);
}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index a4b531b..329a5d1 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -15,6 +15,7 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#define dprintk(fmt, args...) do{}while(0)
@@ -299,7 +300,8 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
* filesystem supports 64-bit inode numbers. So we need to
* actually call ->getattr, not just read i_ino:
*/
- error = vfs_getattr_nosec(&child_path, &stat);
+ error = vfs_getattr_nosec(&child_path, &stat,
+ STATX_INO, AT_STATX_SYNC_AS_STAT);
if (error)
return error;
buffer.ino = stat.ino;
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 4c40c07..d0bdb74 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -15,6 +15,7 @@
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/buffer_head.h>
#include <linux/capability.h>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2fd17e8..f493af6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -28,6 +28,7 @@
#include <linux/timer.h>
#include <linux/version.h>
#include <linux/wait.h>
+#include <linux/sched/signal.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
#include <linux/ratelimit.h>
@@ -2462,8 +2463,7 @@ extern struct inode *ext4_iget(struct super_block *, unsigned long);
extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
-extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat);
+extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern void ext4_evict_inode(struct inode *);
extern void ext4_clear_inode(struct inode *);
extern int ext4_sync_inode(handle_t *, struct inode *);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b14bae2..17bc043 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -21,6 +21,8 @@
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
+#include <linux/cred.h>
+
#include <asm/byteorder.h>
#include "ext4.h"
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 971f663..7385e6a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5387,13 +5387,13 @@ err_out:
return error;
}
-int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+int ext4_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
struct inode *inode;
unsigned long long delalloc_blocks;
- inode = d_inode(dentry);
+ inode = d_inode(path->dentry);
generic_fillattr(inode, stat);
/*
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f73ee95..0339daf 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -249,7 +249,8 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
- f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
+ f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
+ 0, page->index, META, WRITE);
unlock_page(page);
@@ -493,6 +494,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
+ f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
#endif
@@ -681,8 +683,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
return -EINVAL;
}
- crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
- + crc_offset)));
+ crc = cur_cp_crc(*cp_block);
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
@@ -891,7 +892,7 @@ retry:
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return 0;
}
- fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
+ fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[type]);
if (inode) {
@@ -924,7 +925,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return 0;
}
- fi = list_entry(head->next, struct f2fs_inode_info,
+ fi = list_first_entry(head, struct f2fs_inode_info,
gdirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
@@ -998,8 +999,6 @@ out:
static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
-
- build_free_nids(sbi, false);
f2fs_unlock_all(sbi);
}
@@ -1025,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
spin_lock(&sbi->cp_lock);
+ if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
+ sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
+ disable_nat_bits(sbi, false);
+
if (cpc->reason == CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
@@ -1137,6 +1140,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_next_addr(sbi);
+ /* write nat bits */
+ if (enabled_nat_bits(sbi, cpc)) {
+ __u64 cp_ver = cur_cp_version(ckpt);
+ unsigned int i;
+ block_t blk;
+
+ cp_ver |= ((__u64)crc32 << 32);
+ *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
+
+ blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
+ for (i = 0; i < nm_i->nat_bits_blocks; i++)
+ update_meta_page(sbi, nm_i->nat_bits +
+ (i << F2FS_BLKSIZE_BITS), blk + i);
+
+ /* Flush all the NAT BITS pages */
+ while (get_pages(sbi, F2FS_DIRTY_META)) {
+ sync_meta_pages(sbi, META, LONG_MAX);
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+ }
+ }
+
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
@@ -1248,15 +1273,20 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_merged_bios(sbi);
/* this is the case of multiple fstrims without any changes */
- if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
- f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
- f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
- f2fs_bug_on(sbi, prefree_segments(sbi));
- flush_sit_entries(sbi, cpc);
- clear_prefree_segments(sbi, cpc);
- f2fs_wait_all_discard_bio(sbi);
- unblock_operations(sbi);
- goto out;
+ if (cpc->reason == CP_DISCARD) {
+ if (!exist_trim_candidates(sbi, cpc)) {
+ unblock_operations(sbi);
+ goto out;
+ }
+
+ if (NM_I(sbi)->dirty_nat_cnt == 0 &&
+ SIT_I(sbi)->dirty_sentries == 0 &&
+ prefree_segments(sbi) == 0) {
+ flush_sit_entries(sbi, cpc);
+ clear_prefree_segments(sbi, cpc);
+ unblock_operations(sbi);
+ goto out;
+ }
}
/*
@@ -1268,17 +1298,15 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
- flush_nat_entries(sbi);
+ flush_nat_entries(sbi, cpc);
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
err = do_checkpoint(sbi, cpc);
- if (err) {
+ if (err)
release_discard_addrs(sbi);
- } else {
+ else
clear_prefree_segments(sbi, cpc);
- f2fs_wait_all_discard_bio(sbi);
- }
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9ac2625..1602b4b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
+#include <linux/sched/signal.h>
#include "f2fs.h"
#include "node.h"
@@ -55,8 +56,10 @@ static void f2fs_read_end_io(struct bio *bio)
int i;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO))
+ if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
+ f2fs_show_injection_info(FAULT_IO);
bio->bi_error = -EIO;
+ }
#endif
if (f2fs_bio_encrypted(bio)) {
@@ -93,6 +96,17 @@ static void f2fs_write_end_io(struct bio *bio)
struct page *page = bvec->bv_page;
enum count_type type = WB_DATA_TYPE(page);
+ if (IS_DUMMY_WRITTEN_PAGE(page)) {
+ set_page_private(page, (unsigned long)NULL);
+ ClearPagePrivate(page);
+ unlock_page(page);
+ mempool_free(page, sbi->write_io_dummy);
+
+ if (unlikely(bio->bi_error))
+ f2fs_stop_checkpoint(sbi, true);
+ continue;
+ }
+
fscrypt_pullback_bio_page(&page, true);
if (unlikely(bio->bi_error)) {
@@ -171,10 +185,46 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
struct bio *bio, enum page_type type)
{
if (!is_read_io(bio_op(bio))) {
+ unsigned int start;
+
if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
current->plug && (type == DATA || type == NODE))
blk_finish_plug(current->plug);
+
+ if (type != DATA && type != NODE)
+ goto submit_io;
+
+ start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
+ start %= F2FS_IO_SIZE(sbi);
+
+ if (start == 0)
+ goto submit_io;
+
+ /* fill dummy pages */
+ for (; start < F2FS_IO_SIZE(sbi); start++) {
+ struct page *page =
+ mempool_alloc(sbi->write_io_dummy,
+ GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
+ f2fs_bug_on(sbi, !page);
+
+ SetPagePrivate(page);
+ set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
+ lock_page(page);
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
+ f2fs_bug_on(sbi, 1);
+ }
+ /*
+ * In the NODE case, we lose next block address chain. So, we
+ * need to do checkpoint in f2fs_sync_file.
+ */
+ if (type == NODE)
+ set_sbi_flag(sbi, SBI_NEED_CP);
}
+submit_io:
+ if (is_read_io(bio_op(bio)))
+ trace_f2fs_submit_read_bio(sbi->sb, type, bio);
+ else
+ trace_f2fs_submit_write_bio(sbi->sb, type, bio);
submit_bio(bio);
}
@@ -185,19 +235,19 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
if (!io->bio)
return;
+ bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
+
if (is_read_io(fio->op))
- trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
+ trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
else
- trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
-
- bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
+ trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
__submit_bio(io->sbi, io->bio, fio->type);
io->bio = NULL;
}
-static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
- struct page *page, nid_t ino)
+static bool __has_merged_page(struct f2fs_bio_info *io,
+ struct inode *inode, nid_t ino, pgoff_t idx)
{
struct bio_vec *bvec;
struct page *target;
@@ -206,7 +256,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
if (!io->bio)
return false;
- if (!inode && !page && !ino)
+ if (!inode && !ino)
return true;
bio_for_each_segment_all(bvec, io->bio, i) {
@@ -216,10 +266,11 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
else
target = fscrypt_control_page(bvec->bv_page);
+ if (idx != target->index)
+ continue;
+
if (inode && inode == target->mapping->host)
return true;
- if (page && page == target)
- return true;
if (ino && ino == ino_of_node(target))
return true;
}
@@ -228,22 +279,21 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
}
static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
- struct page *page, nid_t ino,
- enum page_type type)
+ nid_t ino, pgoff_t idx, enum page_type type)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io = &sbi->write_io[btype];
bool ret;
down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, page, ino);
+ ret = __has_merged_page(io, inode, ino, idx);
up_read(&io->io_rwsem);
return ret;
}
static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
- nid_t ino, enum page_type type, int rw)
+ struct inode *inode, nid_t ino, pgoff_t idx,
+ enum page_type type, int rw)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io;
@@ -252,16 +302,16 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
down_write(&io->io_rwsem);
- if (!__has_merged_page(io, inode, page, ino))
+ if (!__has_merged_page(io, inode, ino, idx))
goto out;
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
io->fio.op = REQ_OP_WRITE;
- io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO;
+ io->fio.op_flags = REQ_META | REQ_PRIO;
if (!test_opt(sbi, NOBARRIER))
- io->fio.op_flags |= REQ_FUA;
+ io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
}
__submit_merged_bio(io);
out:
@@ -271,15 +321,15 @@ out:
void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
int rw)
{
- __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw);
+ __f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw);
}
void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
- nid_t ino, enum page_type type, int rw)
+ struct inode *inode, nid_t ino, pgoff_t idx,
+ enum page_type type, int rw)
{
- if (has_merged_page(sbi, inode, page, ino, type))
- __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw);
+ if (has_merged_page(sbi, inode, ino, idx, type))
+ __f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw);
}
void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
@@ -315,13 +365,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
return 0;
}
-void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
+int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io;
bool is_read = is_read_io(fio->op);
struct page *bio_page;
+ int err = 0;
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
@@ -331,6 +382,9 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+ /* set submitted = 1 as a return value */
+ fio->submitted = 1;
+
if (!is_read)
inc_page_count(sbi, WB_DATA_TYPE(bio_page));
@@ -342,6 +396,13 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
+ if ((fio->type == DATA || fio->type == NODE) &&
+ fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
+ err = -EAGAIN;
+ if (!is_read)
+ dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+ goto out_fail;
+ }
io->bio = __bio_alloc(sbi, fio->new_blkaddr,
BIO_MAX_PAGES, is_read);
io->fio = *fio;
@@ -355,9 +416,10 @@ alloc_new:
io->last_block_in_bio = fio->new_blkaddr;
f2fs_trace_ios(fio, 0);
-
+out_fail:
up_write(&io->io_rwsem);
trace_f2fs_submit_page_mbio(fio->page, fio);
+ return err;
}
static void __set_data_blkaddr(struct dnode_of_data *dn)
@@ -453,7 +515,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
- struct extent_info ei;
+ struct extent_info ei = {0,0,0};
struct inode *inode = dn->inode;
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
@@ -470,7 +532,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
- struct extent_info ei;
+ struct extent_info ei = {0,0,0};
int err;
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
@@ -694,6 +756,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
struct f2fs_map_blocks map;
int err = 0;
+ if (is_inode_flag_set(inode, FI_NO_PREALLOC))
+ return 0;
+
map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
if (map.m_len > map.m_lblk)
@@ -742,7 +807,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int err = 0, ofs = 1;
unsigned int ofs_in_node, last_ofs_in_node;
blkcnt_t prealloc;
- struct extent_info ei;
+ struct extent_info ei = {0,0,0};
block_t blkaddr;
if (!maxblocks)
@@ -806,7 +871,7 @@ next_block:
}
if (err)
goto sync_out;
- map->m_flags = F2FS_MAP_NEW;
+ map->m_flags |= F2FS_MAP_NEW;
blkaddr = dn.data_blkaddr;
} else {
if (flag == F2FS_GET_BLOCK_BMAP) {
@@ -906,7 +971,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
if (!err) {
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
- bh->b_size = map.m_len << inode->i_blkbits;
+ bh->b_size = (u64)map.m_len << inode->i_blkbits;
}
return err;
}
@@ -1088,7 +1153,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
prefetchw(&page->flags);
if (pages) {
- page = list_entry(pages->prev, struct page, lru);
+ page = list_last_entry(pages, struct page, lru);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
page->index,
@@ -1207,7 +1272,7 @@ static int f2fs_read_data_pages(struct file *file,
struct list_head *pages, unsigned nr_pages)
{
struct inode *inode = file->f_mapping->host;
- struct page *page = list_entry(pages->prev, struct page, lru);
+ struct page *page = list_last_entry(pages, struct page, lru);
trace_f2fs_readpages(inode, page, nr_pages);
@@ -1288,8 +1353,8 @@ out_writepage:
return err;
}
-static int f2fs_write_data_page(struct page *page,
- struct writeback_control *wbc)
+static int __write_data_page(struct page *page, bool *submitted,
+ struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1307,6 +1372,7 @@ static int f2fs_write_data_page(struct page *page,
.op_flags = wbc_to_write_flags(wbc),
.page = page,
.encrypted_page = NULL,
+ .submitted = false,
};
trace_f2fs_writepage(page, DATA);
@@ -1352,9 +1418,12 @@ write:
goto redirty_out;
err = -EAGAIN;
- f2fs_lock_op(sbi);
- if (f2fs_has_inline_data(inode))
+ if (f2fs_has_inline_data(inode)) {
err = f2fs_write_inline_data(inode, page);
+ if (!err)
+ goto out;
+ }
+ f2fs_lock_op(sbi);
if (err == -EAGAIN)
err = do_write_data_page(&fio);
if (F2FS_I(inode)->last_disk_size < psize)
@@ -1370,15 +1439,22 @@ out:
ClearPageUptodate(page);
if (wbc->for_reclaim) {
- f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE);
+ f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
+ DATA, WRITE);
remove_dirty_inode(inode);
+ submitted = NULL;
}
unlock_page(page);
f2fs_balance_fs(sbi, need_balance_fs);
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ submitted = NULL;
+ }
+
+ if (submitted)
+ *submitted = fio.submitted;
return 0;
@@ -1390,6 +1466,12 @@ redirty_out:
return err;
}
+static int f2fs_write_data_page(struct page *page,
+ struct writeback_control *wbc)
+{
+ return __write_data_page(page, NULL, wbc);
+}
+
/*
* This function was copied from write_cche_pages from mm/page-writeback.c.
* The major change is making write step of cold data page separately from
@@ -1406,10 +1488,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
+ pgoff_t last_idx = ULONG_MAX;
int cycled;
int range_whole = 0;
int tag;
- int nwritten = 0;
pagevec_init(&pvec, 0);
@@ -1446,6 +1528,7 @@ retry:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ bool submitted = false;
if (page->index > end) {
done = 1;
@@ -1479,7 +1562,7 @@ continue_unlock:
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = mapping->a_ops->writepage(page, wbc);
+ ret = __write_data_page(page, &submitted, wbc);
if (unlikely(ret)) {
/*
* keep nr_to_write, since vfs uses this to
@@ -1493,8 +1576,8 @@ continue_unlock:
done_index = page->index + 1;
done = 1;
break;
- } else {
- nwritten++;
+ } else if (submitted) {
+ last_idx = page->index;
}
if (--wbc->nr_to_write <= 0 &&
@@ -1516,9 +1599,9 @@ continue_unlock:
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
- if (nwritten)
+ if (last_idx != ULONG_MAX)
f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host,
- NULL, 0, DATA, WRITE);
+ 0, last_idx, DATA, WRITE);
return ret;
}
@@ -1591,14 +1674,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
struct dnode_of_data dn;
struct page *ipage;
bool locked = false;
- struct extent_info ei;
+ struct extent_info ei = {0,0,0};
int err = 0;
/*
* we already allocated all the blocks, so we don't need to get
* the block addresses when there is no need to fill the page.
*/
- if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE)
+ if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
+ !is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
if (f2fs_has_inline_data(inode) ||
@@ -1682,7 +1766,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
goto fail;
}
repeat:
- page = grab_cache_page_write_begin(mapping, index, flags);
+ /*
+ * Do not use grab_cache_page_write_begin() to avoid deadlock due to
+ * wait_for_stable_page. Will wait that below with our IO control.
+ */
+ page = pagecache_get_page(mapping, index,
+ FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
if (!page) {
err = -ENOMEM;
goto fail;
@@ -1715,6 +1804,11 @@ repeat:
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
+ if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
+ zero_user_segment(page, len, PAGE_SIZE);
+ return 0;
+ }
+
if (blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
@@ -1768,7 +1862,7 @@ static int f2fs_write_end(struct file *file,
* let generic_perform_write() try to copy data again through copied=0.
*/
if (!PageUptodate(page)) {
- if (unlikely(copied != PAGE_SIZE))
+ if (unlikely(copied != len))
copied = 0;
else
SetPageUptodate(page);
@@ -1917,7 +2011,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
- if (f2fs_is_atomic_file(inode)) {
+ if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
register_inmem_page(inode, page);
return 1;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fbd5184..a77df37 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -50,8 +50,16 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
+ si->aw_cnt = atomic_read(&sbi->aw_cnt);
+ si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
+ if (SM_I(sbi) && SM_I(sbi)->fcc_info)
+ si->nr_flush =
+ atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
+ if (SM_I(sbi) && SM_I(sbi)->dcc_info)
+ si->nr_discard =
+ atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
@@ -62,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->inline_xattr = atomic_read(&sbi->inline_xattr);
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
+ si->append = sbi->im[APPEND_INO].ino_num;
+ si->update = sbi->im[UPDATE_INO].ino_num;
si->orphans = sbi->im[ORPHAN_INO].ino_num;
si->utilization = utilization(sbi);
@@ -183,6 +193,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build nm */
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
+ si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
+ si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+ si->base_mem += NM_I(sbi)->nat_blocks / 8;
get_cache:
si->cache_mem = 0;
@@ -192,8 +205,10 @@ get_cache:
si->cache_mem += sizeof(struct f2fs_gc_kthread);
/* build merge flush thread */
- if (SM_I(sbi)->cmd_control_info)
+ if (SM_I(sbi)->fcc_info)
si->cache_mem += sizeof(struct flush_cmd_control);
+ if (SM_I(sbi)->dcc_info)
+ si->cache_mem += sizeof(struct discard_cmd_control);
/* free nids */
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
@@ -254,8 +269,8 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
- seq_printf(s, " - Orphan Inode: %u\n",
- si->orphans);
+ seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
+ si->orphans, si->append, si->update);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
@@ -314,8 +329,11 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n",
- si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data);
+ seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
+ si->nr_wb_cp_data, si->nr_wb_data,
+ si->nr_flush, si->nr_discard);
+ seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
+ si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
@@ -414,6 +432,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->inplace_count, 0);
+ atomic_set(&sbi->aw_cnt, 0);
+ atomic_set(&sbi->max_aw_cnt, 0);
+
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
mutex_unlock(&f2fs_stat_mutex);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 18607fc..4650c9b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -207,9 +207,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
f2fs_put_page(dentry_page, 0);
}
- if (!de && room && F2FS_I(dir)->chash != namehash) {
- F2FS_I(dir)->chash = namehash;
- F2FS_I(dir)->clevel = level;
+ /* This is to increase the speed of f2fs_create */
+ if (!de && room) {
+ F2FS_I(dir)->task = current;
+ if (F2FS_I(dir)->chash != namehash) {
+ F2FS_I(dir)->chash = namehash;
+ F2FS_I(dir)->clevel = level;
+ }
}
return de;
@@ -548,8 +552,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
start:
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH))
+ if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
+ f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
+ }
#endif
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
@@ -646,14 +652,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct fscrypt_name fname;
+ struct page *page = NULL;
+ struct f2fs_dir_entry *de = NULL;
int err;
err = fscrypt_setup_filename(dir, name, 0, &fname);
if (err)
return err;
- err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
-
+ /*
+ * An immature stakable filesystem shows a race condition between lookup
+ * and create. If we have same task when doing lookup and create, it's
+ * definitely fine as expected by VFS normally. Otherwise, let's just
+ * verify on-disk dentry one more time, which guarantees filesystem
+ * consistency more.
+ */
+ if (current != F2FS_I(dir)->task) {
+ de = __f2fs_find_entry(dir, &fname, &page);
+ F2FS_I(dir)->task = NULL;
+ }
+ if (de) {
+ f2fs_dentry_kunmap(dir, page);
+ f2fs_put_page(page, 0);
+ err = -EEXIST;
+ } else if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ } else {
+ err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
+ }
fscrypt_free_filename(&fname);
return err;
}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 4db44da..c6934f0 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -77,7 +77,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
struct extent_tree *et;
nid_t ino = inode->i_ino;
- down_write(&sbi->extent_tree_lock);
+ mutex_lock(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, ino);
if (!et) {
et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
@@ -94,7 +94,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
atomic_dec(&sbi->total_zombie_tree);
list_del_init(&et->list);
}
- up_write(&sbi->extent_tree_lock);
+ mutex_unlock(&sbi->extent_tree_lock);
/* never died until evict_inode */
F2FS_I(inode)->extent_tree = et;
@@ -311,28 +311,24 @@ static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
tmp_node = parent;
if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
- *next_ex = tmp_node ?
- rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+ *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
- *prev_ex = tmp_node ?
- rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+ *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
return NULL;
lookup_neighbors:
if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&en->rb_node);
- *prev_ex = tmp_node ?
- rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+ *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&en->rb_node);
- *next_ex = tmp_node ?
- rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+ *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
return en;
}
@@ -352,11 +348,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
}
if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
- if (en)
- __release_extent_node(sbi, et, prev_ex);
next_ex->ei.fofs = ei->fofs;
next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
+ if (en)
+ __release_extent_node(sbi, et, prev_ex);
+
en = next_ex;
}
@@ -416,7 +413,7 @@ do_insert:
return en;
}
-static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
+static void f2fs_update_extent_tree_range(struct inode *inode,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -429,7 +426,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
unsigned int pos = (unsigned int)fofs;
if (!et)
- return false;
+ return;
trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
@@ -437,7 +434,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
write_unlock(&et->lock);
- return false;
+ return;
}
prev = et->largest;
@@ -492,9 +489,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (!next_en) {
struct rb_node *node = rb_next(&en->rb_node);
- next_en = node ?
- rb_entry(node, struct extent_node, rb_node)
- : NULL;
+ next_en = rb_entry_safe(node, struct extent_node,
+ rb_node);
}
if (parts)
@@ -535,8 +531,6 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
__free_extent_tree(sbi, et);
write_unlock(&et->lock);
-
- return !__is_extent_same(&prev, &et->largest);
}
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -552,7 +546,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
if (!atomic_read(&sbi->total_zombie_tree))
goto free_node;
- if (!down_write_trylock(&sbi->extent_tree_lock))
+ if (!mutex_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
@@ -574,11 +568,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
goto unlock_out;
cond_resched();
}
- up_write(&sbi->extent_tree_lock);
+ mutex_unlock(&sbi->extent_tree_lock);
free_node:
/* 2. remove LRU extent entries */
- if (!down_write_trylock(&sbi->extent_tree_lock))
+ if (!mutex_trylock(&sbi->extent_tree_lock))
goto out;
remained = nr_shrink - (node_cnt + tree_cnt);
@@ -608,7 +602,7 @@ free_node:
spin_unlock(&sbi->extent_lock);
unlock_out:
- up_write(&sbi->extent_tree_lock);
+ mutex_unlock(&sbi->extent_tree_lock);
out:
trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
@@ -655,10 +649,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
if (inode->i_nlink && !is_bad_inode(inode) &&
atomic_read(&et->node_cnt)) {
- down_write(&sbi->extent_tree_lock);
+ mutex_lock(&sbi->extent_tree_lock);
list_add_tail(&et->list, &sbi->zombie_list);
atomic_inc(&sbi->total_zombie_tree);
- up_write(&sbi->extent_tree_lock);
+ mutex_unlock(&sbi->extent_tree_lock);
return;
}
@@ -666,12 +660,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
node_cnt = f2fs_destroy_extent_node(inode);
/* delete extent tree entry in radix tree */
- down_write(&sbi->extent_tree_lock);
+ mutex_lock(&sbi->extent_tree_lock);
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
atomic_dec(&sbi->total_ext_tree);
- up_write(&sbi->extent_tree_lock);
+ mutex_unlock(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
@@ -718,7 +712,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
- init_rwsem(&sbi->extent_tree_lock);
+ mutex_init(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
atomic_set(&sbi->total_ext_tree, 0);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 069fc72..e849f83 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -112,9 +112,9 @@ struct f2fs_mount_info {
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
#define F2FS_SET_FEATURE(sb, mask) \
- F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)
+ (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask))
#define F2FS_CLEAR_FEATURE(sb, mask) \
- F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)
+ (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
/*
* For checkpoint manager
@@ -132,11 +132,14 @@ enum {
CP_DISCARD,
};
-#define DEF_BATCHED_TRIM_SECTIONS 2
+#define DEF_BATCHED_TRIM_SECTIONS 2048
#define BATCHED_TRIM_SEGMENTS(sbi) \
(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
+#define MAX_DISCARD_BLOCKS(sbi) \
+ ((1 << (sbi)->log_blocks_per_seg) * (sbi)->segs_per_sec)
+#define DISCARD_ISSUE_RATE 8
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
@@ -185,11 +188,30 @@ struct discard_entry {
int len; /* # of consecutive blocks of the discard */
};
-struct bio_entry {
- struct list_head list;
- struct bio *bio;
- struct completion event;
- int error;
+enum {
+ D_PREP,
+ D_SUBMIT,
+ D_DONE,
+};
+
+struct discard_cmd {
+ struct list_head list; /* command list */
+ struct completion wait; /* compleation */
+ block_t lstart; /* logical start address */
+ block_t len; /* length */
+ struct bio *bio; /* bio */
+ int state; /* state */
+};
+
+struct discard_cmd_control {
+ struct task_struct *f2fs_issue_discard; /* discard thread */
+ struct list_head discard_entry_list; /* 4KB discard entry list */
+ int nr_discards; /* # of discards in the list */
+ struct list_head discard_cmd_list; /* discard cmd list */
+ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
+ struct mutex cmd_lock;
+ int max_discards; /* max. discards to be issued */
+ atomic_t submit_discard; /* # of issued discard */
};
/* for the list of fsync inodes, used only during recovery */
@@ -214,6 +236,7 @@ struct fsync_inode_entry {
static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
{
int before = nats_in_cursum(journal);
+
journal->n_nats = cpu_to_le16(before + i);
return before;
}
@@ -221,6 +244,7 @@ static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i)
{
int before = sits_in_cursum(journal);
+
journal->n_sits = cpu_to_le16(before + i);
return before;
}
@@ -306,12 +330,14 @@ static inline void make_dentry_ptr(struct inode *inode,
if (type == 1) {
struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
+
d->max = NR_DENTRY_IN_BLOCK;
d->bitmap = &t->dentry_bitmap;
d->dentry = t->dentry;
d->filename = t->filename;
} else {
struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
+
d->max = NR_INLINE_DENTRY;
d->bitmap = &t->dentry_bitmap;
d->dentry = t->dentry;
@@ -438,8 +464,8 @@ struct f2fs_inode_info {
atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
+ struct task_struct *task; /* lookup and create consistency */
nid_t i_xattr_nid; /* node id that contains xattrs */
- unsigned long long xattr_ver; /* cp version of xattr modification */
loff_t last_disk_size; /* lastly written file size */
struct list_head dirty_list; /* dirty list for dirs and files */
@@ -474,13 +500,6 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
ei->len = len;
}
-static inline bool __is_extent_same(struct extent_info *ei1,
- struct extent_info *ei2)
-{
- return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk &&
- ei1->len == ei2->len);
-}
-
static inline bool __is_extent_mergeable(struct extent_info *back,
struct extent_info *front)
{
@@ -500,7 +519,7 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
return __is_extent_mergeable(cur, front);
}
-extern void f2fs_mark_inode_dirty_sync(struct inode *, bool);
+extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
static inline void __try_update_largest_extent(struct inode *inode,
struct extent_tree *et, struct extent_node *en)
{
@@ -532,6 +551,7 @@ struct f2fs_nm_info {
struct list_head nat_entries; /* cached nat entry list (clean) */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
+ unsigned int nat_blocks; /* # of nat blocks */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -539,9 +559,19 @@ struct f2fs_nm_info {
unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */
spinlock_t nid_list_lock; /* protect nid lists ops */
struct mutex build_lock; /* lock for build free nids */
+ unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
+ unsigned char *nat_block_bitmap;
/* for checkpoint */
char *nat_bitmap; /* NAT bitmap pointer */
+
+ unsigned int nat_bits_blocks; /* # of nat bits blocks */
+ unsigned char *nat_bits; /* NAT bits blocks */
+ unsigned char *full_nat_bits; /* full NAT pages */
+ unsigned char *empty_nat_bits; /* empty NAT pages */
+#ifdef CONFIG_F2FS_CHECK_FS
+ char *nat_bitmap_mir; /* NAT bitmap mirror */
+#endif
int bitmap_size; /* bitmap size */
};
@@ -632,12 +662,6 @@ struct f2fs_sm_info {
/* a threshold to reclaim prefree segments */
unsigned int rec_prefree_segments;
- /* for small discard management */
- struct list_head discard_list; /* 4KB discard list */
- struct list_head wait_list; /* linked with issued discard bio */
- int nr_discards; /* # of discards in the list */
- int max_discards; /* max. discards to be issued */
-
/* for batched trimming */
unsigned int trim_sections; /* # of sections to trim */
@@ -648,8 +672,10 @@ struct f2fs_sm_info {
unsigned int min_fsync_blocks; /* threshold for fsync */
/* for flush command control */
- struct flush_cmd_control *cmd_control_info;
+ struct flush_cmd_control *fcc_info;
+ /* for discard command control */
+ struct discard_cmd_control *dcc_info;
};
/*
@@ -708,6 +734,7 @@ struct f2fs_io_info {
block_t old_blkaddr; /* old block address before Cow */
struct page *page; /* page to be written */
struct page *encrypted_page; /* encrypted page */
+ bool submitted; /* indicate IO submission */
};
#define is_read_io(rw) (rw == READ)
@@ -787,6 +814,8 @@ struct f2fs_sb_info {
struct f2fs_bio_info read_io; /* for read bios */
struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */
+ int write_io_size_bits; /* Write IO size bits */
+ mempool_t *write_io_dummy; /* Dummy pages */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -811,7 +840,7 @@ struct f2fs_sb_info {
/* for extent tree cache */
struct radix_tree_root extent_tree_root;/* cache extent cache entries */
- struct rw_semaphore extent_tree_lock; /* locking extent radix tree */
+ struct mutex extent_tree_lock; /* locking extent radix tree */
struct list_head extent_list; /* lru list for shrinker */
spinlock_t extent_lock; /* locking extent lru list */
atomic_t total_ext_tree; /* extent tree count */
@@ -858,6 +887,9 @@ struct f2fs_sb_info {
struct f2fs_gc_kthread *gc_thread; /* GC thread */
unsigned int cur_victim_sec; /* current victim section num */
+ /* threshold for converting bg victims for fg */
+ u64 fggc_threshold;
+
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
@@ -877,6 +909,8 @@ struct f2fs_sb_info {
atomic_t inline_xattr; /* # of inline_xattr inodes */
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
+ atomic_t aw_cnt; /* # of atomic writes */
+ atomic_t max_aw_cnt; /* max # of atomic writes */
int bg_gc; /* background gc calls */
unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
#endif
@@ -908,6 +942,10 @@ struct f2fs_sb_info {
};
#ifdef CONFIG_F2FS_FAULT_INJECTION
+#define f2fs_show_injection_info(type) \
+ printk("%sF2FS-fs : inject %s in %s of %pF\n", \
+ KERN_INFO, fault_name[type], \
+ __func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
struct f2fs_fault_info *ffi = &sbi->fault_info;
@@ -921,10 +959,6 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
atomic_inc(&ffi->inject_ops);
if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
atomic_set(&ffi->inject_ops, 0);
- printk("%sF2FS-fs : inject %s in %pF\n",
- KERN_INFO,
- fault_name[type],
- __builtin_return_address(0));
return true;
}
return false;
@@ -1089,6 +1123,12 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
return le64_to_cpu(cp->checkpoint_ver);
}
+static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp)
+{
+ size_t crc_offset = le32_to_cpu(cp->checksum_offset);
+ return le32_to_cpu(*((__le32 *)((unsigned char *)cp + crc_offset)));
+}
+
static inline bool __is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
{
unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
@@ -1133,6 +1173,27 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
spin_unlock(&sbi->cp_lock);
}
+static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
+{
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
+ if (lock)
+ spin_lock(&sbi->cp_lock);
+ __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
+ kfree(NM_I(sbi)->nat_bits);
+ NM_I(sbi)->nat_bits = NULL;
+ if (lock)
+ spin_unlock(&sbi->cp_lock);
+}
+
+static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc)
+{
+ bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+
+ return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set;
+}
+
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
down_read(&sbi->cp_rwsem);
@@ -1212,8 +1273,10 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
blkcnt_t diff;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(sbi, FAULT_BLOCK))
+ if (time_to_inject(sbi, FAULT_BLOCK)) {
+ f2fs_show_injection_info(FAULT_BLOCK);
return false;
+ }
#endif
/*
* let's increase this in prior to actual block count change in order
@@ -1449,11 +1512,14 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
struct page *page = find_lock_page(mapping, index);
+
if (page)
return page;
- if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC))
+ if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
+ f2fs_show_injection_info(FAULT_PAGE_ALLOC);
return NULL;
+ }
#endif
if (!for_write)
return grab_cache_page(mapping, index);
@@ -1532,6 +1598,7 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
static inline bool IS_INODE(struct page *page)
{
struct f2fs_node *p = F2FS_NODE(page);
+
return RAW_IS_INODE(p);
}
@@ -1545,6 +1612,7 @@ static inline block_t datablock_addr(struct page *node_page,
{
struct f2fs_node *raw_node;
__le32 *addr_array;
+
raw_node = F2FS_NODE(node_page);
addr_array = blkaddr_in_node(raw_node);
return le32_to_cpu(addr_array[offset]);
@@ -1628,6 +1696,7 @@ enum {
FI_UPDATE_WRITE, /* inode has in-place-update data */
FI_NEED_IPU, /* used for ipu per file */
FI_ATOMIC_FILE, /* indicate atomic file */
+ FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
FI_VOLATILE_FILE, /* indicate volatile file */
FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
FI_DROP_CACHE, /* drop dirty page cache */
@@ -1635,6 +1704,7 @@ enum {
FI_INLINE_DOTS, /* indicate inline dot dentries */
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
+ FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
};
static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -1779,6 +1849,7 @@ static inline unsigned int addrs_per_inode(struct inode *inode)
static inline void *inline_xattr_addr(struct page *page)
{
struct f2fs_inode *ri = F2FS_INODE(page);
+
return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
F2FS_INLINE_XATTR_ADDRS]);
}
@@ -1817,6 +1888,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode)
return is_inode_flag_set(inode, FI_ATOMIC_FILE);
}
+static inline bool f2fs_is_commit_atomic_write(struct inode *inode)
+{
+ return is_inode_flag_set(inode, FI_ATOMIC_COMMIT);
+}
+
static inline bool f2fs_is_volatile_file(struct inode *inode)
{
return is_inode_flag_set(inode, FI_VOLATILE_FILE);
@@ -1835,6 +1911,7 @@ static inline bool f2fs_is_drop_cache(struct inode *inode)
static inline void *inline_data_addr(struct page *page)
{
struct f2fs_inode *ri = F2FS_INODE(page);
+
return (void *)&(ri->i_addr[1]);
}
@@ -1918,8 +1995,10 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(sbi, FAULT_KMALLOC))
+ if (time_to_inject(sbi, FAULT_KMALLOC)) {
+ f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
+ }
#endif
return kmalloc(size, flags);
}
@@ -1957,29 +2036,30 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
/*
* file.c
*/
-int f2fs_sync_file(struct file *, loff_t, loff_t, int);
-void truncate_data_blocks(struct dnode_of_data *);
-int truncate_blocks(struct inode *, u64, bool);
-int f2fs_truncate(struct inode *);
-int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-int f2fs_setattr(struct dentry *, struct iattr *);
-int truncate_hole(struct inode *, pgoff_t, pgoff_t);
-int truncate_data_blocks_range(struct dnode_of_data *, int);
-long f2fs_ioctl(struct file *, unsigned int, unsigned long);
-long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
+int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
+void truncate_data_blocks(struct dnode_of_data *dn);
+int truncate_blocks(struct inode *inode, u64 from, bool lock);
+int f2fs_truncate(struct inode *inode);
+int f2fs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags);
+int f2fs_setattr(struct dentry *dentry, struct iattr *attr);
+int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
+int truncate_data_blocks_range(struct dnode_of_data *dn, int count);
+long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/*
* inode.c
*/
-void f2fs_set_inode_flags(struct inode *);
-struct inode *f2fs_iget(struct super_block *, unsigned long);
-struct inode *f2fs_iget_retry(struct super_block *, unsigned long);
-int try_to_free_nats(struct f2fs_sb_info *, int);
-int update_inode(struct inode *, struct page *);
-int update_inode_page(struct inode *);
-int f2fs_write_inode(struct inode *, struct writeback_control *);
-void f2fs_evict_inode(struct inode *);
-void handle_failed_inode(struct inode *);
+void f2fs_set_inode_flags(struct inode *inode);
+struct inode *f2fs_iget(struct super_block *sb, unsigned long ino);
+struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino);
+int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
+int update_inode(struct inode *inode, struct page *node_page);
+int update_inode_page(struct inode *inode);
+int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
+void f2fs_evict_inode(struct inode *inode);
+void handle_failed_inode(struct inode *inode);
/*
* namei.c
@@ -1989,40 +2069,47 @@ struct dentry *f2fs_get_parent(struct dentry *child);
/*
* dir.c
*/
-void set_de_type(struct f2fs_dir_entry *, umode_t);
-unsigned char get_de_type(struct f2fs_dir_entry *);
-struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *,
- f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
-int f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
- unsigned int, struct fscrypt_str *);
-void do_make_empty_dir(struct inode *, struct inode *,
- struct f2fs_dentry_ptr *);
-struct page *init_inode_metadata(struct inode *, struct inode *,
- const struct qstr *, const struct qstr *, struct page *);
-void update_parent_metadata(struct inode *, struct inode *, unsigned int);
-int room_for_filename(const void *, int, int);
-void f2fs_drop_nlink(struct inode *, struct inode *);
-struct f2fs_dir_entry *__f2fs_find_entry(struct inode *, struct fscrypt_name *,
- struct page **);
-struct f2fs_dir_entry *f2fs_find_entry(struct inode *, const struct qstr *,
- struct page **);
-struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
-ino_t f2fs_inode_by_name(struct inode *, const struct qstr *, struct page **);
-void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
- struct page *, struct inode *);
-int update_dent_inode(struct inode *, struct inode *, const struct qstr *);
-void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
- const struct qstr *, f2fs_hash_t , unsigned int);
-int f2fs_add_regular_entry(struct inode *, const struct qstr *,
- const struct qstr *, struct inode *, nid_t, umode_t);
-int __f2fs_do_add_link(struct inode *, struct fscrypt_name*, struct inode *,
- nid_t, umode_t);
-int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
- umode_t);
-void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
- struct inode *);
-int f2fs_do_tmpfile(struct inode *, struct inode *);
-bool f2fs_empty_dir(struct inode *);
+void set_de_type(struct f2fs_dir_entry *de, umode_t mode);
+unsigned char get_de_type(struct f2fs_dir_entry *de);
+struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
+ f2fs_hash_t namehash, int *max_slots,
+ struct f2fs_dentry_ptr *d);
+int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
+ unsigned int start_pos, struct fscrypt_str *fstr);
+void do_make_empty_dir(struct inode *inode, struct inode *parent,
+ struct f2fs_dentry_ptr *d);
+struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+ const struct qstr *new_name,
+ const struct qstr *orig_name, struct page *dpage);
+void update_parent_metadata(struct inode *dir, struct inode *inode,
+ unsigned int current_depth);
+int room_for_filename(const void *bitmap, int slots, int max_slots);
+void f2fs_drop_nlink(struct inode *dir, struct inode *inode);
+struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
+ struct fscrypt_name *fname, struct page **res_page);
+struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
+ const struct qstr *child, struct page **res_page);
+struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p);
+ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr,
+ struct page **page);
+void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
+ struct page *page, struct inode *inode);
+int update_dent_inode(struct inode *inode, struct inode *to,
+ const struct qstr *name);
+void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
+ const struct qstr *name, f2fs_hash_t name_hash,
+ unsigned int bit_pos);
+int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
+ const struct qstr *orig_name,
+ struct inode *inode, nid_t ino, umode_t mode);
+int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
+ struct inode *inode, nid_t ino, umode_t mode);
+int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+ struct inode *inode, nid_t ino, umode_t mode);
+void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
+ struct inode *dir, struct inode *inode);
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir);
+bool f2fs_empty_dir(struct inode *dir);
static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
{
@@ -2033,18 +2120,18 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
/*
* super.c
*/
-int f2fs_inode_dirtied(struct inode *, bool);
-void f2fs_inode_synced(struct inode *);
-int f2fs_commit_super(struct f2fs_sb_info *, bool);
-int f2fs_sync_fs(struct super_block *, int);
+int f2fs_inode_dirtied(struct inode *inode, bool sync);
+void f2fs_inode_synced(struct inode *inode);
+int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
+int f2fs_sync_fs(struct super_block *sb, int sync);
extern __printf(3, 4)
-void f2fs_msg(struct super_block *, const char *, const char *, ...);
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
int sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info);
/*
* node.c
@@ -2052,163 +2139,183 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
struct dnode_of_data;
struct node_info;
-bool available_free_memory(struct f2fs_sb_info *, int);
-int need_dentry_mark(struct f2fs_sb_info *, nid_t);
-bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
-bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
-void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
-pgoff_t get_next_page_offset(struct dnode_of_data *, pgoff_t);
-int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
-int truncate_inode_blocks(struct inode *, pgoff_t);
-int truncate_xattr_node(struct inode *, struct page *);
-int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
-int remove_inode_page(struct inode *);
-struct page *new_inode_page(struct inode *);
-struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
-void ra_node_page(struct f2fs_sb_info *, nid_t);
-struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
-struct page *get_node_page_ra(struct page *, int);
-void move_node_page(struct page *, int);
-int fsync_node_pages(struct f2fs_sb_info *, struct inode *,
- struct writeback_control *, bool);
-int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
-void build_free_nids(struct f2fs_sb_info *, bool);
-bool alloc_nid(struct f2fs_sb_info *, nid_t *);
-void alloc_nid_done(struct f2fs_sb_info *, nid_t);
-void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
-int try_to_free_nids(struct f2fs_sb_info *, int);
-void recover_inline_xattr(struct inode *, struct page *);
-void recover_xattr_data(struct inode *, struct page *, block_t);
-int recover_inode_page(struct f2fs_sb_info *, struct page *);
-int restore_node_summary(struct f2fs_sb_info *, unsigned int,
- struct f2fs_summary_block *);
-void flush_nat_entries(struct f2fs_sb_info *);
-int build_node_manager(struct f2fs_sb_info *);
-void destroy_node_manager(struct f2fs_sb_info *);
+bool available_free_memory(struct f2fs_sb_info *sbi, int type);
+int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
+bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
+bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
+void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni);
+pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
+int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
+int truncate_inode_blocks(struct inode *inode, pgoff_t from);
+int truncate_xattr_node(struct inode *inode, struct page *page);
+int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
+int remove_inode_page(struct inode *inode);
+struct page *new_inode_page(struct inode *inode);
+struct page *new_node_page(struct dnode_of_data *dn,
+ unsigned int ofs, struct page *ipage);
+void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
+struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
+struct page *get_node_page_ra(struct page *parent, int start);
+void move_node_page(struct page *node_page, int gc_type);
+int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
+ struct writeback_control *wbc, bool atomic);
+int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
+bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
+void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
+void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
+int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
+void recover_inline_xattr(struct inode *inode, struct page *page);
+int recover_xattr_data(struct inode *inode, struct page *page,
+ block_t blkaddr);
+int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
+int restore_node_summary(struct f2fs_sb_info *sbi,
+ unsigned int segno, struct f2fs_summary_block *sum);
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int build_node_manager(struct f2fs_sb_info *sbi);
+void destroy_node_manager(struct f2fs_sb_info *sbi);
int __init create_node_manager_caches(void);
void destroy_node_manager_caches(void);
/*
* segment.c
*/
-void register_inmem_page(struct inode *, struct page *);
-void drop_inmem_pages(struct inode *);
-int commit_inmem_pages(struct inode *);
-void f2fs_balance_fs(struct f2fs_sb_info *, bool);
-void f2fs_balance_fs_bg(struct f2fs_sb_info *);
-int f2fs_issue_flush(struct f2fs_sb_info *);
-int create_flush_cmd_control(struct f2fs_sb_info *);
-void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
-void invalidate_blocks(struct f2fs_sb_info *, block_t);
-bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
-void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
-void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
-void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
-void release_discard_addrs(struct f2fs_sb_info *);
-int npages_for_summary_flush(struct f2fs_sb_info *, bool);
-void allocate_new_segments(struct f2fs_sb_info *);
-int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
-struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
-void update_meta_page(struct f2fs_sb_info *, void *, block_t);
-void write_meta_page(struct f2fs_sb_info *, struct page *);
-void write_node_page(unsigned int, struct f2fs_io_info *);
-void write_data_page(struct dnode_of_data *, struct f2fs_io_info *);
-void rewrite_data_page(struct f2fs_io_info *);
-void __f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *,
- block_t, block_t, bool, bool);
-void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *,
- block_t, block_t, unsigned char, bool, bool);
-void allocate_data_block(struct f2fs_sb_info *, struct page *,
- block_t, block_t *, struct f2fs_summary *, int);
-void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
-void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t);
-void write_data_summaries(struct f2fs_sb_info *, block_t);
-void write_node_summaries(struct f2fs_sb_info *, block_t);
-int lookup_journal_in_cursum(struct f2fs_journal *, int, unsigned int, int);
-void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *);
-int build_segment_manager(struct f2fs_sb_info *);
-void destroy_segment_manager(struct f2fs_sb_info *);
+void register_inmem_page(struct inode *inode, struct page *page);
+void drop_inmem_pages(struct inode *inode);
+int commit_inmem_pages(struct inode *inode);
+void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
+void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
+int f2fs_issue_flush(struct f2fs_sb_info *sbi);
+int create_flush_cmd_control(struct f2fs_sb_info *sbi);
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
+void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
+bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
+void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
+void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr);
+void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+void release_discard_addrs(struct f2fs_sb_info *sbi);
+int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void allocate_new_segments(struct f2fs_sb_info *sbi);
+int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
+bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
+void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
+void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
+void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
+void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
+void rewrite_data_page(struct f2fs_io_info *fio);
+void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ block_t old_blkaddr, block_t new_blkaddr,
+ bool recover_curseg, bool recover_newaddr);
+void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
+ block_t old_addr, block_t new_addr,
+ unsigned char version, bool recover_curseg,
+ bool recover_newaddr);
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+ block_t old_blkaddr, block_t *new_blkaddr,
+ struct f2fs_summary *sum, int type);
+void f2fs_wait_on_page_writeback(struct page *page,
+ enum page_type type, bool ordered);
+void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
+ block_t blkaddr);
+void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
+void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
+int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
+ unsigned int val, int alloc);
+void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int build_segment_manager(struct f2fs_sb_info *sbi);
+void destroy_segment_manager(struct f2fs_sb_info *sbi);
int __init create_segment_manager_caches(void);
void destroy_segment_manager_caches(void);
/*
* checkpoint.c
*/
-void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool);
-struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
-struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
-struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
-bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
-int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
-void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
-long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
-void add_ino_entry(struct f2fs_sb_info *, nid_t, int type);
-void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type);
-void release_ino_entry(struct f2fs_sb_info *, bool);
-bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
-int f2fs_sync_inode_meta(struct f2fs_sb_info *);
-int acquire_orphan_inode(struct f2fs_sb_info *);
-void release_orphan_inode(struct f2fs_sb_info *);
-void add_orphan_inode(struct inode *);
-void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
-int recover_orphan_inodes(struct f2fs_sb_info *);
-int get_valid_checkpoint(struct f2fs_sb_info *);
-void update_dirty_page(struct inode *, struct page *);
-void remove_dirty_inode(struct inode *);
-int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type);
-int write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
-void init_ino_entry_info(struct f2fs_sb_info *);
+void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
+struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
+bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
+int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+ int type, bool sync);
+void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
+long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+ long nr_to_write);
+void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
+void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
+void release_ino_entry(struct f2fs_sb_info *sbi, bool all);
+bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
+int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
+int acquire_orphan_inode(struct f2fs_sb_info *sbi);
+void release_orphan_inode(struct f2fs_sb_info *sbi);
+void add_orphan_inode(struct inode *inode);
+void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino);
+int recover_orphan_inodes(struct f2fs_sb_info *sbi);
+int get_valid_checkpoint(struct f2fs_sb_info *sbi);
+void update_dirty_page(struct inode *inode, struct page *page);
+void remove_dirty_inode(struct inode *inode);
+int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
+int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+void init_ino_entry_info(struct f2fs_sb_info *sbi);
int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);
/*
* data.c
*/
-void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
-void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
- struct page *, nid_t, enum page_type, int);
-void f2fs_flush_merged_bios(struct f2fs_sb_info *);
-int f2fs_submit_page_bio(struct f2fs_io_info *);
-void f2fs_submit_page_mbio(struct f2fs_io_info *);
-struct block_device *f2fs_target_device(struct f2fs_sb_info *,
- block_t, struct bio *);
-int f2fs_target_device_index(struct f2fs_sb_info *, block_t);
-void set_data_blkaddr(struct dnode_of_data *);
-void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
-int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
-int reserve_new_block(struct dnode_of_data *);
-int f2fs_get_block(struct dnode_of_data *, pgoff_t);
-int f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
-int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
-struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
-struct page *find_data_page(struct inode *, pgoff_t);
-struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
-struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
-int do_write_data_page(struct f2fs_io_info *);
-int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int);
-int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
-void f2fs_set_page_dirty_nobuffers(struct page *);
-void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
-int f2fs_release_page(struct page *, gfp_t);
+void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
+ int rw);
+void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+ struct inode *inode, nid_t ino, pgoff_t idx,
+ enum page_type type, int rw);
+void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi);
+int f2fs_submit_page_bio(struct f2fs_io_info *fio);
+int f2fs_submit_page_mbio(struct f2fs_io_info *fio);
+struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
+ block_t blk_addr, struct bio *bio);
+int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
+void set_data_blkaddr(struct dnode_of_data *dn);
+void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
+int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
+int reserve_new_block(struct dnode_of_data *dn);
+int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
+int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
+int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
+struct page *get_read_data_page(struct inode *inode, pgoff_t index,
+ int op_flags, bool for_write);
+struct page *find_data_page(struct inode *inode, pgoff_t index);
+struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
+ bool for_write);
+struct page *get_new_data_page(struct inode *inode,
+ struct page *ipage, pgoff_t index, bool new_i_size);
+int do_write_data_page(struct f2fs_io_info *fio);
+int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
+ int create, int flag);
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len);
+void f2fs_set_page_dirty_nobuffers(struct page *page);
+void f2fs_invalidate_page(struct page *page, unsigned int offset,
+ unsigned int length);
+int f2fs_release_page(struct page *page, gfp_t wait);
#ifdef CONFIG_MIGRATION
-int f2fs_migrate_page(struct address_space *, struct page *, struct page *,
- enum migrate_mode);
+int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
+ struct page *page, enum migrate_mode mode);
#endif
/*
* gc.c
*/
-int start_gc_thread(struct f2fs_sb_info *);
-void stop_gc_thread(struct f2fs_sb_info *);
-block_t start_bidx_of_node(unsigned int, struct inode *);
-int f2fs_gc(struct f2fs_sb_info *, bool, bool);
-void build_gc_manager(struct f2fs_sb_info *);
+int start_gc_thread(struct f2fs_sb_info *sbi);
+void stop_gc_thread(struct f2fs_sb_info *sbi);
+block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
+int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background);
+void build_gc_manager(struct f2fs_sb_info *sbi);
/*
* recovery.c
*/
-int recover_fsync_data(struct f2fs_sb_info *, bool);
-bool space_for_roll_forward(struct f2fs_sb_info *);
+int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only);
+bool space_for_roll_forward(struct f2fs_sb_info *sbi);
/*
* debug.c
@@ -2227,8 +2334,9 @@ struct f2fs_stat_info {
unsigned int ndirty_dirs, ndirty_files, ndirty_all;
int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
int total_count, utilization;
- int bg_gc, nr_wb_cp_data, nr_wb_data;
- int inline_xattr, inline_inode, inline_dir, orphans;
+ int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
+ int inline_xattr, inline_inode, inline_dir, append, update, orphans;
+ int aw_cnt, max_aw_cnt;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
@@ -2300,6 +2408,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
((sbi)->block_count[(curseg)->alloc_type]++)
#define stat_inc_inplace_blocks(sbi) \
(atomic_inc(&(sbi)->inplace_count))
+#define stat_inc_atomic_write(inode) \
+ (atomic_inc(&F2FS_I_SB(inode)->aw_cnt))
+#define stat_dec_atomic_write(inode) \
+ (atomic_dec(&F2FS_I_SB(inode)->aw_cnt))
+#define stat_update_max_atomic_write(inode) \
+ do { \
+ int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \
+ int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \
+ if (cur > max) \
+ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \
+ } while (0)
#define stat_inc_seg_count(sbi, type, gc_type) \
do { \
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
@@ -2332,8 +2451,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \
} while (0)
-int f2fs_build_stats(struct f2fs_sb_info *);
-void f2fs_destroy_stats(struct f2fs_sb_info *);
+int f2fs_build_stats(struct f2fs_sb_info *sbi);
+void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
int __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
@@ -2353,6 +2472,9 @@ void f2fs_destroy_root_stats(void);
#define stat_dec_inline_inode(inode)
#define stat_inc_inline_dir(inode)
#define stat_dec_inline_dir(inode)
+#define stat_inc_atomic_write(inode)
+#define stat_dec_atomic_write(inode)
+#define stat_update_max_atomic_write(inode)
#define stat_inc_seg_type(sbi, curseg)
#define stat_inc_block_count(sbi, curseg)
#define stat_inc_inplace_blocks(sbi)
@@ -2382,49 +2504,55 @@ extern struct kmem_cache *inode_entry_slab;
/*
* inline.c
*/
-bool f2fs_may_inline_data(struct inode *);
-bool f2fs_may_inline_dentry(struct inode *);
-void read_inline_data(struct page *, struct page *);
-bool truncate_inline_inode(struct page *, u64);
-int f2fs_read_inline_data(struct inode *, struct page *);
-int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
-int f2fs_convert_inline_inode(struct inode *);
-int f2fs_write_inline_data(struct inode *, struct page *);
-bool recover_inline_data(struct inode *, struct page *);
-struct f2fs_dir_entry *find_in_inline_dir(struct inode *,
- struct fscrypt_name *, struct page **);
-int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
-int f2fs_add_inline_entry(struct inode *, const struct qstr *,
- const struct qstr *, struct inode *, nid_t, umode_t);
-void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
- struct inode *, struct inode *);
-bool f2fs_empty_inline_dir(struct inode *);
-int f2fs_read_inline_dir(struct file *, struct dir_context *,
- struct fscrypt_str *);
-int f2fs_inline_data_fiemap(struct inode *,
- struct fiemap_extent_info *, __u64, __u64);
+bool f2fs_may_inline_data(struct inode *inode);
+bool f2fs_may_inline_dentry(struct inode *inode);
+void read_inline_data(struct page *page, struct page *ipage);
+bool truncate_inline_inode(struct page *ipage, u64 from);
+int f2fs_read_inline_data(struct inode *inode, struct page *page);
+int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
+int f2fs_convert_inline_inode(struct inode *inode);
+int f2fs_write_inline_data(struct inode *inode, struct page *page);
+bool recover_inline_data(struct inode *inode, struct page *npage);
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+ struct fscrypt_name *fname, struct page **res_page);
+int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+ struct page *ipage);
+int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
+ const struct qstr *orig_name,
+ struct inode *inode, nid_t ino, umode_t mode);
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
+ struct inode *dir, struct inode *inode);
+bool f2fs_empty_inline_dir(struct inode *dir);
+int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
+ struct fscrypt_str *fstr);
+int f2fs_inline_data_fiemap(struct inode *inode,
+ struct fiemap_extent_info *fieinfo,
+ __u64 start, __u64 len);
/*
* shrinker.c
*/
-unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *);
-unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *);
-void f2fs_join_shrinker(struct f2fs_sb_info *);
-void f2fs_leave_shrinker(struct f2fs_sb_info *);
+unsigned long f2fs_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+unsigned long f2fs_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
+void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
+void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
/*
* extent_cache.c
*/
-unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
-bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
-void f2fs_drop_extent_tree(struct inode *);
-unsigned int f2fs_destroy_extent_node(struct inode *);
-void f2fs_destroy_extent_tree(struct inode *);
-bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
-void f2fs_update_extent_cache(struct dnode_of_data *);
+unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
+bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
+void f2fs_drop_extent_tree(struct inode *inode);
+unsigned int f2fs_destroy_extent_node(struct inode *inode);
+void f2fs_destroy_extent_tree(struct inode *inode);
+bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei);
+void f2fs_update_extent_cache(struct dnode_of_data *dn);
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
- pgoff_t, block_t, unsigned int);
-void init_extent_cache_info(struct f2fs_sb_info *);
+ pgoff_t fofs, block_t blkaddr, unsigned int len);
+void init_extent_cache_info(struct f2fs_sb_info *sbi);
int __init create_extent_cache(void);
void destroy_extent_cache(void);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 1edc86e..5f73178 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -20,6 +20,7 @@
#include <linux/uaccess.h>
#include <linux/mount.h>
#include <linux/pagevec.h>
+#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
@@ -140,8 +141,6 @@ static inline bool need_do_checkpoint(struct inode *inode)
need_cp = true;
else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
need_cp = true;
- else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
- need_cp = true;
else if (test_opt(sbi, FASTBOOT))
need_cp = true;
else if (sbi->active_logs == 2)
@@ -167,7 +166,6 @@ static void try_to_fix_pino(struct inode *inode)
nid_t pino;
down_write(&fi->i_sem);
- fi->xattr_ver = 0;
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
f2fs_i_pino_write(inode, pino);
@@ -276,7 +274,8 @@ sync_nodes:
flush_out:
remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(inode, FI_UPDATE_WRITE);
- ret = f2fs_issue_flush(sbi);
+ if (!atomic)
+ ret = f2fs_issue_flush(sbi);
f2fs_update_time(sbi, REQ_TIME);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
@@ -567,8 +566,9 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
}
if (f2fs_has_inline_data(inode)) {
- if (truncate_inline_inode(ipage, from))
- set_page_dirty(ipage);
+ truncate_inline_inode(ipage, from);
+ if (from == 0)
+ clear_inode_flag(inode, FI_DATA_EXIST);
f2fs_put_page(ipage, 1);
truncate_page = true;
goto out;
@@ -633,10 +633,10 @@ int f2fs_truncate(struct inode *inode)
return 0;
}
-int f2fs_getattr(struct vfsmount *mnt,
- struct dentry *dentry, struct kstat *stat)
+int f2fs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
generic_fillattr(inode, stat);
stat->blocks <<= 3;
return 0;
@@ -1541,6 +1541,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (ret)
clear_inode_flag(inode, FI_ATOMIC_FILE);
out:
+ stat_inc_atomic_write(inode);
+ stat_update_max_atomic_write(inode);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1564,15 +1566,18 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
goto err_out;
if (f2fs_is_atomic_file(inode)) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
ret = commit_inmem_pages(inode);
- if (ret) {
- set_inode_flag(inode, FI_ATOMIC_FILE);
+ if (ret)
goto err_out;
+
+ ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
+ if (!ret) {
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+ stat_dec_atomic_write(inode);
}
+ } else {
+ ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
-
- ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
err_out:
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -1870,7 +1875,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
- struct extent_info ei;
+ struct extent_info ei = {0,0,0};
pgoff_t pg_start, pg_end;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
@@ -2250,8 +2255,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
- int err = f2fs_preallocate_blocks(iocb, from);
+ int err;
+
+ if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
+ set_inode_flag(inode, FI_NO_PREALLOC);
+ err = f2fs_preallocate_blocks(iocb, from);
if (err) {
inode_unlock(inode);
return err;
@@ -2259,6 +2268,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
blk_finish_plug(&plug);
+ clear_inode_flag(inode, FI_NO_PREALLOC);
}
inode_unlock(inode);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 88bfc3d..418fd98 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -48,8 +48,10 @@ static int gc_thread_func(void *data)
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(sbi, FAULT_CHECKPOINT))
+ if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
+ f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
+ }
#endif
/*
@@ -166,7 +168,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->ofs_unit = sbi->segs_per_sec;
}
- if (p->max_search > sbi->max_victim_search)
+ /* we need to check every dirty segments in the FG_GC case */
+ if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
p->offset = sbi->last_victim[p->gc_mode];
@@ -199,6 +202,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
if (sec_usage_check(sbi, secno))
continue;
+
+ if (no_fggc_candidate(sbi, secno))
+ continue;
+
clear_bit(secno, dirty_i->victim_secmap);
return secno * sbi->segs_per_sec;
}
@@ -237,6 +244,16 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
+static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ unsigned int valid_blocks =
+ get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+
+ return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
+ valid_blocks * 2 : valid_blocks;
+}
+
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
unsigned int segno, struct victim_sel_policy *p)
{
@@ -245,7 +262,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
- return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+ return get_greedy_cost(sbi, segno);
else
return get_cb_cost(sbi, segno);
}
@@ -322,13 +339,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
-
secno = GET_SECNO(sbi, segno);
if (sec_usage_check(sbi, secno))
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
+ if (gc_type == FG_GC && p.alloc_mode == LFS &&
+ no_fggc_candidate(sbi, secno))
+ goto next;
cost = get_gc_cost(sbi, segno, &p);
@@ -569,6 +588,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
+ if (f2fs_is_atomic_file(inode))
+ goto out;
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
if (err)
@@ -661,6 +683,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
+ if (f2fs_is_atomic_file(inode))
+ goto out;
+
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
@@ -921,8 +946,6 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
cpc.reason = __get_cp_reason(sbi);
gc_more:
- segno = NULL_SEGNO;
-
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
if (unlikely(f2fs_cp_error(sbi))) {
@@ -930,30 +953,23 @@ gc_more:
goto stop;
}
- if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) {
- gc_type = FG_GC;
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
/*
- * If there is no victim and no prefree segment but still not
- * enough free sections, we should flush dent/node blocks and do
- * garbage collections.
+ * For example, if there are many prefree_segments below given
+ * threshold, we can make them free by checkpoint. Then, we
+ * secure free segments which doesn't need fggc any more.
*/
- if (__get_victim(sbi, &segno, gc_type) ||
- prefree_segments(sbi)) {
- ret = write_checkpoint(sbi, &cpc);
- if (ret)
- goto stop;
- segno = NULL_SEGNO;
- } else if (has_not_enough_free_secs(sbi, 0, 0)) {
- ret = write_checkpoint(sbi, &cpc);
- if (ret)
- goto stop;
- }
- } else if (gc_type == BG_GC && !background) {
- /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
- goto stop;
+ ret = write_checkpoint(sbi, &cpc);
+ if (ret)
+ goto stop;
+ if (has_not_enough_free_secs(sbi, 0, 0))
+ gc_type = FG_GC;
}
- if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
+ /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
+ if (gc_type == BG_GC && !background)
+ goto stop;
+ if (!__get_victim(sbi, &segno, gc_type))
goto stop;
ret = 0;
@@ -983,5 +999,16 @@ stop:
void build_gc_manager(struct f2fs_sb_info *sbi)
{
+ u64 main_count, resv_count, ovp_count, blocks_per_sec;
+
DIRTY_I(sbi)->v_ops = &default_v_ops;
+
+ /* threshold of # of valid blocks in a section for victims of FG_GC */
+ main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
+ resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
+ ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
+ blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
+
+ sbi->fggc_threshold = div64_u64((main_count - ovp_count) * blocks_per_sec,
+ (main_count - resv_count));
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index af06bda..24bb821 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -373,8 +373,10 @@ void f2fs_evict_inode(struct inode *inode)
goto no_delete;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(sbi, FAULT_EVICT_INODE))
+ if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
+ f2fs_show_injection_info(FAULT_EVICT_INODE);
goto no_delete;
+ }
#endif
remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 11cabca..98f00a3 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -321,9 +321,9 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (err)
goto err_out;
}
- if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) &&
- (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
- !fscrypt_has_permitted_context(dir, inode)) {
+ if (f2fs_encrypted_inode(dir) &&
+ (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
+ !fscrypt_has_permitted_context(dir, inode)) {
bool nokey = f2fs_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode);
err = nokey ? -ENOKEY : -EPERM;
@@ -663,6 +663,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool is_old_inline = f2fs_has_inline_dentry(old_dir);
int err = -ENOENT;
+ if ((f2fs_encrypted_inode(old_dir) &&
+ !fscrypt_has_encryption_key(old_dir)) ||
+ (f2fs_encrypted_inode(new_dir) &&
+ !fscrypt_has_encryption_key(new_dir)))
+ return -ENOKEY;
+
if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
!fscrypt_has_permitted_context(new_dir, old_inode)) {
err = -EPERM;
@@ -843,6 +849,12 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int old_nlink = 0, new_nlink = 0;
int err = -ENOENT;
+ if ((f2fs_encrypted_inode(old_dir) &&
+ !fscrypt_has_encryption_key(old_dir)) ||
+ (f2fs_encrypted_inode(new_dir) &&
+ !fscrypt_has_encryption_key(new_dir)))
+ return -ENOKEY;
+
if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
(old_dir != new_dir) &&
(!fscrypt_has_permitted_context(new_dir, old_inode) ||
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b9078fd..9496717 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -245,12 +245,24 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
return need_update;
}
-static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
+static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
+ bool no_fail)
{
struct nat_entry *new;
- new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
- f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
+ if (no_fail) {
+ new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
+ f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
+ } else {
+ new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
+ if (!new)
+ return NULL;
+ if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
+ kmem_cache_free(nat_entry_slab, new);
+ return NULL;
+ }
+ }
+
memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid);
nat_reset_flag(new);
@@ -267,8 +279,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
- e = grab_nat_entry(nm_i, nid);
- node_info_from_raw_nat(&e->ni, ne);
+ e = grab_nat_entry(nm_i, nid, false);
+ if (e)
+ node_info_from_raw_nat(&e->ni, ne);
} else {
f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
nat_get_blkaddr(e) !=
@@ -286,7 +299,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
- e = grab_nat_entry(nm_i, ni->nid);
+ e = grab_nat_entry(nm_i, ni->nid, true);
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
@@ -325,6 +338,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
+ if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
+ clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
+
/* update fsync_mark if its inode nat entry is still alive */
if (ni->nid != ni->ino)
e = __lookup_nat_cache(nm_i, ni->ino);
@@ -958,9 +974,6 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
f2fs_i_xnid_write(inode, 0);
- /* need to do checkpoint during fsync */
- F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
-
set_new_dnode(&dn, inode, page, npage, nid);
if (page)
@@ -1018,7 +1031,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
unsigned int ofs, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
- struct node_info old_ni, new_ni;
+ struct node_info new_ni;
struct page *page;
int err;
@@ -1033,13 +1046,15 @@ struct page *new_node_page(struct dnode_of_data *dn,
err = -ENOSPC;
goto fail;
}
-
- get_node_info(sbi, dn->nid, &old_ni);
-
- /* Reinitialize old_ni with new node page */
- f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
- new_ni = old_ni;
+#ifdef CONFIG_F2FS_CHECK_FS
+ get_node_info(sbi, dn->nid, &new_ni);
+ f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
+#endif
+ new_ni.nid = dn->nid;
new_ni.ino = dn->inode->i_ino;
+ new_ni.blk_addr = NULL_ADDR;
+ new_ni.flag = 0;
+ new_ni.version = 0;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
f2fs_wait_on_page_writeback(page, NODE, true);
@@ -1305,16 +1320,99 @@ continue_unlock:
return last_page;
}
+static int __write_node_page(struct page *page, bool atomic, bool *submitted,
+ struct writeback_control *wbc)
+{
+ struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+ nid_t nid;
+ struct node_info ni;
+ struct f2fs_io_info fio = {
+ .sbi = sbi,
+ .type = NODE,
+ .op = REQ_OP_WRITE,
+ .op_flags = wbc_to_write_flags(wbc),
+ .page = page,
+ .encrypted_page = NULL,
+ .submitted = false,
+ };
+
+ trace_f2fs_writepage(page, NODE);
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto redirty_out;
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
+
+ /* get old block addr of this node page */
+ nid = nid_of_node(page);
+ f2fs_bug_on(sbi, page->index != nid);
+
+ if (wbc->for_reclaim) {
+ if (!down_read_trylock(&sbi->node_write))
+ goto redirty_out;
+ } else {
+ down_read(&sbi->node_write);
+ }
+
+ get_node_info(sbi, nid, &ni);
+
+ /* This page is already truncated */
+ if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ ClearPageUptodate(page);
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ up_read(&sbi->node_write);
+ unlock_page(page);
+ return 0;
+ }
+
+ if (atomic && !test_opt(sbi, NOBARRIER))
+ fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
+
+ set_page_writeback(page);
+ fio.old_blkaddr = ni.blk_addr;
+ write_node_page(nid, &fio);
+ set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ up_read(&sbi->node_write);
+
+ if (wbc->for_reclaim) {
+ f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0,
+ page->index, NODE, WRITE);
+ submitted = NULL;
+ }
+
+ unlock_page(page);
+
+ if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ submitted = NULL;
+ }
+ if (submitted)
+ *submitted = fio.submitted;
+
+ return 0;
+
+redirty_out:
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
+}
+
+static int f2fs_write_node_page(struct page *page,
+ struct writeback_control *wbc)
+{
+ return __write_node_page(page, false, NULL, wbc);
+}
+
int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic)
{
pgoff_t index, end;
+ pgoff_t last_idx = ULONG_MAX;
struct pagevec pvec;
int ret = 0;
struct page *last_page = NULL;
bool marked = false;
nid_t ino = inode->i_ino;
- int nwritten = 0;
if (atomic) {
last_page = last_fsync_dnode(sbi, ino);
@@ -1336,6 +1434,7 @@ retry:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ bool submitted = false;
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_put_page(last_page, 0);
@@ -1384,13 +1483,15 @@ continue_unlock:
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
+ ret = __write_node_page(page, atomic &&
+ page == last_page,
+ &submitted, wbc);
if (ret) {
unlock_page(page);
f2fs_put_page(last_page, 0);
break;
- } else {
- nwritten++;
+ } else if (submitted) {
+ last_idx = page->index;
}
if (page == last_page) {
@@ -1416,8 +1517,9 @@ continue_unlock:
goto retry;
}
out:
- if (nwritten)
- f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE);
+ if (last_idx != ULONG_MAX)
+ f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx,
+ NODE, WRITE);
return ret ? -EIO: 0;
}
@@ -1445,6 +1547,7 @@ next_step:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ bool submitted = false;
if (unlikely(f2fs_cp_error(sbi))) {
pagevec_release(&pvec);
@@ -1498,9 +1601,10 @@ continue_unlock:
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
- if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
+ ret = __write_node_page(page, false, &submitted, wbc);
+ if (ret)
unlock_page(page);
- else
+ else if (submitted)
nwritten++;
if (--wbc->nr_to_write == 0)
@@ -1564,72 +1668,6 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
return ret;
}
-static int f2fs_write_node_page(struct page *page,
- struct writeback_control *wbc)
-{
- struct f2fs_sb_info *sbi = F2FS_P_SB(page);
- nid_t nid;
- struct node_info ni;
- struct f2fs_io_info fio = {
- .sbi = sbi,
- .type = NODE,
- .op = REQ_OP_WRITE,
- .op_flags = wbc_to_write_flags(wbc),
- .page = page,
- .encrypted_page = NULL,
- };
-
- trace_f2fs_writepage(page, NODE);
-
- if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
- goto redirty_out;
- if (unlikely(f2fs_cp_error(sbi)))
- goto redirty_out;
-
- /* get old block addr of this node page */
- nid = nid_of_node(page);
- f2fs_bug_on(sbi, page->index != nid);
-
- if (wbc->for_reclaim) {
- if (!down_read_trylock(&sbi->node_write))
- goto redirty_out;
- } else {
- down_read(&sbi->node_write);
- }
-
- get_node_info(sbi, nid, &ni);
-
- /* This page is already truncated */
- if (unlikely(ni.blk_addr == NULL_ADDR)) {
- ClearPageUptodate(page);
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- up_read(&sbi->node_write);
- unlock_page(page);
- return 0;
- }
-
- set_page_writeback(page);
- fio.old_blkaddr = ni.blk_addr;
- write_node_page(nid, &fio);
- set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- up_read(&sbi->node_write);
-
- if (wbc->for_reclaim)
- f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE);
-
- unlock_page(page);
-
- if (unlikely(f2fs_cp_error(sbi)))
- f2fs_submit_merged_bio(sbi, NODE, WRITE);
-
- return 0;
-
-redirty_out:
- redirty_page_for_writepage(wbc, page);
- return AOP_WRITEPAGE_ACTIVATE;
-}
-
static int f2fs_write_node_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -1727,7 +1765,8 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
-static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
+/* return if the nid is recognized as free */
+static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
@@ -1736,14 +1775,14 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
/* 0 nid should not be used */
if (unlikely(nid == 0))
- return 0;
+ return false;
if (build) {
/* do not add allocated nids */
ne = __lookup_nat_cache(nm_i, nid);
if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
- return 0;
+ return false;
}
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
@@ -1752,7 +1791,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
if (radix_tree_preload(GFP_NOFS)) {
kmem_cache_free(free_nid_slab, i);
- return 0;
+ return true;
}
spin_lock(&nm_i->nid_list_lock);
@@ -1761,9 +1800,9 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
radix_tree_preload_end();
if (err) {
kmem_cache_free(free_nid_slab, i);
- return 0;
+ return true;
}
- return 1;
+ return true;
}
static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
@@ -1784,17 +1823,36 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
+void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
+ unsigned int nid_ofs = nid - START_NID(nid);
+
+ if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+ return;
+
+ if (set)
+ set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+ else
+ clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+}
+
static void scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct f2fs_nat_block *nat_blk = page_address(nat_page);
block_t blk_addr;
+ unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
int i;
+ set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
+
i = start_nid % NAT_ENTRY_PER_BLOCK;
for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
+ bool freed = false;
if (unlikely(start_nid >= nm_i->max_nid))
break;
@@ -1802,11 +1860,106 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
if (blk_addr == NULL_ADDR)
- add_free_nid(sbi, start_nid, true);
+ freed = add_free_nid(sbi, start_nid, true);
+ update_free_nid_bitmap(sbi, start_nid, freed);
+ }
+}
+
+static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+ struct f2fs_journal *journal = curseg->journal;
+ unsigned int i, idx;
+
+ down_read(&nm_i->nat_tree_lock);
+
+ for (i = 0; i < nm_i->nat_blocks; i++) {
+ if (!test_bit_le(i, nm_i->nat_block_bitmap))
+ continue;
+ for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
+ nid_t nid;
+
+ if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
+ continue;
+
+ nid = i * NAT_ENTRY_PER_BLOCK + idx;
+ add_free_nid(sbi, nid, true);
+
+ if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
+ goto out;
+ }
+ }
+out:
+ down_read(&curseg->journal_rwsem);
+ for (i = 0; i < nats_in_cursum(journal); i++) {
+ block_t addr;
+ nid_t nid;
+
+ addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
+ nid = le32_to_cpu(nid_in_journal(journal, i));
+ if (addr == NULL_ADDR)
+ add_free_nid(sbi, nid, true);
+ else
+ remove_free_nid(sbi, nid);
}
+ up_read(&curseg->journal_rwsem);
+ up_read(&nm_i->nat_tree_lock);
}
-static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+static int scan_nat_bits(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct page *page;
+ unsigned int i = 0;
+ nid_t nid;
+
+ if (!enabled_nat_bits(sbi, NULL))
+ return -EAGAIN;
+
+ down_read(&nm_i->nat_tree_lock);
+check_empty:
+ i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks) {
+ i = 0;
+ goto check_partial;
+ }
+
+ for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
+ nid++) {
+ if (unlikely(nid >= nm_i->max_nid))
+ break;
+ add_free_nid(sbi, nid, true);
+ }
+
+ if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
+ goto out;
+ i++;
+ goto check_empty;
+
+check_partial:
+ i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks) {
+ disable_nat_bits(sbi, true);
+ up_read(&nm_i->nat_tree_lock);
+ return -EINVAL;
+ }
+
+ nid = i * NAT_ENTRY_PER_BLOCK;
+ page = get_current_nat_page(sbi, nid);
+ scan_nat_page(sbi, page, nid);
+ f2fs_put_page(page, 1);
+
+ if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
+ i++;
+ goto check_partial;
+ }
+out:
+ up_read(&nm_i->nat_tree_lock);
+ return 0;
+}
+
+static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1821,6 +1974,29 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
if (!sync && !available_free_memory(sbi, FREE_NIDS))
return;
+ if (!mount) {
+ /* try to find free nids in free_nid_bitmap */
+ scan_free_nid_bits(sbi);
+
+ if (nm_i->nid_cnt[FREE_NID_LIST])
+ return;
+
+ /* try to find free nids with nat_bits */
+ if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
+ return;
+ }
+
+ /* find next valid candidate */
+ if (enabled_nat_bits(sbi, NULL)) {
+ int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
+ nm_i->nat_blocks, 0);
+
+ if (idx >= nm_i->nat_blocks)
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ else
+ nid = idx * NAT_ENTRY_PER_BLOCK;
+ }
+
/* readahead nat pages to be scanned */
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
@@ -1863,10 +2039,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
nm_i->ra_nid_pages, META_NAT, false);
}
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
mutex_lock(&NM_I(sbi)->build_lock);
- __build_free_nids(sbi, sync);
+ __build_free_nids(sbi, sync, mount);
mutex_unlock(&NM_I(sbi)->build_lock);
}
@@ -1881,8 +2057,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct free_nid *i = NULL;
retry:
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(sbi, FAULT_ALLOC_NID))
+ if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
+ f2fs_show_injection_info(FAULT_ALLOC_NID);
return false;
+ }
#endif
spin_lock(&nm_i->nid_list_lock);
@@ -1902,13 +2080,16 @@ retry:
i->state = NID_ALLOC;
__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
nm_i->available_nids--;
+
+ update_free_nid_bitmap(sbi, *nid, false);
+
spin_unlock(&nm_i->nid_list_lock);
return true;
}
spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
- build_free_nids(sbi, true);
+ build_free_nids(sbi, true, false);
goto retry;
}
@@ -1956,6 +2137,8 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
nm_i->available_nids++;
+ update_free_nid_bitmap(sbi, nid, true);
+
spin_unlock(&nm_i->nid_list_lock);
if (need_free)
@@ -2018,18 +2201,18 @@ update_inode:
f2fs_put_page(ipage, 1);
}
-void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
+int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
nid_t new_xnid = nid_of_node(page);
struct node_info ni;
+ struct page *xpage;
- /* 1: invalidate the previous xattr nid */
if (!prev_xnid)
goto recover_xnid;
- /* Deallocate node address */
+ /* 1: invalidate the previous xattr nid */
get_node_info(sbi, prev_xnid, &ni);
f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
invalidate_blocks(sbi, ni.blk_addr);
@@ -2037,19 +2220,27 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
set_node_addr(sbi, &ni, NULL_ADDR, false);
recover_xnid:
- /* 2: allocate new xattr nid */
+ /* 2: update xattr nid in inode */
+ remove_free_nid(sbi, new_xnid);
+ f2fs_i_xnid_write(inode, new_xnid);
if (unlikely(!inc_valid_node_count(sbi, inode)))
f2fs_bug_on(sbi, 1);
+ update_inode_page(inode);
+
+ /* 3: update and set xattr node page dirty */
+ xpage = grab_cache_page(NODE_MAPPING(sbi), new_xnid);
+ if (!xpage)
+ return -ENOMEM;
+
+ memcpy(F2FS_NODE(xpage), F2FS_NODE(page), PAGE_SIZE);
- remove_free_nid(sbi, new_xnid);
get_node_info(sbi, new_xnid, &ni);
ni.ino = inode->i_ino;
set_node_addr(sbi, &ni, NEW_ADDR, false);
- f2fs_i_xnid_write(inode, new_xnid);
+ set_page_dirty(xpage);
+ f2fs_put_page(xpage, 1);
- /* 3: update xattr blkaddr */
- refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
- set_node_addr(sbi, &ni, blkaddr, false);
+ return 0;
}
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -2152,7 +2343,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
- ne = grab_nat_entry(nm_i, nid);
+ ne = grab_nat_entry(nm_i, nid, true);
node_info_from_raw_nat(&ne->ni, &raw_ne);
}
@@ -2192,8 +2383,39 @@ add_out:
list_add_tail(&nes->set_list, head);
}
+void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+ struct page *page)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
+ struct f2fs_nat_block *nat_blk = page_address(page);
+ int valid = 0;
+ int i;
+
+ if (!enabled_nat_bits(sbi, NULL))
+ return;
+
+ for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
+ if (start_nid == 0 && i == 0)
+ valid++;
+ if (nat_blk->entries[i].block_addr)
+ valid++;
+ }
+ if (valid == 0) {
+ set_bit_le(nat_index, nm_i->empty_nat_bits);
+ clear_bit_le(nat_index, nm_i->full_nat_bits);
+ return;
+ }
+
+ clear_bit_le(nat_index, nm_i->empty_nat_bits);
+ if (valid == NAT_ENTRY_PER_BLOCK)
+ set_bit_le(nat_index, nm_i->full_nat_bits);
+ else
+ clear_bit_le(nat_index, nm_i->full_nat_bits);
+}
+
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
- struct nat_entry_set *set)
+ struct nat_entry_set *set, struct cp_control *cpc)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
@@ -2208,7 +2430,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
* #1, flush nat entries to journal in current hot data summary block.
* #2, flush nat entries to nat page.
*/
- if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
+ if (enabled_nat_bits(sbi, cpc) ||
+ !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
to_journal = false;
if (to_journal) {
@@ -2244,14 +2467,21 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
add_free_nid(sbi, nid, false);
spin_lock(&NM_I(sbi)->nid_list_lock);
NM_I(sbi)->available_nids++;
+ update_free_nid_bitmap(sbi, nid, true);
+ spin_unlock(&NM_I(sbi)->nid_list_lock);
+ } else {
+ spin_lock(&NM_I(sbi)->nid_list_lock);
+ update_free_nid_bitmap(sbi, nid, false);
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
- if (to_journal)
+ if (to_journal) {
up_write(&curseg->journal_rwsem);
- else
+ } else {
+ __update_nat_bits(sbi, start_nid, page);
f2fs_put_page(page, 1);
+ }
f2fs_bug_on(sbi, set->entry_cnt);
@@ -2262,7 +2492,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
/*
* This function is called during the checkpointing process.
*/
-void flush_nat_entries(struct f2fs_sb_info *sbi)
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2283,7 +2513,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
- if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+ if (enabled_nat_bits(sbi, cpc) ||
+ !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
while ((found = __gang_lookup_nat_set(nm_i,
@@ -2297,27 +2528,69 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list)
- __flush_nat_entry_set(sbi, set);
+ __flush_nat_entry_set(sbi, set, cpc);
up_write(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
+static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
+ unsigned int i;
+ __u64 cp_ver = cur_cp_version(ckpt);
+ block_t nat_bits_addr;
+
+ if (!enabled_nat_bits(sbi, NULL))
+ return 0;
+
+ nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
+ F2FS_BLKSIZE - 1);
+ nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
+ GFP_KERNEL);
+ if (!nm_i->nat_bits)
+ return -ENOMEM;
+
+ nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
+ nm_i->nat_bits_blocks;
+ for (i = 0; i < nm_i->nat_bits_blocks; i++) {
+ struct page *page = get_meta_page(sbi, nat_bits_addr++);
+
+ memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
+ page_address(page), F2FS_BLKSIZE);
+ f2fs_put_page(page, 1);
+ }
+
+ cp_ver |= (cur_cp_crc(ckpt) << 32);
+ if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
+ disable_nat_bits(sbi, true);
+ return 0;
+ }
+
+ nm_i->full_nat_bits = nm_i->nat_bits + 8;
+ nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
+ f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+ return 0;
+}
+
static int init_node_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned char *version_bitmap;
- unsigned int nat_segs, nat_blocks;
+ unsigned int nat_segs;
+ int err;
nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
/* segment_count_nat includes pair segment so divide to 2. */
nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
- nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
-
- nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+ nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+ nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
/* not used nids: 0, node, meta, (and root counted as valid node) */
nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
@@ -2350,6 +2623,34 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
GFP_KERNEL);
if (!nm_i->nat_bitmap)
return -ENOMEM;
+
+ err = __get_nat_bitmaps(sbi);
+ if (err)
+ return err;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
+ GFP_KERNEL);
+ if (!nm_i->nat_bitmap_mir)
+ return -ENOMEM;
+#endif
+
+ return 0;
+}
+
+int init_free_nid_cache(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
+ NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
+ if (!nm_i->free_nid_bitmap)
+ return -ENOMEM;
+
+ nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
+ GFP_KERNEL);
+ if (!nm_i->nat_block_bitmap)
+ return -ENOMEM;
return 0;
}
@@ -2365,7 +2666,11 @@ int build_node_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
- build_free_nids(sbi, true);
+ err = init_free_nid_cache(sbi);
+ if (err)
+ return err;
+
+ build_free_nids(sbi, true, true);
return 0;
}
@@ -2423,7 +2728,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
}
up_write(&nm_i->nat_tree_lock);
+ kvfree(nm_i->nat_block_bitmap);
+ kvfree(nm_i->free_nid_bitmap);
+
kfree(nm_i->nat_bitmap);
+ kfree(nm_i->nat_bits);
+#ifdef CONFIG_F2FS_CHECK_FS
+ kfree(nm_i->nat_bitmap_mir);
+#endif
sbi->nm_info = NULL;
kfree(nm_i);
}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index e7997e2..2f9603f 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -174,7 +174,7 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
spin_unlock(&nm_i->nid_list_lock);
return;
}
- fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next,
+ fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
struct free_nid, list);
*nid = fnid->nid;
spin_unlock(&nm_i->nid_list_lock);
@@ -186,6 +186,12 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir,
+ nm_i->bitmap_size))
+ f2fs_bug_on(sbi, 1);
+#endif
memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
}
@@ -228,6 +234,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
f2fs_change_bit(block_off, nm_i->nat_bitmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_change_bit(block_off, nm_i->nat_bitmap_mir);
+#endif
}
static inline nid_t ino_of_node(struct page *node_page)
@@ -291,14 +300,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
struct f2fs_node *rn = F2FS_NODE(page);
- size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
- __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
+ __u64 cp_ver = cur_cp_version(ckpt);
+
+ if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
+ cp_ver |= (cur_cp_crc(ckpt) << 32);
- if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
- __u64 crc = le32_to_cpu(*((__le32 *)
- ((unsigned char *)ckpt + crc_offset)));
- cp_ver |= (crc << 32);
- }
rn->footer.cp_ver = cpu_to_le64(cp_ver);
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
@@ -306,14 +312,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
static inline bool is_recoverable_dnode(struct page *page)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
- size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = cur_cp_version(ckpt);
- if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
- __u64 crc = le32_to_cpu(*((__le32 *)
- ((unsigned char *)ckpt + crc_offset)));
- cp_ver |= (crc << 32);
- }
+ if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
+ cp_ver |= (cur_cp_crc(ckpt) << 32);
+
return cp_ver == cpver_of_node(page);
}
@@ -343,7 +346,7 @@ static inline bool IS_DNODE(struct page *node_page)
unsigned int ofs = ofs_of_node(node_page);
if (f2fs_has_xattr_block(ofs))
- return false;
+ return true;
if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
ofs == 5 + 2 * NIDS_PER_BLOCK)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 981a958..d025aa8 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -378,11 +378,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (IS_INODE(page)) {
recover_inline_xattr(inode, page);
} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
- /*
- * Deprecated; xattr blocks should be found from cold log.
- * But, we should remain this for backward compatibility.
- */
- recover_xattr_data(inode, page, blkaddr);
+ err = recover_xattr_data(inode, page, blkaddr);
+ if (!err)
+ recovered++;
goto out;
}
@@ -428,8 +426,9 @@ retry_dn:
}
if (!file_keep_isize(inode) &&
- (i_size_read(inode) <= (start << PAGE_SHIFT)))
- f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
+ (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
+ f2fs_i_size_write(inode,
+ (loff_t)(start + 1) << PAGE_SHIFT);
/*
* dest is reserved block, invalidate src block
@@ -552,10 +551,8 @@ next:
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
struct list_head dir_list;
- block_t blkaddr;
int err;
int ret = 0;
bool need_writecp = false;
@@ -571,8 +568,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
/* prevent checkpoint */
mutex_lock(&sbi->cp_mutex);
- blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
-
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list);
if (err || list_empty(&inode_list))
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0d88024..4bd7a8b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -26,7 +26,7 @@
#define __reverse_ffz(x) __reverse_ffs(~(x))
static struct kmem_cache *discard_entry_slab;
-static struct kmem_cache *bio_entry_slab;
+static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;
@@ -242,11 +242,12 @@ void drop_inmem_pages(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
- clear_inode_flag(inode, FI_ATOMIC_FILE);
-
mutex_lock(&fi->inmem_lock);
__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
mutex_unlock(&fi->inmem_lock);
+
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+ stat_dec_atomic_write(inode);
}
static int __commit_inmem_pages(struct inode *inode,
@@ -262,7 +263,7 @@ static int __commit_inmem_pages(struct inode *inode,
.op_flags = REQ_SYNC | REQ_PRIO,
.encrypted_page = NULL,
};
- bool submit_bio = false;
+ pgoff_t last_idx = ULONG_MAX;
int err = 0;
list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
@@ -288,15 +289,15 @@ static int __commit_inmem_pages(struct inode *inode,
/* record old blkaddr for revoking */
cur->old_addr = fio.old_blkaddr;
-
- submit_bio = true;
+ last_idx = page->index;
}
unlock_page(page);
list_move_tail(&cur->list, revoke_list);
}
- if (submit_bio)
- f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
+ if (last_idx != ULONG_MAX)
+ f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx,
+ DATA, WRITE);
if (!err)
__revoke_inmem_pages(inode, revoke_list, false, false);
@@ -315,6 +316,8 @@ int commit_inmem_pages(struct inode *inode)
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
+ set_inode_flag(inode, FI_ATOMIC_COMMIT);
+
mutex_lock(&fi->inmem_lock);
err = __commit_inmem_pages(inode, &revoke_list);
if (err) {
@@ -336,6 +339,8 @@ int commit_inmem_pages(struct inode *inode)
}
mutex_unlock(&fi->inmem_lock);
+ clear_inode_flag(inode, FI_ATOMIC_COMMIT);
+
f2fs_unlock_op(sbi);
return err;
}
@@ -347,8 +352,10 @@ int commit_inmem_pages(struct inode *inode)
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(sbi, FAULT_CHECKPOINT))
+ if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
+ f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
+ }
#endif
if (!need)
@@ -381,7 +388,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (!available_free_memory(sbi, FREE_NIDS))
try_to_free_nids(sbi, MAX_FREE_NIDS);
else
- build_free_nids(sbi, false);
+ build_free_nids(sbi, false, false);
if (!is_idle(sbi))
return;
@@ -423,6 +430,9 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi)
if (sbi->s_ndevs && !ret) {
for (i = 1; i < sbi->s_ndevs; i++) {
+ trace_f2fs_issue_flush(FDEV(i).bdev,
+ test_opt(sbi, NOBARRIER),
+ test_opt(sbi, FLUSH_MERGE));
ret = __submit_flush_wait(FDEV(i).bdev);
if (ret)
break;
@@ -434,7 +444,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi)
static int issue_flush_thread(void *data)
{
struct f2fs_sb_info *sbi = data;
- struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
if (kthread_should_stop())
@@ -463,16 +473,16 @@ repeat:
int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
- struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
struct flush_cmd cmd;
- trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
- test_opt(sbi, FLUSH_MERGE));
-
if (test_opt(sbi, NOBARRIER))
return 0;
- if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) {
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return submit_flush_wait(sbi);
+
+ if (!atomic_read(&fcc->submit_flush)) {
int ret;
atomic_inc(&fcc->submit_flush);
@@ -506,8 +516,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
struct flush_cmd_control *fcc;
int err = 0;
- if (SM_I(sbi)->cmd_control_info) {
- fcc = SM_I(sbi)->cmd_control_info;
+ if (SM_I(sbi)->fcc_info) {
+ fcc = SM_I(sbi)->fcc_info;
goto init_thread;
}
@@ -517,14 +527,14 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
atomic_set(&fcc->submit_flush, 0);
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
- SM_I(sbi)->cmd_control_info = fcc;
+ SM_I(sbi)->fcc_info = fcc;
init_thread:
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
kfree(fcc);
- SM_I(sbi)->cmd_control_info = NULL;
+ SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -533,7 +543,7 @@ init_thread:
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
- struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
if (fcc && fcc->f2fs_issue_flush) {
struct task_struct *flush_thread = fcc->f2fs_issue_flush;
@@ -543,7 +553,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
}
if (free) {
kfree(fcc);
- SM_I(sbi)->cmd_control_info = NULL;
+ SM_I(sbi)->fcc_info = NULL;
}
}
@@ -623,60 +633,144 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_unlock(&dirty_i->seglist_lock);
}
-static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
- struct bio *bio)
+static void __add_discard_cmd(struct f2fs_sb_info *sbi,
+ struct bio *bio, block_t lstart, block_t len)
{
- struct list_head *wait_list = &(SM_I(sbi)->wait_list);
- struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct list_head *cmd_list = &(dcc->discard_cmd_list);
+ struct discard_cmd *dc;
- INIT_LIST_HEAD(&be->list);
- be->bio = bio;
- init_completion(&be->event);
- list_add_tail(&be->list, wait_list);
+ dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
+ INIT_LIST_HEAD(&dc->list);
+ dc->bio = bio;
+ bio->bi_private = dc;
+ dc->lstart = lstart;
+ dc->len = len;
+ dc->state = D_PREP;
+ init_completion(&dc->wait);
- return be;
+ mutex_lock(&dcc->cmd_lock);
+ list_add_tail(&dc->list, cmd_list);
+ mutex_unlock(&dcc->cmd_lock);
}
-void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
+static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc)
{
- struct list_head *wait_list = &(SM_I(sbi)->wait_list);
- struct bio_entry *be, *tmp;
+ int err = dc->bio->bi_error;
- list_for_each_entry_safe(be, tmp, wait_list, list) {
- struct bio *bio = be->bio;
- int err;
+ if (dc->state == D_DONE)
+ atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
- wait_for_completion_io(&be->event);
- err = be->error;
- if (err == -EOPNOTSUPP)
- err = 0;
+ if (err == -EOPNOTSUPP)
+ err = 0;
- if (err)
- f2fs_msg(sbi->sb, KERN_INFO,
+ if (err)
+ f2fs_msg(sbi->sb, KERN_INFO,
"Issue discard failed, ret: %d", err);
+ bio_put(dc->bio);
+ list_del(&dc->list);
+ kmem_cache_free(discard_cmd_slab, dc);
+}
+
+/* This should be covered by global mutex, &sit_i->sentry_lock */
+void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct list_head *wait_list = &(dcc->discard_cmd_list);
+ struct discard_cmd *dc, *tmp;
+ struct blk_plug plug;
+
+ mutex_lock(&dcc->cmd_lock);
- bio_put(bio);
- list_del(&be->list);
- kmem_cache_free(bio_entry_slab, be);
+ blk_start_plug(&plug);
+
+ list_for_each_entry_safe(dc, tmp, wait_list, list) {
+
+ if (blkaddr == NULL_ADDR) {
+ if (dc->state == D_PREP) {
+ dc->state = D_SUBMIT;
+ submit_bio(dc->bio);
+ atomic_inc(&dcc->submit_discard);
+ }
+ continue;
+ }
+
+ if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) {
+ if (dc->state == D_SUBMIT)
+ wait_for_completion_io(&dc->wait);
+ else
+ __remove_discard_cmd(sbi, dc);
+ }
+ }
+ blk_finish_plug(&plug);
+
+ /* this comes from f2fs_put_super */
+ if (blkaddr == NULL_ADDR) {
+ list_for_each_entry_safe(dc, tmp, wait_list, list) {
+ wait_for_completion_io(&dc->wait);
+ __remove_discard_cmd(sbi, dc);
+ }
}
+ mutex_unlock(&dcc->cmd_lock);
+}
+
+static void f2fs_submit_discard_endio(struct bio *bio)
+{
+ struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
+
+ complete(&dc->wait);
+ dc->state = D_DONE;
}
-static void f2fs_submit_bio_wait_endio(struct bio *bio)
+static int issue_discard_thread(void *data)
{
- struct bio_entry *be = (struct bio_entry *)bio->bi_private;
+ struct f2fs_sb_info *sbi = data;
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ wait_queue_head_t *q = &dcc->discard_wait_queue;
+ struct list_head *cmd_list = &dcc->discard_cmd_list;
+ struct discard_cmd *dc, *tmp;
+ struct blk_plug plug;
+ int iter = 0;
+repeat:
+ if (kthread_should_stop())
+ return 0;
+
+ blk_start_plug(&plug);
+
+ mutex_lock(&dcc->cmd_lock);
+ list_for_each_entry_safe(dc, tmp, cmd_list, list) {
+ if (dc->state == D_PREP) {
+ dc->state = D_SUBMIT;
+ submit_bio(dc->bio);
+ atomic_inc(&dcc->submit_discard);
+ if (iter++ > DISCARD_ISSUE_RATE)
+ break;
+ } else if (dc->state == D_DONE) {
+ __remove_discard_cmd(sbi, dc);
+ }
+ }
+ mutex_unlock(&dcc->cmd_lock);
+
+ blk_finish_plug(&plug);
+
+ iter = 0;
+ congestion_wait(BLK_RW_SYNC, HZ/50);
- be->error = bio->bi_error;
- complete(&be->event);
+ wait_event_interruptible(*q,
+ kthread_should_stop() || !list_empty(&dcc->discard_cmd_list));
+ goto repeat;
}
+
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
struct bio *bio = NULL;
+ block_t lblkstart = blkstart;
int err;
- trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+ trace_f2fs_issue_discard(bdev, blkstart, blklen);
if (sbi->s_ndevs) {
int devi = f2fs_target_device_index(sbi, blkstart);
@@ -688,14 +782,12 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
SECTOR_FROM_BLOCK(blklen),
GFP_NOFS, 0, &bio);
if (!err && bio) {
- struct bio_entry *be = __add_bio_entry(sbi, bio);
-
- bio->bi_private = be;
- bio->bi_end_io = f2fs_submit_bio_wait_endio;
+ bio->bi_end_io = f2fs_submit_discard_endio;
bio->bi_opf |= REQ_SYNC;
- submit_bio(bio);
- }
+ __add_discard_cmd(sbi, bio, lblkstart, blklen);
+ wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
+ }
return err;
}
@@ -703,24 +795,13 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
- sector_t nr_sects = SECTOR_FROM_BLOCK(blklen);
- sector_t sector;
+ sector_t sector, nr_sects;
int devi = 0;
if (sbi->s_ndevs) {
devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
}
- sector = SECTOR_FROM_BLOCK(blkstart);
-
- if (sector & (bdev_zone_sectors(bdev) - 1) ||
- nr_sects != bdev_zone_sectors(bdev)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "(%d) %s: Unaligned discard attempted (block %x + %x)",
- devi, sbi->s_ndevs ? FDEV(devi).path: "",
- blkstart, blklen);
- return -EIO;
- }
/*
* We need to know the type of the zone: for conventional zones,
@@ -735,7 +816,18 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
case BLK_ZONE_TYPE_SEQWRITE_REQ:
case BLK_ZONE_TYPE_SEQWRITE_PREF:
- trace_f2fs_issue_reset_zone(sbi->sb, blkstart);
+ sector = SECTOR_FROM_BLOCK(blkstart);
+ nr_sects = SECTOR_FROM_BLOCK(blklen);
+
+ if (sector & (bdev_zone_sectors(bdev) - 1) ||
+ nr_sects != bdev_zone_sectors(bdev)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "(%d) %s: Unaligned discard attempted (block %x + %x)",
+ devi, sbi->s_ndevs ? FDEV(devi).path: "",
+ blkstart, blklen);
+ return -EIO;
+ }
+ trace_f2fs_issue_reset_zone(bdev, blkstart);
return blkdev_reset_zones(bdev, sector,
nr_sects, GFP_NOFS);
default:
@@ -800,13 +892,14 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
struct cp_control *cpc, struct seg_entry *se,
unsigned int start, unsigned int end)
{
- struct list_head *head = &SM_I(sbi)->discard_list;
+ struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list;
struct discard_entry *new, *last;
if (!list_empty(head)) {
last = list_last_entry(head, struct discard_entry, list);
if (START_BLOCK(sbi, cpc->trim_start) + start ==
- last->blkaddr + last->len) {
+ last->blkaddr + last->len &&
+ last->len < MAX_DISCARD_BLOCKS(sbi)) {
last->len += end - start;
goto done;
}
@@ -818,10 +911,11 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
new->len = end - start;
list_add_tail(&new->list, head);
done:
- SM_I(sbi)->nr_discards += end - start;
+ SM_I(sbi)->dcc_info->nr_discards += end - start;
}
-static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
+ bool check_only)
{
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
int max_blocks = sbi->blocks_per_seg;
@@ -835,12 +929,13 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int i;
if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
- return;
+ return false;
if (!force) {
if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
- SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
- return;
+ SM_I(sbi)->dcc_info->nr_discards >=
+ SM_I(sbi)->dcc_info->max_discards)
+ return false;
}
/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
@@ -848,7 +943,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
- while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
+ while (force || SM_I(sbi)->dcc_info->nr_discards <=
+ SM_I(sbi)->dcc_info->max_discards) {
start = __find_rev_next_bit(dmap, max_blocks, end + 1);
if (start >= max_blocks)
break;
@@ -858,13 +954,17 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
&& (end - start) < cpc->trim_minlen)
continue;
+ if (check_only)
+ return true;
+
__add_discard_entry(sbi, cpc, se, start, end);
}
+ return false;
}
void release_discard_addrs(struct f2fs_sb_info *sbi)
{
- struct list_head *head = &(SM_I(sbi)->discard_list);
+ struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
struct discard_entry *entry, *this;
/* drop caches */
@@ -890,17 +990,14 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
- struct list_head *head = &(SM_I(sbi)->discard_list);
+ struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
struct discard_entry *entry, *this;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- struct blk_plug plug;
unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason == CP_DISCARD);
- blk_start_plug(&plug);
-
mutex_lock(&dirty_i->seglist_lock);
while (1) {
@@ -916,9 +1013,13 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
dirty_i->nr_dirty[PRE] -= end - start;
- if (force || !test_opt(sbi, DISCARD))
+ if (!test_opt(sbi, DISCARD))
continue;
+ if (force && start >= cpc->trim_start &&
+ (end - 1) <= cpc->trim_end)
+ continue;
+
if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
@@ -935,6 +1036,8 @@ next:
start = start_segno + sbi->segs_per_sec;
if (start < end)
goto next;
+ else
+ end = start - 1;
}
mutex_unlock(&dirty_i->seglist_lock);
@@ -946,11 +1049,62 @@ next:
cpc->trimmed += entry->len;
skip:
list_del(&entry->list);
- SM_I(sbi)->nr_discards -= entry->len;
+ SM_I(sbi)->dcc_info->nr_discards -= entry->len;
kmem_cache_free(discard_entry_slab, entry);
}
+}
- blk_finish_plug(&plug);
+static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ struct discard_cmd_control *dcc;
+ int err = 0;
+
+ if (SM_I(sbi)->dcc_info) {
+ dcc = SM_I(sbi)->dcc_info;
+ goto init_thread;
+ }
+
+ dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL);
+ if (!dcc)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&dcc->discard_entry_list);
+ INIT_LIST_HEAD(&dcc->discard_cmd_list);
+ mutex_init(&dcc->cmd_lock);
+ atomic_set(&dcc->submit_discard, 0);
+ dcc->nr_discards = 0;
+ dcc->max_discards = 0;
+
+ init_waitqueue_head(&dcc->discard_wait_queue);
+ SM_I(sbi)->dcc_info = dcc;
+init_thread:
+ dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
+ "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(dcc->f2fs_issue_discard)) {
+ err = PTR_ERR(dcc->f2fs_issue_discard);
+ kfree(dcc);
+ SM_I(sbi)->dcc_info = NULL;
+ return err;
+ }
+
+ return err;
+}
+
+static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
+ if (dcc && dcc->f2fs_issue_discard) {
+ struct task_struct *discard_thread = dcc->f2fs_issue_discard;
+
+ dcc->f2fs_issue_discard = NULL;
+ kthread_stop(discard_thread);
+ }
+ if (free) {
+ kfree(dcc);
+ SM_I(sbi)->dcc_info = NULL;
+ }
}
static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
@@ -995,14 +1149,32 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
/* Update valid block bitmap */
if (del > 0) {
- if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
+ if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) {
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (f2fs_test_and_set_bit(offset,
+ se->cur_valid_map_mir))
+ f2fs_bug_on(sbi, 1);
+ else
+ WARN_ON(1);
+#else
f2fs_bug_on(sbi, 1);
+#endif
+ }
if (f2fs_discard_en(sbi) &&
!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
} else {
- if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
+ if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (!f2fs_test_and_clear_bit(offset,
+ se->cur_valid_map_mir))
+ f2fs_bug_on(sbi, 1);
+ else
+ WARN_ON(1);
+#else
f2fs_bug_on(sbi, 1);
+#endif
+ }
if (f2fs_discard_en(sbi) &&
f2fs_test_and_clear_bit(offset, se->discard_map))
sbi->discard_blks++;
@@ -1167,17 +1339,6 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
f2fs_put_page(page, 1);
}
-static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
-{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
- unsigned int segno = curseg->segno + 1;
- struct free_segmap_info *free_i = FREE_I(sbi);
-
- if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
- return !test_bit(segno, free_i->free_segmap);
- return 0;
-}
-
/*
* Find a new segment from the free segments bitmap to right order
* This function should be returned with success, otherwise BUG
@@ -1382,16 +1543,39 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
+ int i, cnt;
+ bool reversed = false;
+
+ /* need_SSR() already forces to do this */
+ if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR))
+ return 1;
- if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0))
- return v_ops->get_victim(sbi,
- &(curseg)->next_segno, BG_GC, type, SSR);
+ /* For node segments, let's do SSR more intensively */
+ if (IS_NODESEG(type)) {
+ if (type >= CURSEG_WARM_NODE) {
+ reversed = true;
+ i = CURSEG_COLD_NODE;
+ } else {
+ i = CURSEG_HOT_NODE;
+ }
+ cnt = NR_CURSEG_NODE_TYPE;
+ } else {
+ if (type >= CURSEG_WARM_DATA) {
+ reversed = true;
+ i = CURSEG_COLD_DATA;
+ } else {
+ i = CURSEG_HOT_DATA;
+ }
+ cnt = NR_CURSEG_DATA_TYPE;
+ }
- /* For data segments, let's do SSR more intensively */
- for (; type >= CURSEG_HOT_DATA; type--)
+ for (; cnt-- > 0; reversed ? i-- : i++) {
+ if (i == type)
+ continue;
if (v_ops->get_victim(sbi, &(curseg)->next_segno,
- BG_GC, type, SSR))
+ BG_GC, i, SSR))
return 1;
+ }
return 0;
}
@@ -1402,20 +1586,17 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
int type, bool force)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
-
if (force)
new_curseg(sbi, type, true);
- else if (type == CURSEG_WARM_NODE)
- new_curseg(sbi, type, false);
- else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+ else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
+ type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
- stat_inc_seg_type(sbi, curseg);
+ stat_inc_seg_type(sbi, CURSEG_I(sbi, type));
}
void allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -1424,9 +1605,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
unsigned int old_segno;
int i;
- if (test_opt(sbi, LFS))
- return;
-
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
curseg = CURSEG_I(sbi, i);
old_segno = curseg->segno;
@@ -1439,6 +1617,24 @@ static const struct segment_allocation default_salloc_ops = {
.allocate_segment = allocate_segment_by_default,
};
+bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
+ __u64 trim_start = cpc->trim_start;
+ bool has_candidate = false;
+
+ mutex_lock(&SIT_I(sbi)->sentry_lock);
+ for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
+ if (add_discard_addrs(sbi, cpc, true)) {
+ has_candidate = true;
+ break;
+ }
+ }
+ mutex_unlock(&SIT_I(sbi)->sentry_lock);
+
+ cpc->trim_start = trim_start;
+ return has_candidate;
+}
+
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
__u64 start = F2FS_BYTES_TO_BLK(range->start);
@@ -1573,6 +1769,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ f2fs_wait_discard_bio(sbi, *new_blkaddr);
+
/*
* __add_sum_entry should be resided under the curseg_mutex
* because, this function updates a summary entry in the
@@ -1584,14 +1782,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_block_count(sbi, curseg);
- if (!__has_curseg_space(sbi, type))
- sit_i->s_ops->allocate_segment(sbi, type, false);
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
*/
refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
+ if (!__has_curseg_space(sbi, type))
+ sit_i->s_ops->allocate_segment(sbi, type, false);
+
mutex_unlock(&sit_i->sentry_lock);
if (page && IS_NODESEG(type))
@@ -1603,15 +1802,20 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
int type = __get_segment_type(fio->page, fio->type);
+ int err;
if (fio->type == NODE || fio->type == DATA)
mutex_lock(&fio->sbi->wio_mutex[fio->type]);
-
+reallocate:
allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type);
/* writeout dirty page into bdev */
- f2fs_submit_page_mbio(fio);
+ err = f2fs_submit_page_mbio(fio);
+ if (err == -EAGAIN) {
+ fio->old_blkaddr = fio->new_blkaddr;
+ goto reallocate;
+ }
if (fio->type == NODE || fio->type == DATA)
mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
@@ -1753,7 +1957,8 @@ void f2fs_wait_on_page_writeback(struct page *page,
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
- f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE);
+ f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
+ 0, page->index, type, WRITE);
if (ordered)
wait_on_page_writeback(page);
else
@@ -2228,7 +2433,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* add discard candidates */
if (cpc->reason != CP_DISCARD) {
cpc->trim_start = segno;
- add_discard_addrs(sbi, cpc);
+ add_discard_addrs(sbi, cpc, false);
}
if (to_journal) {
@@ -2263,8 +2468,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
if (cpc->reason == CP_DISCARD) {
+ __u64 trim_start = cpc->trim_start;
+
for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
- add_discard_addrs(sbi, cpc);
+ add_discard_addrs(sbi, cpc, false);
+
+ cpc->trim_start = trim_start;
}
mutex_unlock(&sit_i->sentry_lock);
@@ -2276,7 +2485,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
- char *src_bitmap, *dst_bitmap;
+ char *src_bitmap;
unsigned int bitmap_size;
/* allocate memory for SIT information */
@@ -2305,6 +2514,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
!sit_i->sentries[start].ckpt_valid_map)
return -ENOMEM;
+#ifdef CONFIG_F2FS_CHECK_FS
+ sit_i->sentries[start].cur_valid_map_mir
+ = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+ if (!sit_i->sentries[start].cur_valid_map_mir)
+ return -ENOMEM;
+#endif
+
if (f2fs_discard_en(sbi)) {
sit_i->sentries[start].discard_map
= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -2331,17 +2547,22 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
- dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
- if (!dst_bitmap)
+ sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+ if (!sit_i->sit_bitmap)
return -ENOMEM;
+#ifdef CONFIG_F2FS_CHECK_FS
+ sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+ if (!sit_i->sit_bitmap_mir)
+ return -ENOMEM;
+#endif
+
/* init SIT information */
sit_i->s_ops = &default_salloc_ops;
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
sit_i->written_valid_blocks = 0;
- sit_i->sit_bitmap = dst_bitmap;
sit_i->bitmap_size = bitmap_size;
sit_i->dirty_sentries = 0;
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
@@ -2626,11 +2847,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
- INIT_LIST_HEAD(&sm_info->discard_list);
- INIT_LIST_HEAD(&sm_info->wait_list);
- sm_info->nr_discards = 0;
- sm_info->max_discards = 0;
-
sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
INIT_LIST_HEAD(&sm_info->sit_entry_set);
@@ -2641,6 +2857,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
return err;
}
+ err = create_discard_cmd_control(sbi);
+ if (err)
+ return err;
+
err = build_sit_info(sbi);
if (err)
return err;
@@ -2734,6 +2954,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
if (sit_i->sentries) {
for (start = 0; start < MAIN_SEGS(sbi); start++) {
kfree(sit_i->sentries[start].cur_valid_map);
+#ifdef CONFIG_F2FS_CHECK_FS
+ kfree(sit_i->sentries[start].cur_valid_map_mir);
+#endif
kfree(sit_i->sentries[start].ckpt_valid_map);
kfree(sit_i->sentries[start].discard_map);
}
@@ -2746,6 +2969,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
SM_I(sbi)->sit_info = NULL;
kfree(sit_i->sit_bitmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+ kfree(sit_i->sit_bitmap_mir);
+#endif
kfree(sit_i);
}
@@ -2756,6 +2982,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
if (!sm_info)
return;
destroy_flush_cmd_control(sbi, true);
+ destroy_discard_cmd_control(sbi, true);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
@@ -2771,15 +2998,15 @@ int __init create_segment_manager_caches(void)
if (!discard_entry_slab)
goto fail;
- bio_entry_slab = f2fs_kmem_cache_create("bio_entry",
- sizeof(struct bio_entry));
- if (!bio_entry_slab)
+ discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
+ sizeof(struct discard_cmd));
+ if (!discard_cmd_slab)
goto destroy_discard_entry;
sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
sizeof(struct sit_entry_set));
if (!sit_entry_set_slab)
- goto destroy_bio_entry;
+ goto destroy_discard_cmd;
inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
sizeof(struct inmem_pages));
@@ -2789,8 +3016,8 @@ int __init create_segment_manager_caches(void)
destroy_sit_entry_set:
kmem_cache_destroy(sit_entry_set_slab);
-destroy_bio_entry:
- kmem_cache_destroy(bio_entry_slab);
+destroy_discard_cmd:
+ kmem_cache_destroy(discard_cmd_slab);
destroy_discard_entry:
kmem_cache_destroy(discard_entry_slab);
fail:
@@ -2800,7 +3027,7 @@ fail:
void destroy_segment_manager_caches(void)
{
kmem_cache_destroy(sit_entry_set_slab);
- kmem_cache_destroy(bio_entry_slab);
+ kmem_cache_destroy(discard_cmd_slab);
kmem_cache_destroy(discard_entry_slab);
kmem_cache_destroy(inmem_entry_slab);
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 9d44ce8..5e8ad42 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -164,6 +164,9 @@ struct seg_entry {
unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */
unsigned int padding:6; /* padding */
unsigned char *cur_valid_map; /* validity bitmap of blocks */
+#ifdef CONFIG_F2FS_CHECK_FS
+ unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
+#endif
/*
* # of valid blocks and the validity bitmap stored in the the last
* checkpoint pack. This information is used by the SSR mode.
@@ -186,9 +189,12 @@ struct segment_allocation {
* the page is atomically written, and it is in inmem_pages list.
*/
#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
+#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
#define IS_ATOMIC_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
+#define IS_DUMMY_WRITTEN_PAGE(page) \
+ (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
struct inmem_pages {
struct list_head list;
@@ -203,6 +209,9 @@ struct sit_info {
block_t sit_blocks; /* # of blocks used by SIT area */
block_t written_valid_blocks; /* # of valid blocks in main area */
char *sit_bitmap; /* SIT bitmap pointer */
+#ifdef CONFIG_F2FS_CHECK_FS
+ char *sit_bitmap_mir; /* SIT bitmap mirror */
+#endif
unsigned int bitmap_size; /* SIT bitmap size */
unsigned long *tmp_map; /* bitmap for temporal use */
@@ -317,6 +326,9 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se,
se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+#ifdef CONFIG_F2FS_CHECK_FS
+ memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+#endif
se->type = GET_SIT_TYPE(rs);
se->mtime = le64_to_cpu(rs->mtime);
}
@@ -414,6 +426,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
void *dst_addr)
{
struct sit_info *sit_i = SIT_I(sbi);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir,
+ sit_i->bitmap_size))
+ f2fs_bug_on(sbi, 1);
+#endif
memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
}
@@ -634,6 +652,12 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
check_seg_range(sbi, start);
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (f2fs_test_bit(offset, sit_i->sit_bitmap) !=
+ f2fs_test_bit(offset, sit_i->sit_bitmap_mir))
+ f2fs_bug_on(sbi, 1);
+#endif
+
/* calculate sit block address */
if (f2fs_test_bit(offset, sit_i->sit_bitmap))
blk_addr += sit_i->sit_blocks;
@@ -659,6 +683,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
unsigned int block_off = SIT_BLOCK_OFFSET(start);
f2fs_change_bit(block_off, sit_i->sit_bitmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_change_bit(block_off, sit_i->sit_bitmap_mir);
+#endif
}
static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
@@ -689,6 +716,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
- (base + 1) + type;
}
+static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
+ unsigned int secno)
+{
+ if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >=
+ sbi->fggc_threshold)
+ return true;
+ return false;
+}
+
static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
{
if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
@@ -700,8 +736,8 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
* It is very important to gather dirty pages and write at once, so that we can
* submit a big bio without interfering other data writes.
* By default, 512 pages for directory data,
- * 512 pages (2MB) * 3 for three types of nodes, and
- * max_bio_blocks for meta are set.
+ * 512 pages (2MB) * 8 for nodes, and
+ * 256 pages * 8 for meta are set.
*/
static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
{
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a831303..96fe8ed 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -89,6 +89,7 @@ enum {
Opt_active_logs,
Opt_disable_ext_identify,
Opt_inline_xattr,
+ Opt_noinline_xattr,
Opt_inline_data,
Opt_inline_dentry,
Opt_noinline_dentry,
@@ -101,6 +102,7 @@ enum {
Opt_noinline_data,
Opt_data_flush,
Opt_mode,
+ Opt_io_size_bits,
Opt_fault_injection,
Opt_lazytime,
Opt_nolazytime,
@@ -121,6 +123,7 @@ static match_table_t f2fs_tokens = {
{Opt_active_logs, "active_logs=%u"},
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
+ {Opt_noinline_xattr, "noinline_xattr"},
{Opt_inline_data, "inline_data"},
{Opt_inline_dentry, "inline_dentry"},
{Opt_noinline_dentry, "noinline_dentry"},
@@ -133,6 +136,7 @@ static match_table_t f2fs_tokens = {
{Opt_noinline_data, "noinline_data"},
{Opt_data_flush, "data_flush"},
{Opt_mode, "mode=%s"},
+ {Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
@@ -143,6 +147,7 @@ static match_table_t f2fs_tokens = {
enum {
GC_THREAD, /* struct f2fs_gc_thread */
SM_INFO, /* struct f2fs_sm_info */
+ DCC_INFO, /* struct discard_cmd_control */
NM_INFO, /* struct f2fs_nm_info */
F2FS_SBI, /* struct f2fs_sb_info */
#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -166,6 +171,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
return (unsigned char *)sbi->gc_thread;
else if (struct_type == SM_INFO)
return (unsigned char *)SM_I(sbi);
+ else if (struct_type == DCC_INFO)
+ return (unsigned char *)SM_I(sbi)->dcc_info;
else if (struct_type == NM_INFO)
return (unsigned char *)NM_I(sbi);
else if (struct_type == F2FS_SBI)
@@ -281,7 +288,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
@@ -439,6 +446,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_inline_xattr:
set_opt(sbi, INLINE_XATTR);
break;
+ case Opt_noinline_xattr:
+ clear_opt(sbi, INLINE_XATTR);
+ break;
#else
case Opt_user_xattr:
f2fs_msg(sb, KERN_INFO,
@@ -452,6 +462,10 @@ static int parse_options(struct super_block *sb, char *options)
f2fs_msg(sb, KERN_INFO,
"inline_xattr options not supported");
break;
+ case Opt_noinline_xattr:
+ f2fs_msg(sb, KERN_INFO,
+ "noinline_xattr options not supported");
+ break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
case Opt_acl:
@@ -535,11 +549,23 @@ static int parse_options(struct super_block *sb, char *options)
}
kfree(name);
break;
+ case Opt_io_size_bits:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
+ f2fs_msg(sb, KERN_WARNING,
+ "Not support %d, larger than %d",
+ 1 << arg, BIO_MAX_PAGES);
+ return -EINVAL;
+ }
+ sbi->write_io_size_bits = arg;
+ break;
case Opt_fault_injection:
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
f2fs_build_fault_attr(sbi, arg);
+ set_opt(sbi, FAULT_INJECTION);
#else
f2fs_msg(sb, KERN_INFO,
"FAULT_INJECTION was not selected");
@@ -558,6 +584,13 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
}
}
+
+ if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Should set mode=lfs with %uKB-sized IO",
+ F2FS_IO_SIZE_KB(sbi));
+ return -EINVAL;
+ }
return 0;
}
@@ -591,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
static int f2fs_drop_inode(struct inode *inode)
{
+ int ret;
/*
* This is to avoid a deadlock condition like below.
* writeback_single_inode(inode)
@@ -623,10 +657,12 @@ static int f2fs_drop_inode(struct inode *inode)
spin_lock(&inode->i_lock);
atomic_dec(&inode->i_count);
}
+ trace_f2fs_drop_inode(inode, 0);
return 0;
}
-
- return generic_drop_inode(inode);
+ ret = generic_drop_inode(inode);
+ trace_f2fs_drop_inode(inode, ret);
+ return ret;
}
int f2fs_inode_dirtied(struct inode *inode, bool sync)
@@ -750,6 +786,9 @@ static void f2fs_put_super(struct super_block *sb)
write_checkpoint(sbi, &cpc);
}
+ /* be sure to wait for any on-going discard commands */
+ f2fs_wait_discard_bio(sbi, NULL_ADDR);
+
/* write_checkpoint can update stat informaion */
f2fs_destroy_stats(sbi);
@@ -782,7 +821,7 @@ static void f2fs_put_super(struct super_block *sb)
kfree(sbi->raw_super);
destroy_device_list(sbi);
-
+ mempool_destroy(sbi->write_io_dummy);
destroy_percpu_info(sbi);
kfree(sbi);
}
@@ -882,6 +921,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",nouser_xattr");
if (test_opt(sbi, INLINE_XATTR))
seq_puts(seq, ",inline_xattr");
+ else
+ seq_puts(seq, ",noinline_xattr");
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
if (test_opt(sbi, POSIX_ACL))
@@ -918,6 +959,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
else if (test_opt(sbi, LFS))
seq_puts(seq, "lfs");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
+ if (F2FS_IO_SIZE_BITS(sbi))
+ seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (test_opt(sbi, FAULT_INJECTION))
+ seq_puts(seq, ",fault_injection");
+#endif
return 0;
}
@@ -995,6 +1042,7 @@ static void default_options(struct f2fs_sb_info *sbi)
sbi->active_logs = NR_CURSEG_TYPE;
set_opt(sbi, BG_GC);
+ set_opt(sbi, INLINE_XATTR);
set_opt(sbi, INLINE_DATA);
set_opt(sbi, INLINE_DENTRY);
set_opt(sbi, EXTENT_CACHE);
@@ -1686,36 +1734,55 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ unsigned int max_devices = MAX_DEVICES;
int i;
- for (i = 0; i < MAX_DEVICES; i++) {
- if (!RDEV(i).path[0])
+ /* Initialize single device information */
+ if (!RDEV(0).path[0]) {
+ if (!bdev_is_zoned(sbi->sb->s_bdev))
return 0;
+ max_devices = 1;
+ }
- if (i == 0) {
- sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) *
- MAX_DEVICES, GFP_KERNEL);
- if (!sbi->devs)
- return -ENOMEM;
- }
+ /*
+ * Initialize multiple devices information, or single
+ * zoned block device information.
+ */
+ sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
+ GFP_KERNEL);
+ if (!sbi->devs)
+ return -ENOMEM;
- memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
- FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
- if (i == 0) {
- FDEV(i).start_blk = 0;
- FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1 +
- le32_to_cpu(raw_super->segment0_blkaddr);
- } else {
- FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
- FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1;
- }
+ for (i = 0; i < max_devices; i++) {
- FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
+ if (i > 0 && !RDEV(i).path[0])
+ break;
+
+ if (max_devices == 1) {
+ /* Single zoned block device mount */
+ FDEV(0).bdev =
+ blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
+ sbi->sb->s_mode, sbi->sb->s_type);
+ } else {
+ /* Multi-device mount */
+ memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
+ FDEV(i).total_segments =
+ le32_to_cpu(RDEV(i).total_segments);
+ if (i == 0) {
+ FDEV(i).start_blk = 0;
+ FDEV(i).end_blk = FDEV(i).start_blk +
+ (FDEV(i).total_segments <<
+ sbi->log_blocks_per_seg) - 1 +
+ le32_to_cpu(raw_super->segment0_blkaddr);
+ } else {
+ FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
+ FDEV(i).end_blk = FDEV(i).start_blk +
+ (FDEV(i).total_segments <<
+ sbi->log_blocks_per_seg) - 1;
+ }
+ FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
sbi->sb->s_mode, sbi->sb->s_type);
+ }
if (IS_ERR(FDEV(i).bdev))
return PTR_ERR(FDEV(i).bdev);
@@ -1735,6 +1802,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
"Failed to initialize F2FS blkzone information");
return -EINVAL;
}
+ if (max_devices == 1)
+ break;
f2fs_msg(sbi->sb, KERN_INFO,
"Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
i, FDEV(i).path,
@@ -1751,6 +1820,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
FDEV(i).total_segments,
FDEV(i).start_blk, FDEV(i).end_blk);
}
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
return 0;
}
@@ -1868,12 +1939,19 @@ try_onemore:
if (err)
goto free_options;
+ if (F2FS_IO_SIZE(sbi) > 1) {
+ sbi->write_io_dummy =
+ mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
+ if (!sbi->write_io_dummy)
+ goto free_options;
+ }
+
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
- goto free_options;
+ goto free_io_dummy;
}
err = get_valid_checkpoint(sbi);
@@ -2048,6 +2126,8 @@ skip_recovery:
sbi->valid_super_block ? 1 : 2, err);
}
+ f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
+ cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
f2fs_update_time(sbi, REQ_TIME);
return 0;
@@ -2091,6 +2171,8 @@ free_devices:
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
+free_io_dummy:
+ mempool_destroy(sbi->write_io_dummy);
free_options:
destroy_percpu_info(sbi);
kfree(options);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index c47ce2f..7298a44 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -217,6 +217,112 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
return entry;
}
+static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
+ void **last_addr, int index,
+ size_t len, const char *name)
+{
+ struct f2fs_xattr_entry *entry;
+ unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2;
+
+ list_for_each_xattr(entry, base_addr) {
+ if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
+ (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
+ base_addr + inline_size) {
+ *last_addr = entry;
+ return NULL;
+ }
+ if (entry->e_name_index != index)
+ continue;
+ if (entry->e_name_len != len)
+ continue;
+ if (!memcmp(entry->e_name, name, len))
+ break;
+ }
+ return entry;
+}
+
+static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
+ unsigned int index, unsigned int len,
+ const char *name, struct f2fs_xattr_entry **xe,
+ void **base_addr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ void *cur_addr, *txattr_addr, *last_addr = NULL;
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
+ unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
+ unsigned int inline_size = 0;
+ int err = 0;
+
+ inline_size = inline_xattr_size(inode);
+
+ if (!size && !inline_size)
+ return -ENODATA;
+
+ txattr_addr = kzalloc(inline_size + size + sizeof(__u32),
+ GFP_F2FS_ZERO);
+ if (!txattr_addr)
+ return -ENOMEM;
+
+ /* read from inline xattr */
+ if (inline_size) {
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(ipage);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+ inline_addr = inline_xattr_addr(page);
+ }
+ memcpy(txattr_addr, inline_addr, inline_size);
+ f2fs_put_page(page, 1);
+
+ *xe = __find_inline_xattr(txattr_addr, &last_addr,
+ index, len, name);
+ if (*xe)
+ goto check;
+ }
+
+ /* read from xattr node block */
+ if (xnid) {
+ struct page *xpage;
+ void *xattr_addr;
+
+ /* The inode already has an extended attribute block. */
+ xpage = get_node_page(sbi, xnid);
+ if (IS_ERR(xpage)) {
+ err = PTR_ERR(xpage);
+ goto out;
+ }
+
+ xattr_addr = page_address(xpage);
+ memcpy(txattr_addr + inline_size, xattr_addr, size);
+ f2fs_put_page(xpage, 1);
+ }
+
+ if (last_addr)
+ cur_addr = XATTR_HDR(last_addr) - 1;
+ else
+ cur_addr = txattr_addr;
+
+ *xe = __find_xattr(cur_addr, index, len, name);
+check:
+ if (IS_XATTR_LAST_ENTRY(*xe)) {
+ err = -ENODATA;
+ goto out;
+ }
+
+ *base_addr = txattr_addr;
+ return 0;
+out:
+ kzfree(txattr_addr);
+ return err;
+}
+
static int read_all_xattrs(struct inode *inode, struct page *ipage,
void **base_addr)
{
@@ -348,23 +454,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
xattr_addr = page_address(xpage);
- memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
- sizeof(struct node_footer));
+ memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE);
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
- /* need to checkpoint during fsync */
- F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
return 0;
}
int f2fs_getxattr(struct inode *inode, int index, const char *name,
void *buffer, size_t buffer_size, struct page *ipage)
{
- struct f2fs_xattr_entry *entry;
- void *base_addr;
+ struct f2fs_xattr_entry *entry = NULL;
int error = 0;
- size_t size, len;
+ unsigned int size, len;
+ void *base_addr = NULL;
if (name == NULL)
return -EINVAL;
@@ -373,21 +476,16 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
- error = read_all_xattrs(inode, ipage, &base_addr);
+ error = lookup_all_xattrs(inode, ipage, index, len, name,
+ &entry, &base_addr);
if (error)
return error;
- entry = __find_xattr(base_addr, index, len, name);
- if (IS_XATTR_LAST_ENTRY(entry)) {
- error = -ENODATA;
- goto cleanup;
- }
-
size = le16_to_cpu(entry->e_value_size);
if (buffer && size > buffer_size) {
error = -ERANGE;
- goto cleanup;
+ goto out;
}
if (buffer) {
@@ -395,8 +493,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
memcpy(buffer, pval, size);
}
error = size;
-
-cleanup:
+out:
kzfree(base_addr);
return error;
}
@@ -445,6 +542,13 @@ cleanup:
return error;
}
+static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry,
+ const void *value, size_t size)
+{
+ void *pval = entry->e_name + entry->e_name_len;
+ return (entry->e_value_size == size) && !memcmp(pval, value, size);
+}
+
static int __f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size,
struct page *ipage, int flags)
@@ -479,12 +583,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
- if ((flags & XATTR_REPLACE) && !found) {
+ if (found) {
+ if ((flags & XATTR_CREATE)) {
+ error = -EEXIST;
+ goto exit;
+ }
+
+ if (f2fs_xattr_value_same(here, value, size))
+ goto exit;
+ } else if ((flags & XATTR_REPLACE)) {
error = -ENODATA;
goto exit;
- } else if ((flags & XATTR_CREATE) && found) {
- error = -EEXIST;
- goto exit;
}
last = here;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index f990de2..d5a9492 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -72,9 +72,10 @@ struct f2fs_xattr_entry {
for (entry = XATTR_FIRST_ENTRY(addr);\
!IS_XATTR_LAST_ENTRY(entry);\
entry = XATTR_NEXT_ENTRY(entry))
-
-#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \
- sizeof(struct node_footer) - sizeof(__u32))
+#define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
+#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32))
+#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
+ VALID_XATTR_BLOCK_SIZE)
#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
sizeof(struct f2fs_xattr_header) - \
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e6b764a..051dac1 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -364,8 +364,8 @@ extern const struct file_operations fat_file_operations;
extern const struct inode_operations fat_file_inode_operations;
extern int fat_setattr(struct dentry *dentry, struct iattr *attr);
extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
-extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat);
+extern int fat_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags);
extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 3d04b12..4724cc9 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -365,9 +365,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
fat_flush_inodes(inode->i_sb, inode, NULL);
}
-int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int fat_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
generic_fillattr(inode, stat);
stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index e1c54f2..be8fbe2 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -7,6 +7,7 @@
#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/sched/task.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
diff --git a/fs/file.c b/fs/file.c
index 69d6990..ad6f094 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -12,7 +12,7 @@
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/time.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/file.h>
diff --git a/fs/file_table.c b/fs/file_table.c
index 6d982b5..954d510 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/security.h>
+#include <linux/cred.h>
#include <linux/eventpoll.h>
#include <linux/rcupdate.h>
#include <linux/mount.h>
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 7dca743..be02507 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -1,5 +1,6 @@
#include <linux/export.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
#include <linux/fs.h>
#include <linux/path.h>
#include <linux/slab.h>
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 5d5ddaa..67f9408 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -329,7 +329,7 @@ static void fscache_objlist_config(struct fscache_objlist_data *data)
config = 0;
rcu_read_lock();
- confkey = user_key_payload(key);
+ confkey = user_key_payload_rcu(key);
buf = confkey->data;
for (len = confkey->datalen - 1; len >= 0; len--) {
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f117926..b681b43 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
+#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 811fd89..00800c0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -473,7 +473,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (err) {
fuse_sync_release(ff, flags);
} else {
- file->private_data = fuse_file_get(ff);
+ file->private_data = ff;
fuse_finish_open(inode, file);
}
return err;
@@ -1777,10 +1777,10 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
return ret;
}
-static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
- struct kstat *stat)
+static int fuse_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
- struct inode *inode = d_inode(entry);
+ struct inode *inode = d_inode(path->dentry);
struct fuse_conn *fc = get_fuse_conn(inode);
if (!fuse_allow_current_process(fc))
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e80bfd0..ec238fb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -58,7 +58,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
}
INIT_LIST_HEAD(&ff->write_entry);
- atomic_set(&ff->count, 0);
+ atomic_set(&ff->count, 1);
RB_CLEAR_NODE(&ff->polled_node);
init_waitqueue_head(&ff->poll_wait);
@@ -75,7 +75,7 @@ void fuse_file_free(struct fuse_file *ff)
kfree(ff);
}
-struct fuse_file *fuse_file_get(struct fuse_file *ff)
+static struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
atomic_inc(&ff->count);
return ff;
@@ -100,6 +100,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
} else if (sync) {
+ __set_bit(FR_FORCE, &req->flags);
__clear_bit(FR_BACKGROUND, &req->flags);
fuse_request_send(ff->fc, req);
iput(req->misc.release.inode);
@@ -146,7 +147,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
ff->open_flags &= ~FOPEN_DIRECT_IO;
ff->nodeid = nodeid;
- file->private_data = fuse_file_get(ff);
+ file->private_data = ff;
return 0;
}
@@ -245,14 +246,9 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
void fuse_release_common(struct file *file, int opcode)
{
- struct fuse_file *ff;
- struct fuse_req *req;
-
- ff = file->private_data;
- if (unlikely(!ff))
- return;
+ struct fuse_file *ff = file->private_data;
+ struct fuse_req *req = ff->reserved_req;
- req = ff->reserved_req;
fuse_prepare_release(ff, file->f_flags, opcode);
if (ff->flock) {
@@ -297,13 +293,13 @@ static int fuse_release(struct inode *inode, struct file *file)
void fuse_sync_release(struct fuse_file *ff, int flags)
{
- WARN_ON(atomic_read(&ff->count) > 1);
+ WARN_ON(atomic_read(&ff->count) != 1);
fuse_prepare_release(ff, flags, FUSE_RELEASE);
- __set_bit(FR_FORCE, &ff->reserved_req->flags);
- __clear_bit(FR_BACKGROUND, &ff->reserved_req->flags);
- fuse_request_send(ff->fc, ff->reserved_req);
- fuse_put_request(ff->fc, ff->reserved_req);
- kfree(ff);
+ /*
+ * iput(NULL) is a no-op and since the refcount is 1 and everything's
+ * synchronous, we are fine with not doing igrab() here"
+ */
+ fuse_file_put(ff, true);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 052f8d3..32ac2c9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -732,7 +732,6 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
-struct fuse_file *fuse_file_get(struct fuse_file *ff);
void fuse_file_free(struct fuse_file *ff);
void fuse_finish_open(struct inode *inode, struct file *file);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index eb7724b..e279c3c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -13,6 +13,7 @@
#include <linux/buffer_head.h>
#include <linux/namei.h>
#include <linux/mm.h>
+#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/gfs2_ondisk.h>
@@ -1959,9 +1960,10 @@ out:
/**
* gfs2_getattr - Read out an inode's attributes
- * @mnt: The vfsmount the inode is being accessed from
- * @dentry: The dentry to stat
+ * @path: Object to query
* @stat: The inode's stats
+ * @request_mask: Mask of STATX_xxx flags indicating the caller's interests
+ * @flags: AT_STATX_xxx setting
*
* This may be called from the VFS directly, or from within GFS2 with the
* inode locked, so we look to see if the glock is already locked and only
@@ -1972,10 +1974,10 @@ out:
* Returns: errno
*/
-static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int gfs2_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int error;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8b907c5..0515f0a 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -15,6 +15,7 @@
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
+#include <linux/sched/signal.h>
#include "incore.h"
#include "glock.h"
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index e3ee387..361796a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -10,7 +10,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/bio.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index f8d30e4..7a51534 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 5de5c48..75b2542 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -169,7 +169,7 @@ static int hfs_readdir(struct file *file, struct dir_context *ctx)
* Can be done after the list insertion; exclusion with
* hfs_delete_cat() is provided by directory lock.
*/
- memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key));
+ memcpy(&rd->key, &fd.key->cat, sizeof(struct hfs_cat_key));
out:
hfs_find_exit(&fd);
return err;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index f776acf..bfbba79 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,6 +14,7 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/uio.h>
#include <linux/xattr.h>
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 2e796f8..e8638d5 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,6 +14,7 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/uio.h>
#include "hfsplus_fs.h"
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index aebb78f..d352f3a 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -18,7 +18,7 @@
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/blkdev.h>
#include <asm/unaligned.h>
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 54de77e..8f96461 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -11,7 +11,7 @@
#include <linux/thread_info.h>
#include <asm/current.h>
-#include <linux/sched.h> /* remove ASAP */
+#include <linux/sched/signal.h> /* remove ASAP */
#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/mount.h>
diff --git a/fs/ioctl.c b/fs/ioctl.c
index cb9b029..569db68 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,8 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
+#include <linux/sched/signal.h>
+
#include "internal.h"
#include <asm/ioctls.h>
diff --git a/fs/iomap.c b/fs/iomap.c
index 0f85f24..3ca1a8e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -26,6 +26,8 @@
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/dax.h>
+#include <linux/sched/signal.h>
+
#include "internal.h"
/*
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 871c8b3..020ba09 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/cred.h>
#include <linux/nls.h>
#include <linux/ctype.h>
#include <linux/statfs.h>
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index e5c1783..453a6a1 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -16,7 +16,7 @@
#include <linux/jffs2.h>
#include <linux/mtd/mtd.h>
#include <linux/completion.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include "nodelist.h"
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 567653f..76fa814 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -15,6 +15,7 @@
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/mtd/mtd.h>
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index cda0774..a7bbe87 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -14,7 +14,7 @@
#include <linux/kernel.h>
#include <linux/mtd/mtd.h>
#include <linux/compiler.h>
-#include <linux/sched.h> /* For cond_resched() */
+#include <linux/sched/signal.h>
#include "nodelist.h"
#include "debug.h"
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 35043a8..8e4dc7a 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -13,7 +13,7 @@
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/pagemap.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include "kernfs-internal.h"
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index ac9e108..fb4b4a7 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -200,11 +200,11 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
set_nlink(inode, kn->dir.subdirs + 2);
}
-int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct kernfs_node *kn = dentry->d_fsdata;
- struct inode *inode = d_inode(dentry);
+ struct kernfs_node *kn = path->dentry->d_fsdata;
+ struct inode *inode = d_inode(path->dentry);
mutex_lock(&kernfs_mutex);
kernfs_refresh_inode(kn, inode);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3100987..2d5144a 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -80,8 +80,8 @@ extern const struct xattr_handler *kernfs_xattr_handlers[];
void kernfs_evict_inode(struct inode *inode);
int kernfs_iop_permission(struct inode *inode, int mask);
int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
-int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat);
+int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags);
ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
/*
diff --git a/fs/libfs.c b/fs/libfs.c
index 28d6f35..a8b62e5 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -7,6 +7,7 @@
#include <linux/export.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
+#include <linux/cred.h>
#include <linux/mount.h>
#include <linux/vfs.h>
#include <linux/quotaops.h>
@@ -20,10 +21,10 @@
#include "internal.h"
-int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+int simple_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
generic_fillattr(inode, stat);
stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
return 0;
@@ -1143,10 +1144,10 @@ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENOENT);
}
-static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int empty_dir_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
generic_fillattr(inode, stat);
return 0;
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1c13dd8..e7c8b9c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -17,7 +17,7 @@
#include <linux/sysctl.h>
#include <linux/moduleparam.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/uio.h>
@@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
(struct sockaddr *)&sin6);
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e7d9bf8..6ac76b0 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -622,11 +622,14 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
return err;
}
-int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int minix_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
- struct super_block *sb = dentry->d_sb;
- generic_fillattr(d_inode(dentry), stat);
- if (INODE_VERSION(d_inode(dentry)) == MINIX_V1)
+ struct super_block *sb = path->dentry->d_sb;
+ struct inode *inode = d_inode(path->dentry);
+
+ generic_fillattr(inode, stat);
+ if (INODE_VERSION(inode) == MINIX_V1)
stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
else
stat->blocks = (sb->s_blocksize / 512) * V2_minix_blocks(stat->size, sb);
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 01ad81d..663d661 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -51,7 +51,7 @@ extern unsigned long minix_count_free_inodes(struct super_block *sb);
extern int minix_new_block(struct inode * inode);
extern void minix_free_block(struct inode *inode, unsigned long block);
extern unsigned long minix_count_free_blocks(struct super_block *sb);
-extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int minix_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
extern void V1_minix_truncate(struct inode *);
diff --git a/fs/namei.c b/fs/namei.c
index da689c9..d41fab7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -672,17 +672,15 @@ static bool legitimize_links(struct nameidata *nd)
/**
* unlazy_walk - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
- * @dentry: child of nd->path.dentry or NULL
- * @seq: seq number to check dentry against
* Returns: 0 on success, -ECHILD on failure
*
- * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
- * for ref-walk mode. @dentry must be a path found by a do_lookup call on
- * @nd or NULL. Must be called from rcu-walk context.
+ * unlazy_walk attempts to legitimize the current nd->path and nd->root
+ * for ref-walk mode.
+ * Must be called from rcu-walk context.
* Nothing should touch nameidata between unlazy_walk() failure and
* terminate_walk().
*/
-static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq)
+static int unlazy_walk(struct nameidata *nd)
{
struct dentry *parent = nd->path.dentry;
@@ -691,33 +689,66 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq
nd->flags &= ~LOOKUP_RCU;
if (unlikely(!legitimize_links(nd)))
goto out2;
+ if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
+ goto out1;
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq)))
+ goto out;
+ }
+ rcu_read_unlock();
+ BUG_ON(nd->inode != parent->d_inode);
+ return 0;
+
+out2:
+ nd->path.mnt = NULL;
+ nd->path.dentry = NULL;
+out1:
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+out:
+ rcu_read_unlock();
+ return -ECHILD;
+}
+
+/**
+ * unlazy_child - try to switch to ref-walk mode.
+ * @nd: nameidata pathwalk data
+ * @dentry: child of nd->path.dentry
+ * @seq: seq number to check dentry against
+ * Returns: 0 on success, -ECHILD on failure
+ *
+ * unlazy_child attempts to legitimize the current nd->path, nd->root and dentry
+ * for ref-walk mode. @dentry must be a path found by a do_lookup call on
+ * @nd. Must be called from rcu-walk context.
+ * Nothing should touch nameidata between unlazy_child() failure and
+ * terminate_walk().
+ */
+static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned seq)
+{
+ BUG_ON(!(nd->flags & LOOKUP_RCU));
+
+ nd->flags &= ~LOOKUP_RCU;
+ if (unlikely(!legitimize_links(nd)))
+ goto out2;
if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
goto out2;
- if (unlikely(!lockref_get_not_dead(&parent->d_lockref)))
+ if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref)))
goto out1;
/*
- * For a negative lookup, the lookup sequence point is the parents
- * sequence point, and it only needs to revalidate the parent dentry.
- *
- * For a positive lookup, we need to move both the parent and the
- * dentry from the RCU domain to be properly refcounted. And the
- * sequence number in the dentry validates *both* dentry counters,
- * since we checked the sequence number of the parent after we got
- * the child sequence number. So we know the parent must still
- * be valid if the child sequence number is still valid.
+ * We need to move both the parent and the dentry from the RCU domain
+ * to be properly refcounted. And the sequence number in the dentry
+ * validates *both* dentry counters, since we checked the sequence
+ * number of the parent after we got the child sequence number. So we
+ * know the parent must still be valid if the child sequence number is
*/
- if (!dentry) {
- if (read_seqcount_retry(&parent->d_seq, nd->seq))
- goto out;
- BUG_ON(nd->inode != parent->d_inode);
- } else {
- if (!lockref_get_not_dead(&dentry->d_lockref))
- goto out;
- if (read_seqcount_retry(&dentry->d_seq, seq))
- goto drop_dentry;
+ if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
+ goto out;
+ if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) {
+ rcu_read_unlock();
+ dput(dentry);
+ goto drop_root_mnt;
}
-
/*
* Sequence counts matched. Now make sure that the root is
* still valid and get it if required.
@@ -733,10 +764,6 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq
rcu_read_unlock();
return 0;
-drop_dentry:
- rcu_read_unlock();
- dput(dentry);
- goto drop_root_mnt;
out2:
nd->path.mnt = NULL;
out1:
@@ -749,27 +776,12 @@ drop_root_mnt:
return -ECHILD;
}
-static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq)
-{
- if (unlikely(!legitimize_path(nd, link, seq))) {
- drop_links(nd);
- nd->depth = 0;
- nd->flags &= ~LOOKUP_RCU;
- nd->path.mnt = NULL;
- nd->path.dentry = NULL;
- if (!(nd->flags & LOOKUP_ROOT))
- nd->root.mnt = NULL;
- rcu_read_unlock();
- } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) {
- return 0;
- }
- path_put(link);
- return -ECHILD;
-}
-
static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
{
- return dentry->d_op->d_revalidate(dentry, flags);
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
+ return dentry->d_op->d_revalidate(dentry, flags);
+ else
+ return 1;
}
/**
@@ -790,7 +802,7 @@ static int complete_walk(struct nameidata *nd)
if (nd->flags & LOOKUP_RCU) {
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- if (unlikely(unlazy_walk(nd, NULL, 0)))
+ if (unlikely(unlazy_walk(nd)))
return -ECHILD;
}
@@ -1016,7 +1028,7 @@ const char *get_link(struct nameidata *nd)
touch_atime(&last->link);
cond_resched();
} else if (atime_needs_update_rcu(&last->link, inode)) {
- if (unlikely(unlazy_walk(nd, NULL, 0)))
+ if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
touch_atime(&last->link);
}
@@ -1035,7 +1047,7 @@ const char *get_link(struct nameidata *nd)
if (nd->flags & LOOKUP_RCU) {
res = get(NULL, inode, &last->done);
if (res == ERR_PTR(-ECHILD)) {
- if (unlikely(unlazy_walk(nd, NULL, 0)))
+ if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
res = get(dentry, inode, &last->done);
}
@@ -1469,19 +1481,14 @@ static struct dentry *lookup_dcache(const struct qstr *name,
struct dentry *dir,
unsigned int flags)
{
- struct dentry *dentry;
- int error;
-
- dentry = d_lookup(dir, name);
+ struct dentry *dentry = d_lookup(dir, name);
if (dentry) {
- if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
- error = d_revalidate(dentry, flags);
- if (unlikely(error <= 0)) {
- if (!error)
- d_invalidate(dentry);
- dput(dentry);
- return ERR_PTR(error);
- }
+ int error = d_revalidate(dentry, flags);
+ if (unlikely(error <= 0)) {
+ if (!error)
+ d_invalidate(dentry);
+ dput(dentry);
+ return ERR_PTR(error);
}
}
return dentry;
@@ -1546,7 +1553,7 @@ static int lookup_fast(struct nameidata *nd,
bool negative;
dentry = __d_lookup_rcu(parent, &nd->last, &seq);
if (unlikely(!dentry)) {
- if (unlazy_walk(nd, NULL, 0))
+ if (unlazy_walk(nd))
return -ECHILD;
return 0;
}
@@ -1571,14 +1578,8 @@ static int lookup_fast(struct nameidata *nd,
return -ECHILD;
*seqp = seq;
- if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
- status = d_revalidate(dentry, nd->flags);
- if (unlikely(status <= 0)) {
- if (unlazy_walk(nd, dentry, seq))
- return -ECHILD;
- if (status == -ECHILD)
- status = d_revalidate(dentry, nd->flags);
- } else {
+ status = d_revalidate(dentry, nd->flags);
+ if (likely(status > 0)) {
/*
* Note: do negative dentry check after revalidation in
* case that drops it.
@@ -1589,15 +1590,17 @@ static int lookup_fast(struct nameidata *nd,
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
return 1;
- if (unlazy_walk(nd, dentry, seq))
- return -ECHILD;
}
+ if (unlazy_child(nd, dentry, seq))
+ return -ECHILD;
+ if (unlikely(status == -ECHILD))
+ /* we'd been told to redo it in non-rcu mode */
+ status = d_revalidate(dentry, nd->flags);
} else {
dentry = __d_lookup(parent, &nd->last);
if (unlikely(!dentry))
return 0;
- if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
- status = d_revalidate(dentry, nd->flags);
+ status = d_revalidate(dentry, nd->flags);
}
if (unlikely(status <= 0)) {
if (!status)
@@ -1636,8 +1639,7 @@ again:
if (IS_ERR(dentry))
goto out;
if (unlikely(!d_in_lookup(dentry))) {
- if ((dentry->d_flags & DCACHE_OP_REVALIDATE) &&
- !(flags & LOOKUP_NO_REVAL)) {
+ if (!(flags & LOOKUP_NO_REVAL)) {
int error = d_revalidate(dentry, flags);
if (unlikely(error <= 0)) {
if (!error) {
@@ -1668,7 +1670,7 @@ static inline int may_lookup(struct nameidata *nd)
int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
if (err != -ECHILD)
return err;
- if (unlazy_walk(nd, NULL, 0))
+ if (unlazy_walk(nd))
return -ECHILD;
}
return inode_permission(nd->inode, MAY_EXEC);
@@ -1703,9 +1705,17 @@ static int pick_link(struct nameidata *nd, struct path *link,
error = nd_alloc_stack(nd);
if (unlikely(error)) {
if (error == -ECHILD) {
- if (unlikely(unlazy_link(nd, link, seq)))
- return -ECHILD;
- error = nd_alloc_stack(nd);
+ if (unlikely(!legitimize_path(nd, link, seq))) {
+ drop_links(nd);
+ nd->depth = 0;
+ nd->flags &= ~LOOKUP_RCU;
+ nd->path.mnt = NULL;
+ nd->path.dentry = NULL;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ } else if (likely(unlazy_walk(nd)) == 0)
+ error = nd_alloc_stack(nd);
}
if (error) {
path_put(link);
@@ -2122,7 +2132,7 @@ OK:
}
if (unlikely(!d_can_lookup(nd->path.dentry))) {
if (nd->flags & LOOKUP_RCU) {
- if (unlazy_walk(nd, NULL, 0))
+ if (unlazy_walk(nd))
return -ECHILD;
}
return -ENOTDIR;
@@ -2579,7 +2589,7 @@ mountpoint_last(struct nameidata *nd)
/* If we're in rcuwalk, drop out of it to handle last component */
if (nd->flags & LOOKUP_RCU) {
- if (unlazy_walk(nd, NULL, 0))
+ if (unlazy_walk(nd))
return -ECHILD;
}
@@ -3072,9 +3082,6 @@ static int lookup_open(struct nameidata *nd, struct path *path,
if (d_in_lookup(dentry))
break;
- if (!(dentry->d_flags & DCACHE_OP_REVALIDATE))
- break;
-
error = d_revalidate(dentry, nd->flags);
if (likely(error > 0))
break;
@@ -3356,13 +3363,50 @@ out:
return error;
}
+struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
+{
+ static const struct qstr name = QSTR_INIT("/", 1);
+ struct dentry *child = NULL;
+ struct inode *dir = dentry->d_inode;
+ struct inode *inode;
+ int error;
+
+ /* we want directory to be writable */
+ error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ if (error)
+ goto out_err;
+ error = -EOPNOTSUPP;
+ if (!dir->i_op->tmpfile)
+ goto out_err;
+ error = -ENOMEM;
+ child = d_alloc(dentry, &name);
+ if (unlikely(!child))
+ goto out_err;
+ error = dir->i_op->tmpfile(dir, child, mode);
+ if (error)
+ goto out_err;
+ error = -ENOENT;
+ inode = child->d_inode;
+ if (unlikely(!inode))
+ goto out_err;
+ if (!(open_flag & O_EXCL)) {
+ spin_lock(&inode->i_lock);
+ inode->i_state |= I_LINKABLE;
+ spin_unlock(&inode->i_lock);
+ }
+ return child;
+
+out_err:
+ dput(child);
+ return ERR_PTR(error);
+}
+EXPORT_SYMBOL(vfs_tmpfile);
+
static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
struct file *file, int *opened)
{
- static const struct qstr name = QSTR_INIT("/", 1);
struct dentry *child;
- struct inode *dir;
struct path path;
int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
if (unlikely(error))
@@ -3370,25 +3414,12 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
error = mnt_want_write(path.mnt);
if (unlikely(error))
goto out;
- dir = path.dentry->d_inode;
- /* we want directory to be writable */
- error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
- if (error)
+ child = vfs_tmpfile(path.dentry, op->mode, op->open_flag);
+ error = PTR_ERR(child);
+ if (unlikely(IS_ERR(child)))
goto out2;
- if (!dir->i_op->tmpfile) {
- error = -EOPNOTSUPP;
- goto out2;
- }
- child = d_alloc(path.dentry, &name);
- if (unlikely(!child)) {
- error = -ENOMEM;
- goto out2;
- }
dput(path.dentry);
path.dentry = child;
- error = dir->i_op->tmpfile(dir, child, op->mode);
- if (error)
- goto out2;
audit_inode(nd->name, child, 0);
/* Don't check for other permissions, the inode was just created */
error = may_open(&path, 0, op->open_flag);
@@ -3399,14 +3430,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
if (error)
goto out2;
error = open_check_o_direct(file);
- if (error) {
+ if (error)
fput(file);
- } else if (!(op->open_flag & O_EXCL)) {
- struct inode *inode = file_inode(file);
- spin_lock(&inode->i_lock);
- inode->i_state |= I_LINKABLE;
- spin_unlock(&inode->i_lock);
- }
out2:
mnt_drop_write(path.mnt);
out:
diff --git a/fs/namespace.c b/fs/namespace.c
index 8bfad42..cc1375ef 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -15,6 +15,7 @@
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
+#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h> /* init_rootfs */
#include <linux/fs_struct.h> /* get_fs_root et.al. */
@@ -24,6 +25,8 @@
#include <linux/magic.h>
#include <linux/bootmem.h>
#include <linux/task_work.h>
+#include <linux/sched/task.h>
+
#include "pnode.h"
#include "internal.h"
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 7eb89c2..d560609 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -30,6 +30,7 @@
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
+#include <linux/sched/signal.h>
#include <linux/namei.h>
#include <net/sock.h>
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 4434e49..12550c2 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -19,6 +19,7 @@
#include <linux/highuid.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/uaccess.h>
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index 97b111d..98b6db0 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -16,6 +16,7 @@
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
+#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/in.h>
#include <linux/net.h>
@@ -40,19 +41,12 @@ static int _recv(struct socket *sock, void *buf, int size, unsigned flags)
return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
}
-static inline int do_send(struct socket *sock, struct kvec *vec, int count,
- int len, unsigned flags)
-{
- struct msghdr msg = { .msg_flags = flags };
- return kernel_sendmsg(sock, &msg, vec, count, len);
-}
-
static int _send(struct socket *sock, const void *buff, int len)
{
- struct kvec vec;
- vec.iov_base = (void *) buff;
- vec.iov_len = len;
- return do_send(sock, &vec, 1, len, 0);
+ struct msghdr msg = { .msg_flags = 0 };
+ struct kvec vec = {.iov_base = (void *)buff, .iov_len = len};
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &vec, 1, len);
+ return sock_sendmsg(sock, &msg);
}
struct ncp_request_reply {
@@ -63,9 +57,7 @@ struct ncp_request_reply {
size_t datalen;
int result;
enum { RQ_DONE, RQ_INPROGRESS, RQ_QUEUED, RQ_IDLE, RQ_ABANDONED } status;
- struct kvec* tx_ciov;
- size_t tx_totallen;
- size_t tx_iovlen;
+ struct iov_iter from;
struct kvec tx_iov[3];
u_int16_t tx_type;
u_int32_t sign[6];
@@ -205,28 +197,22 @@ static inline void __ncptcp_abort(struct ncp_server *server)
static int ncpdgram_send(struct socket *sock, struct ncp_request_reply *req)
{
- struct kvec vec[3];
- /* sock_sendmsg updates iov pointers for us :-( */
- memcpy(vec, req->tx_ciov, req->tx_iovlen * sizeof(vec[0]));
- return do_send(sock, vec, req->tx_iovlen,
- req->tx_totallen, MSG_DONTWAIT);
+ struct msghdr msg = { .msg_iter = req->from, .msg_flags = MSG_DONTWAIT };
+ return sock_sendmsg(sock, &msg);
}
static void __ncptcp_try_send(struct ncp_server *server)
{
struct ncp_request_reply *rq;
- struct kvec *iov;
- struct kvec iovc[3];
+ struct msghdr msg = { .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT };
int result;
rq = server->tx.creq;
if (!rq)
return;
- /* sock_sendmsg updates iov pointers for us :-( */
- memcpy(iovc, rq->tx_ciov, rq->tx_iovlen * sizeof(iov[0]));
- result = do_send(server->ncp_sock, iovc, rq->tx_iovlen,
- rq->tx_totallen, MSG_NOSIGNAL | MSG_DONTWAIT);
+ msg.msg_iter = rq->from;
+ result = sock_sendmsg(server->ncp_sock, &msg);
if (result == -EAGAIN)
return;
@@ -236,21 +222,12 @@ static void __ncptcp_try_send(struct ncp_server *server)
__ncp_abort_request(server, rq, result);
return;
}
- if (result >= rq->tx_totallen) {
+ if (!msg_data_left(&msg)) {
server->rcv.creq = rq;
server->tx.creq = NULL;
return;
}
- rq->tx_totallen -= result;
- iov = rq->tx_ciov;
- while (iov->iov_len <= result) {
- result -= iov->iov_len;
- iov++;
- rq->tx_iovlen--;
- }
- iov->iov_base += result;
- iov->iov_len -= result;
- rq->tx_ciov = iov;
+ rq->from = msg.msg_iter;
}
static inline void ncp_init_header(struct ncp_server *server, struct ncp_request_reply *req, struct ncp_request_header *h)
@@ -263,22 +240,21 @@ static inline void ncp_init_header(struct ncp_server *server, struct ncp_request
static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request_reply *req)
{
- size_t signlen;
- struct ncp_request_header* h;
+ size_t signlen, len = req->tx_iov[1].iov_len;
+ struct ncp_request_header *h = req->tx_iov[1].iov_base;
- req->tx_ciov = req->tx_iov + 1;
-
- h = req->tx_iov[1].iov_base;
ncp_init_header(server, req, h);
- signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1,
- req->tx_iov[1].iov_len - sizeof(struct ncp_request_header) + 1,
- cpu_to_le32(req->tx_totallen), req->sign);
+ signlen = sign_packet(server,
+ req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1,
+ len - sizeof(struct ncp_request_header) + 1,
+ cpu_to_le32(len), req->sign);
if (signlen) {
- req->tx_ciov[1].iov_base = req->sign;
- req->tx_ciov[1].iov_len = signlen;
- req->tx_iovlen += 1;
- req->tx_totallen += signlen;
+ /* NCP over UDP appends signature */
+ req->tx_iov[2].iov_base = req->sign;
+ req->tx_iov[2].iov_len = signlen;
}
+ iov_iter_kvec(&req->from, WRITE | ITER_KVEC,
+ req->tx_iov + 1, signlen ? 2 : 1, len + signlen);
server->rcv.creq = req;
server->timeout_last = server->m.time_out;
server->timeout_retries = server->m.retry_count;
@@ -292,24 +268,23 @@ static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request
static void ncptcp_start_request(struct ncp_server *server, struct ncp_request_reply *req)
{
- size_t signlen;
- struct ncp_request_header* h;
+ size_t signlen, len = req->tx_iov[1].iov_len;
+ struct ncp_request_header *h = req->tx_iov[1].iov_base;
- req->tx_ciov = req->tx_iov;
- h = req->tx_iov[1].iov_base;
ncp_init_header(server, req, h);
signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1,
- req->tx_iov[1].iov_len - sizeof(struct ncp_request_header) + 1,
- cpu_to_be32(req->tx_totallen + 24), req->sign + 4) + 16;
+ len - sizeof(struct ncp_request_header) + 1,
+ cpu_to_be32(len + 24), req->sign + 4) + 16;
req->sign[0] = htonl(NCP_TCP_XMIT_MAGIC);
- req->sign[1] = htonl(req->tx_totallen + signlen);
+ req->sign[1] = htonl(len + signlen);
req->sign[2] = htonl(NCP_TCP_XMIT_VERSION);
req->sign[3] = htonl(req->datalen + 8);
+ /* NCP over TCP prepends signature */
req->tx_iov[0].iov_base = req->sign;
req->tx_iov[0].iov_len = signlen;
- req->tx_iovlen += 1;
- req->tx_totallen += signlen;
+ iov_iter_kvec(&req->from, WRITE | ITER_KVEC,
+ req->tx_iov, 2, len + signlen);
server->tx.creq = req;
__ncptcp_try_send(server);
@@ -364,18 +339,17 @@ static void __ncp_next_request(struct ncp_server *server)
static void info_server(struct ncp_server *server, unsigned int id, const void * data, size_t len)
{
if (server->info_sock) {
- struct kvec iov[2];
- __be32 hdr[2];
-
- hdr[0] = cpu_to_be32(len + 8);
- hdr[1] = cpu_to_be32(id);
-
- iov[0].iov_base = hdr;
- iov[0].iov_len = 8;
- iov[1].iov_base = (void *) data;
- iov[1].iov_len = len;
+ struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
+ __be32 hdr[2] = {cpu_to_be32(len + 8), cpu_to_be32(id)};
+ struct kvec iov[2] = {
+ {.iov_base = hdr, .iov_len = 8},
+ {.iov_base = (void *)data, .iov_len = len},
+ };
+
+ iov_iter_kvec(&msg.msg_iter, ITER_KVEC | WRITE,
+ iov, 2, len + 8);
- do_send(server->info_sock, iov, 2, len + 8, MSG_NOSIGNAL);
+ sock_sendmsg(server->info_sock, &msg);
}
}
@@ -711,8 +685,6 @@ static int do_ncp_rpc_call(struct ncp_server *server, int size,
req->datalen = max_reply_size;
req->tx_iov[1].iov_base = server->packet;
req->tx_iov[1].iov_len = size;
- req->tx_iovlen = 1;
- req->tx_totallen = size;
req->tx_type = *(u_int16_t*)server->packet;
result = ncp_add_request(server, req);
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index 6de1570..2ae676f 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -141,8 +141,7 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd)
void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd)
{
- if (cd->u.pipefs.dir)
- sunrpc_cache_unregister_pipefs(cd);
+ sunrpc_cache_unregister_pipefs(cd);
}
void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 484bebc..bb79972 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -9,6 +9,7 @@
#include <linux/completion.h>
#include <linux/ip.h>
#include <linux/module.h>
+#include <linux/sched/signal.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/nfs_fs.h>
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index eb094c6..d051fc3 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -83,23 +83,15 @@ static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes)
return p;
}
-static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
+static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
+ const char **str, size_t maxlen)
{
- __be32 *p;
-
- p = read_buf(xdr, 4);
- if (unlikely(p == NULL))
- return htonl(NFS4ERR_RESOURCE);
- *len = ntohl(*p);
-
- if (*len != 0) {
- p = read_buf(xdr, *len);
- if (unlikely(p == NULL))
- return htonl(NFS4ERR_RESOURCE);
- *str = (const char *)p;
- } else
- *str = NULL;
+ ssize_t err;
+ err = xdr_stream_decode_opaque_inline(xdr, (void **)str, maxlen);
+ if (err < 0)
+ return cpu_to_be32(NFS4ERR_RESOURCE);
+ *len = err;
return 0;
}
@@ -162,15 +154,9 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
__be32 *p;
__be32 status;
- status = decode_string(xdr, &hdr->taglen, &hdr->tag);
+ status = decode_string(xdr, &hdr->taglen, &hdr->tag, CB_OP_TAGLEN_MAXSZ);
if (unlikely(status != 0))
return status;
- /* We do not like overly long tags! */
- if (hdr->taglen > CB_OP_TAGLEN_MAXSZ) {
- printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n",
- __func__, hdr->taglen);
- return htonl(NFS4ERR_RESOURCE);
- }
p = read_buf(xdr, 12);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
@@ -582,12 +568,8 @@ out:
static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
- __be32 *p;
-
- p = xdr_reserve_space(xdr, 4 + len);
- if (unlikely(p == NULL))
- return htonl(NFS4ERR_RESOURCE);
- xdr_encode_opaque(p, str, len);
+ if (unlikely(xdr_stream_encode_opaque(xdr, str, len) < 0))
+ return cpu_to_be32(NFS4ERR_RESOURCE);
return 0;
}
@@ -1083,7 +1065,8 @@ struct svc_version nfs4_callback_version1 = {
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
- .vs_hidden = 1,
+ .vs_hidden = true,
+ .vs_need_cong_ctrl = true,
};
struct svc_version nfs4_callback_version4 = {
@@ -1092,5 +1075,6 @@ struct svc_version nfs4_callback_version4 = {
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
- .vs_hidden = 1,
+ .vs_hidden = true,
+ .vs_need_cong_ctrl = true,
};
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fad8104..fb499a3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2002,6 +2002,29 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
}
EXPORT_SYMBOL_GPL(nfs_link);
+static void
+nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
+{
+ struct dentry *old_dentry = data->old_dentry;
+ struct dentry *new_dentry = data->new_dentry;
+ struct inode *old_inode = d_inode(old_dentry);
+ struct inode *new_inode = d_inode(new_dentry);
+
+ nfs_mark_for_revalidate(old_inode);
+
+ switch (task->tk_status) {
+ case 0:
+ if (new_inode != NULL)
+ nfs_drop_nlink(new_inode);
+ d_move(old_dentry, new_dentry);
+ nfs_set_verifier(new_dentry,
+ nfs_save_change_attribute(data->new_dir));
+ break;
+ case -ENOENT:
+ nfs_dentry_handle_enoent(old_dentry);
+ }
+}
+
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2084,7 +2107,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode != NULL)
NFS_PROTO(new_inode)->return_delegation(new_inode);
- task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
+ nfs_complete_rename);
if (IS_ERR(task)) {
error = PTR_ERR(task);
goto out;
@@ -2094,21 +2118,11 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (error == 0)
error = task->tk_status;
rpc_put_task(task);
- nfs_mark_for_revalidate(old_inode);
out:
if (rehash)
d_rehash(rehash);
trace_nfs_rename_exit(old_dir, old_dentry,
new_dir, new_dentry, error);
- if (!error) {
- if (new_inode != NULL)
- nfs_drop_nlink(new_inode);
- d_move(old_dentry, new_dentry);
- nfs_set_verifier(new_dentry,
- nfs_save_change_attribute(new_dir));
- } else if (error == -ENOENT)
- nfs_dentry_handle_enoent(old_dentry);
-
/* new dentry created? */
if (dentry)
dput(dentry);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 18f98e0..44347f4 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -305,7 +305,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
}
hdr->pgio_done_cb = filelayout_read_done_cb;
- if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
+ if (nfs4_setup_sequence(hdr->ds_clp,
&hdr->args.seq_args,
&hdr->res.seq_res,
task))
@@ -403,7 +403,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
rpc_exit(task, 0);
return;
}
- if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
+ if (nfs4_setup_sequence(hdr->ds_clp,
&hdr->args.seq_args,
&hdr->res.seq_res,
task))
@@ -438,7 +438,7 @@ static void filelayout_commit_prepare(struct rpc_task *task, void *data)
{
struct nfs_commit_data *wdata = data;
- nfs41_setup_sequence(wdata->ds_clp->cl_session,
+ nfs4_setup_sequence(wdata->ds_clp,
&wdata->args.seq_args,
&wdata->res.seq_res,
task);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index d6acc68..42dedf2 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1053,9 +1053,6 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
- if (task->tk_status >= 0)
- return 0;
-
switch (task->tk_status) {
/* MDS state errors */
case -NFS4ERR_DELEG_REVOKED:
@@ -1157,9 +1154,6 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
- if (task->tk_status >= 0)
- return 0;
-
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
case -EACCES:
@@ -1195,6 +1189,13 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
{
int vers = clp->cl_nfs_mod->rpc_vers->number;
+ if (task->tk_status >= 0)
+ return 0;
+
+ /* Handle the case of an invalid layout segment */
+ if (!pnfs_is_valid_lseg(lseg))
+ return -NFS4ERR_RESET_TO_PNFS;
+
switch (vers) {
case 3:
return ff_layout_async_handle_error_v3(task, lseg, idx);
@@ -1384,30 +1385,14 @@ static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data)
rpc_call_start(task);
}
-static int ff_layout_setup_sequence(struct nfs_client *ds_clp,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res,
- struct rpc_task *task)
-{
- if (ds_clp->cl_session)
- return nfs41_setup_sequence(ds_clp->cl_session,
- args,
- res,
- task);
- return nfs40_setup_sequence(ds_clp->cl_slot_tbl,
- args,
- res,
- task);
-}
-
static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
- if (ff_layout_setup_sequence(hdr->ds_clp,
- &hdr->args.seq_args,
- &hdr->res.seq_res,
- task))
+ if (nfs4_setup_sequence(hdr->ds_clp,
+ &hdr->args.seq_args,
+ &hdr->res.seq_res,
+ task))
return;
if (ff_layout_read_prepare_common(task, hdr))
@@ -1578,10 +1563,10 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
- if (ff_layout_setup_sequence(hdr->ds_clp,
- &hdr->args.seq_args,
- &hdr->res.seq_res,
- task))
+ if (nfs4_setup_sequence(hdr->ds_clp,
+ &hdr->args.seq_args,
+ &hdr->res.seq_res,
+ task))
return;
if (ff_layout_write_prepare_common(task, hdr))
@@ -1667,10 +1652,10 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_commit_data *wdata = data;
- if (ff_layout_setup_sequence(wdata->ds_clp,
- &wdata->args.seq_args,
- &wdata->res.seq_res,
- task))
+ if (nfs4_setup_sequence(wdata->ds_clp,
+ &wdata->args.seq_args,
+ &wdata->res.seq_res,
+ task))
return;
ff_layout_commit_prepare_common(task, data);
}
@@ -1965,10 +1950,7 @@ static int ff_layout_encode_ioerr(struct xdr_stream *xdr,
static void
encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
{
- __be32 *p;
-
- p = xdr_reserve_space(xdr, len);
- xdr_encode_opaque_fixed(p, buf, len);
+ WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
}
static void
@@ -2092,7 +2074,7 @@ ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args)
kfree(ff_args);
}
-const struct nfs4_xdr_opaque_ops layoutreturn_ops = {
+static const struct nfs4_xdr_opaque_ops layoutreturn_ops = {
.encode = ff_layout_encode_layoutreturn,
.free = ff_layout_free_layoutreturn,
};
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5ca4d96..f489a5a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -15,7 +15,7 @@
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -703,9 +703,10 @@ static bool nfs_need_revalidate_inode(struct inode *inode)
return false;
}
-int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int nfs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
int err = 0;
@@ -726,17 +727,17 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
* - NFS never sets MS_NOATIME or MS_NODIRATIME so there is
* no point in checking those.
*/
- if ((mnt->mnt_flags & MNT_NOATIME) ||
- ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
+ if ((path->mnt->mnt_flags & MNT_NOATIME) ||
+ ((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
need_atime = 0;
if (need_atime || nfs_need_revalidate_inode(inode)) {
struct nfs_server *server = NFS_SERVER(inode);
- nfs_readdirplus_parent_cache_miss(dentry);
+ nfs_readdirplus_parent_cache_miss(path->dentry);
err = __nfs_revalidate_inode(server, inode);
} else
- nfs_readdirplus_parent_cache_hit(dentry);
+ nfs_readdirplus_parent_cache_hit(path->dentry);
if (!err) {
generic_fillattr(inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index e49d831..786f175 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -178,11 +178,12 @@ out_nofree:
}
static int
-nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+nfs_namespace_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- if (NFS_FH(d_inode(dentry))->size != 0)
- return nfs_getattr(mnt, dentry, stat);
- generic_fillattr(d_inode(dentry), stat);
+ if (NFS_FH(d_inode(path->dentry))->size != 0)
+ return nfs_getattr(path, stat, request_mask, query_flags);
+ generic_fillattr(d_inode(path->dentry), stat);
return 0;
}
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index d12ff93..1e486c7 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -12,6 +12,7 @@
#include "nfs42.h"
#include "iostat.h"
#include "pnfs.h"
+#include "nfs4session.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -128,30 +129,26 @@ out_unlock:
return err;
}
-static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
+static ssize_t _nfs42_proc_copy(struct file *src,
struct nfs_lock_context *src_lock,
- struct file *dst, loff_t pos_dst,
+ struct file *dst,
struct nfs_lock_context *dst_lock,
- size_t count)
+ struct nfs42_copy_args *args,
+ struct nfs42_copy_res *res)
{
- struct nfs42_copy_args args = {
- .src_fh = NFS_FH(file_inode(src)),
- .src_pos = pos_src,
- .dst_fh = NFS_FH(file_inode(dst)),
- .dst_pos = pos_dst,
- .count = count,
- };
- struct nfs42_copy_res res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
- .rpc_argp = &args,
- .rpc_resp = &res,
+ .rpc_argp = args,
+ .rpc_resp = res,
};
struct inode *dst_inode = file_inode(dst);
struct nfs_server *server = NFS_SERVER(dst_inode);
+ loff_t pos_src = args->src_pos;
+ loff_t pos_dst = args->dst_pos;
+ size_t count = args->count;
int status;
- status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+ status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
src_lock, FMODE_READ);
if (status)
return status;
@@ -161,7 +158,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
if (status)
return status;
- status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+ status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
dst_lock, FMODE_WRITE);
if (status)
return status;
@@ -171,22 +168,22 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
return status;
status = nfs4_call_sync(server->client, server, &msg,
- &args.seq_args, &res.seq_res, 0);
+ &args->seq_args, &res->seq_res, 0);
if (status == -ENOTSUPP)
server->caps &= ~NFS_CAP_COPY;
if (status)
return status;
- if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
- status = nfs_commit_file(dst, &res.write_res.verifier.verifier);
+ if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
+ status = nfs_commit_file(dst, &res->write_res.verifier.verifier);
if (status)
return status;
}
truncate_pagecache_range(dst_inode, pos_dst,
- pos_dst + res.write_res.count);
+ pos_dst + res->write_res.count);
- return res.write_res.count;
+ return res->write_res.count;
}
ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
@@ -196,8 +193,22 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
struct nfs_server *server = NFS_SERVER(file_inode(dst));
struct nfs_lock_context *src_lock;
struct nfs_lock_context *dst_lock;
- struct nfs4_exception src_exception = { };
- struct nfs4_exception dst_exception = { };
+ struct nfs42_copy_args args = {
+ .src_fh = NFS_FH(file_inode(src)),
+ .src_pos = pos_src,
+ .dst_fh = NFS_FH(file_inode(dst)),
+ .dst_pos = pos_dst,
+ .count = count,
+ };
+ struct nfs42_copy_res res;
+ struct nfs4_exception src_exception = {
+ .inode = file_inode(src),
+ .stateid = &args.src_stateid,
+ };
+ struct nfs4_exception dst_exception = {
+ .inode = file_inode(dst),
+ .stateid = &args.dst_stateid,
+ };
ssize_t err, err2;
if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
@@ -207,7 +218,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
if (IS_ERR(src_lock))
return PTR_ERR(src_lock);
- src_exception.inode = file_inode(src);
src_exception.state = src_lock->open_context->state;
dst_lock = nfs_get_lock_context(nfs_file_open_context(dst));
@@ -216,15 +226,17 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
goto out_put_src_lock;
}
- dst_exception.inode = file_inode(dst);
dst_exception.state = dst_lock->open_context->state;
do {
inode_lock(file_inode(dst));
- err = _nfs42_proc_copy(src, pos_src, src_lock,
- dst, pos_dst, dst_lock, count);
+ err = _nfs42_proc_copy(src, src_lock,
+ dst, dst_lock,
+ &args, &res);
inode_unlock(file_inode(dst));
+ if (err >= 0)
+ break;
if (err == -ENOTSUPP) {
err = -EOPNOTSUPP;
break;
@@ -331,9 +343,8 @@ nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
}
nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid);
spin_unlock(&inode->i_lock);
- nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
- &data->res.seq_res, task);
-
+ nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
+ &data->res.seq_res, task);
}
static void
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 6651658..af285cc 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -273,14 +273,6 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
fmode_t fmode);
#if defined(CONFIG_NFS_V4_1)
-static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
-{
- return server->nfs_client->cl_session;
-}
-
-extern int nfs41_setup_sequence(struct nfs4_session *session,
- struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
- struct rpc_task *task);
extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *);
extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
@@ -357,11 +349,6 @@ nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
hdr->args.stable = NFS_FILE_SYNC;
}
#else /* CONFIG_NFS_v4_1 */
-static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
-{
- return NULL;
-}
-
static inline bool
is_ds_only_client(struct nfs_client *clp)
{
@@ -466,7 +453,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_release_seqid(struct nfs_seqid *seqid);
extern void nfs_free_seqid(struct nfs_seqid *seqid);
-extern int nfs40_setup_sequence(struct nfs4_slot_table *tbl,
+extern int nfs4_setup_sequence(const struct nfs_client *client,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
struct rpc_task *task);
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index c444285..835c163 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -316,7 +316,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
if (ret < 0)
goto out_up;
- payload = user_key_payload(rkey);
+ payload = user_key_payload_rcu(rkey);
if (IS_ERR_OR_NULL(payload)) {
ret = PTR_ERR(payload);
goto out_up;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0a0eaec..1b18368 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -577,12 +577,7 @@ nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server,
static bool _nfs4_is_integrity_protected(struct nfs_client *clp)
{
rpc_authflavor_t flavor = clp->cl_rpcclient->cl_auth->au_flavor;
-
- if (flavor == RPC_AUTH_GSS_KRB5I ||
- flavor == RPC_AUTH_GSS_KRB5P)
- return true;
-
- return false;
+ return (flavor == RPC_AUTH_GSS_KRB5I) || (flavor == RPC_AUTH_GSS_KRB5P);
}
static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp)
@@ -622,48 +617,6 @@ static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
args->sa_privileged = 1;
}
-int nfs40_setup_sequence(struct nfs4_slot_table *tbl,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res,
- struct rpc_task *task)
-{
- struct nfs4_slot *slot;
-
- /* slot already allocated? */
- if (res->sr_slot != NULL)
- goto out_start;
-
- spin_lock(&tbl->slot_tbl_lock);
- if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
- goto out_sleep;
-
- slot = nfs4_alloc_slot(tbl);
- if (IS_ERR(slot)) {
- if (slot == ERR_PTR(-ENOMEM))
- task->tk_timeout = HZ >> 2;
- goto out_sleep;
- }
- spin_unlock(&tbl->slot_tbl_lock);
-
- slot->privileged = args->sa_privileged ? 1 : 0;
- args->sa_slot = slot;
- res->sr_slot = slot;
-
-out_start:
- rpc_call_start(task);
- return 0;
-
-out_sleep:
- if (args->sa_privileged)
- rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
- NULL, RPC_PRIORITY_PRIVILEGED);
- else
- rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
- spin_unlock(&tbl->slot_tbl_lock);
- return -EAGAIN;
-}
-EXPORT_SYMBOL_GPL(nfs40_setup_sequence);
-
static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res)
{
struct nfs4_slot *slot = res->sr_slot;
@@ -815,10 +768,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
case -NFS4ERR_SEQ_FALSE_RETRY:
++slot->seq_nr;
goto retry_nowait;
- case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_BADSESSION:
- nfs4_schedule_session_recovery(session, res->sr_status);
- goto retry_nowait;
default:
/* Just update the slot sequence no. */
slot->seq_done = 1;
@@ -882,101 +831,14 @@ int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
}
EXPORT_SYMBOL_GPL(nfs4_sequence_done);
-int nfs41_setup_sequence(struct nfs4_session *session,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res,
- struct rpc_task *task)
-{
- struct nfs4_slot *slot;
- struct nfs4_slot_table *tbl;
-
- dprintk("--> %s\n", __func__);
- /* slot already allocated? */
- if (res->sr_slot != NULL)
- goto out_success;
-
- tbl = &session->fc_slot_table;
-
- task->tk_timeout = 0;
-
- spin_lock(&tbl->slot_tbl_lock);
- if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) &&
- !args->sa_privileged) {
- /* The state manager will wait until the slot table is empty */
- dprintk("%s session is draining\n", __func__);
- goto out_sleep;
- }
-
- slot = nfs4_alloc_slot(tbl);
- if (IS_ERR(slot)) {
- /* If out of memory, try again in 1/4 second */
- if (slot == ERR_PTR(-ENOMEM))
- task->tk_timeout = HZ >> 2;
- dprintk("<-- %s: no free slots\n", __func__);
- goto out_sleep;
- }
- spin_unlock(&tbl->slot_tbl_lock);
-
- slot->privileged = args->sa_privileged ? 1 : 0;
- args->sa_slot = slot;
-
- dprintk("<-- %s slotid=%u seqid=%u\n", __func__,
- slot->slot_nr, slot->seq_nr);
-
- res->sr_slot = slot;
- res->sr_timestamp = jiffies;
- res->sr_status_flags = 0;
- /*
- * sr_status is only set in decode_sequence, and so will remain
- * set to 1 if an rpc level failure occurs.
- */
- res->sr_status = 1;
- trace_nfs4_setup_sequence(session, args);
-out_success:
- rpc_call_start(task);
- return 0;
-out_sleep:
- /* Privileged tasks are queued with top priority */
- if (args->sa_privileged)
- rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
- NULL, RPC_PRIORITY_PRIVILEGED);
- else
- rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
- spin_unlock(&tbl->slot_tbl_lock);
- return -EAGAIN;
-}
-EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
-
-static int nfs4_setup_sequence(const struct nfs_server *server,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res,
- struct rpc_task *task)
-{
- struct nfs4_session *session = nfs4_get_session(server);
- int ret = 0;
-
- if (!session)
- return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl,
- args, res, task);
-
- dprintk("--> %s clp %p session %p sr_slot %u\n",
- __func__, session->clp, session, res->sr_slot ?
- res->sr_slot->slot_nr : NFS4_NO_SLOT);
-
- ret = nfs41_setup_sequence(session, args, res, task);
-
- dprintk("<-- %s status=%d\n", __func__, ret);
- return ret;
-}
-
static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_call_sync_data *data = calldata;
- struct nfs4_session *session = nfs4_get_session(data->seq_server);
dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
- nfs41_setup_sequence(session, data->seq_args, data->seq_res, task);
+ nfs4_setup_sequence(data->seq_server->nfs_client,
+ data->seq_args, data->seq_res, task);
}
static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
@@ -993,15 +855,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
#else /* !CONFIG_NFS_V4_1 */
-static int nfs4_setup_sequence(const struct nfs_server *server,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res,
- struct rpc_task *task)
-{
- return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl,
- args, res, task);
-}
-
static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
{
return nfs40_sequence_done(task, res);
@@ -1022,10 +875,68 @@ EXPORT_SYMBOL_GPL(nfs4_sequence_done);
#endif /* !CONFIG_NFS_V4_1 */
+int nfs4_setup_sequence(const struct nfs_client *client,
+ struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res,
+ struct rpc_task *task)
+{
+ struct nfs4_session *session = nfs4_get_session(client);
+ struct nfs4_slot_table *tbl = client->cl_slot_tbl;
+ struct nfs4_slot *slot;
+
+ /* slot already allocated? */
+ if (res->sr_slot != NULL)
+ goto out_start;
+
+ if (session) {
+ tbl = &session->fc_slot_table;
+ task->tk_timeout = 0;
+ }
+
+ spin_lock(&tbl->slot_tbl_lock);
+ /* The state manager will wait until the slot table is empty */
+ if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
+ goto out_sleep;
+
+ slot = nfs4_alloc_slot(tbl);
+ if (IS_ERR(slot)) {
+ /* Try again in 1/4 second */
+ if (slot == ERR_PTR(-ENOMEM))
+ task->tk_timeout = HZ >> 2;
+ goto out_sleep;
+ }
+ spin_unlock(&tbl->slot_tbl_lock);
+
+ slot->privileged = args->sa_privileged ? 1 : 0;
+ args->sa_slot = slot;
+
+ res->sr_slot = slot;
+ if (session) {
+ res->sr_timestamp = jiffies;
+ res->sr_status_flags = 0;
+ res->sr_status = 1;
+ }
+
+ trace_nfs4_setup_sequence(session, args);
+out_start:
+ rpc_call_start(task);
+ return 0;
+
+out_sleep:
+ if (args->sa_privileged)
+ rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
+ NULL, RPC_PRIORITY_PRIVILEGED);
+ else
+ rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
+ spin_unlock(&tbl->slot_tbl_lock);
+ return -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(nfs4_setup_sequence);
+
static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_call_sync_data *data = calldata;
- nfs4_setup_sequence(data->seq_server,
+ nfs4_setup_sequence(data->seq_server->nfs_client,
data->seq_args, data->seq_res, task);
}
@@ -1330,14 +1241,6 @@ static void nfs4_opendata_put(struct nfs4_opendata *p)
kref_put(&p->kref, nfs4_opendata_free);
}
-static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
-{
- int ret;
-
- ret = rpc_wait_for_completion_task(task);
- return ret;
-}
-
static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
fmode_t fmode)
{
@@ -1732,17 +1635,15 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
int ret;
if (!data->rpc_done) {
- if (data->rpc_status) {
- ret = data->rpc_status;
- goto err;
- }
+ if (data->rpc_status)
+ return ERR_PTR(data->rpc_status);
/* cached opens have already been processed */
goto update;
}
ret = nfs_refresh_inode(inode, &data->f_attr);
if (ret)
- goto err;
+ return ERR_PTR(ret);
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
@@ -1752,9 +1653,6 @@ update:
atomic_inc(&state->count);
return state;
-err:
- return ERR_PTR(ret);
-
}
static struct nfs4_state *
@@ -2048,8 +1946,8 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
- nfs40_setup_sequence(data->o_arg.server->nfs_client->cl_slot_tbl,
- &data->c_arg.seq_args, &data->c_res.seq_res, task);
+ nfs4_setup_sequence(data->o_arg.server->nfs_client,
+ &data->c_arg.seq_args, &data->c_res.seq_res, task);
}
static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
@@ -2124,7 +2022,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
- status = nfs4_wait_for_completion_rpc_task(task);
+ status = rpc_wait_for_completion_task(task);
if (status != 0) {
data->cancelled = 1;
smp_wmb();
@@ -2172,7 +2070,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
}
data->timestamp = jiffies;
- if (nfs4_setup_sequence(data->o_arg.server,
+ if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
&data->o_arg.seq_args,
&data->o_res.seq_res,
task) != 0)
@@ -2289,15 +2187,15 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
data->is_recover = 1;
}
task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- return PTR_ERR(task);
- status = nfs4_wait_for_completion_rpc_task(task);
- if (status != 0) {
- data->cancelled = 1;
- smp_wmb();
- } else
- status = data->rpc_status;
- rpc_put_task(task);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = rpc_wait_for_completion_task(task);
+ if (status != 0) {
+ data->cancelled = 1;
+ smp_wmb();
+ } else
+ status = data->rpc_status;
+ rpc_put_task(task);
return status;
}
@@ -2306,7 +2204,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
{
struct inode *dir = d_inode(data->dir);
struct nfs_openres *o_res = &data->o_res;
- int status;
+ int status;
status = nfs4_run_open_task(data, 1);
if (status != 0 || !data->rpc_done)
@@ -2314,11 +2212,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
- if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
+ if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM)
status = _nfs4_proc_open_confirm(data);
- if (status != 0)
- return status;
- }
return status;
}
@@ -2412,11 +2307,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
return 0;
}
-static int nfs4_recover_expired_lease(struct nfs_server *server)
-{
- return nfs4_client_recover_expired_lease(server->nfs_client);
-}
-
/*
* OPEN_EXPIRED:
* reclaim state on the server after a network partition.
@@ -2730,6 +2620,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
ret = PTR_ERR(state);
if (IS_ERR(state))
goto out;
+ ctx->state = state;
if (server->caps & NFS_CAP_POSIX_LOCK)
set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK)
@@ -2755,7 +2646,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
if (ret != 0)
goto out;
- ctx->state = state;
if (d_inode(dentry) == state->inode) {
nfs_inode_attach_open_context(ctx);
if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
@@ -2794,7 +2684,7 @@ static int _nfs4_do_open(struct inode *dir,
dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
goto out_err;
}
- status = nfs4_recover_expired_lease(server);
+ status = nfs4_client_recover_expired_lease(server->nfs_client);
if (status != 0)
goto err_put_state_owner;
if (d_really_is_positive(dentry))
@@ -2940,12 +2830,12 @@ static int _nfs4_do_setattr(struct inode *inode,
struct nfs_open_context *ctx)
{
struct nfs_server *server = NFS_SERVER(inode);
- struct rpc_message msg = {
+ struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
.rpc_argp = arg,
.rpc_resp = res,
.rpc_cred = cred,
- };
+ };
struct rpc_cred *delegation_cred = NULL;
unsigned long timestamp = jiffies;
fmode_t fmode;
@@ -2993,18 +2883,18 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_state *state = ctx ? ctx->state : NULL;
- struct nfs_setattrargs arg = {
- .fh = NFS_FH(inode),
- .iap = sattr,
+ struct nfs_setattrargs arg = {
+ .fh = NFS_FH(inode),
+ .iap = sattr,
.server = server,
.bitmask = server->attr_bitmask,
.label = ilabel,
- };
- struct nfs_setattrres res = {
+ };
+ struct nfs_setattrres res = {
.fattr = fattr,
.label = olabel,
.server = server,
- };
+ };
struct nfs4_exception exception = {
.state = state,
.inode = inode,
@@ -3118,7 +3008,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
}
}
- /* hmm. we are done with the inode, and in the process of freeing
+ /* hmm. we are done with the inode, and in the process of freeing
* the state_owner. we keep this around to process errors
*/
switch (task->tk_status) {
@@ -3234,7 +3124,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
else if (calldata->arg.bitmask == NULL)
calldata->res.fattr = NULL;
calldata->timestamp = jiffies;
- if (nfs4_setup_sequence(NFS_SERVER(inode),
+ if (nfs4_setup_sequence(NFS_SERVER(inode)->nfs_client,
&calldata->arg.seq_args,
&calldata->res.seq_res,
task) != 0)
@@ -3522,16 +3412,11 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl
.pseudoflavor = flavor,
};
struct rpc_auth *auth;
- int ret;
auth = rpcauth_create(&auth_args, server->client);
- if (IS_ERR(auth)) {
- ret = -EACCES;
- goto out;
- }
- ret = nfs4_lookup_root(server, fhandle, info);
-out:
- return ret;
+ if (IS_ERR(auth))
+ return -EACCES;
+ return nfs4_lookup_root(server, fhandle, info);
}
/*
@@ -4114,7 +3999,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
{
- nfs4_setup_sequence(NFS_SB(data->dentry->d_sb),
+ nfs4_setup_sequence(NFS_SB(data->dentry->d_sb)->nfs_client,
&data->args.seq_args,
&data->res.seq_res,
task);
@@ -4148,7 +4033,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
{
- nfs4_setup_sequence(NFS_SERVER(data->old_dir),
+ nfs4_setup_sequence(NFS_SERVER(data->old_dir)->nfs_client,
&data->args.seq_args,
&data->res.seq_res,
task);
@@ -4723,7 +4608,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
- if (nfs4_setup_sequence(NFS_SERVER(hdr->inode),
+ if (nfs4_setup_sequence(NFS_SERVER(hdr->inode)->nfs_client,
&hdr->args.seq_args,
&hdr->res.seq_res,
task))
@@ -4822,7 +4707,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
- nfs4_setup_sequence(NFS_SERVER(data->inode),
+ nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
&data->args.seq_args,
&data->res.seq_res,
task);
@@ -4975,8 +4860,8 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen,
if (newpage == NULL)
goto unwind;
memcpy(page_address(newpage), buf, len);
- buf += len;
- buflen -= len;
+ buf += len;
+ buflen -= len;
*pages++ = newpage;
rc++;
} while (buflen != 0);
@@ -5069,7 +4954,7 @@ out:
*/
static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
{
- struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
+ struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, };
struct nfs_getaclargs args = {
.fh = NFS_FH(inode),
.acl_pages = pages,
@@ -5083,13 +4968,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
.rpc_argp = &args,
.rpc_resp = &res,
};
- unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
+ unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
int ret = -ENOMEM, i;
- /* As long as we're doing a round trip to the server anyway,
- * let's be prepared for a page of acl data. */
- if (npages == 0)
- npages = 1;
if (npages > ARRAY_SIZE(pages))
return -ERANGE;
@@ -5299,8 +5180,8 @@ static int _nfs4_do_set_security_label(struct inode *inode,
struct nfs_server *server = NFS_SERVER(inode);
const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
struct nfs_setattrargs arg = {
- .fh = NFS_FH(inode),
- .iap = &sattr,
+ .fh = NFS_FH(inode),
+ .iap = &sattr,
.server = server,
.bitmask = bitmask,
.label = ilabel,
@@ -5311,9 +5192,9 @@ static int _nfs4_do_set_security_label(struct inode *inode,
.server = server,
};
struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
- .rpc_argp = &arg,
- .rpc_resp = &res,
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
};
int status;
@@ -5747,7 +5628,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
return;
- nfs4_setup_sequence(d_data->res.server,
+ nfs4_setup_sequence(d_data->res.server->nfs_client,
&d_data->args.seq_args,
&d_data->res.seq_res,
task);
@@ -5817,7 +5698,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
return PTR_ERR(task);
if (!issync)
goto out;
- status = nfs4_wait_for_completion_rpc_task(task);
+ status = rpc_wait_for_completion_task(task);
if (status != 0)
goto out;
status = data->rpc_status;
@@ -5859,8 +5740,8 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT],
- .rpc_argp = &arg,
- .rpc_resp = &res,
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
.rpc_cred = state->owner->so_cred,
};
struct nfs4_lock_state *lsp;
@@ -5989,7 +5870,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
goto out_no_action;
}
calldata->timestamp = jiffies;
- if (nfs4_setup_sequence(calldata->server,
+ if (nfs4_setup_sequence(calldata->server->nfs_client,
&calldata->arg.seq_args,
&calldata->res.seq_res,
task) != 0)
@@ -6087,7 +5968,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
status = PTR_ERR(task);
if (IS_ERR(task))
goto out;
- status = nfs4_wait_for_completion_rpc_task(task);
+ status = rpc_wait_for_completion_task(task);
rpc_put_task(task);
out:
request->fl_flags = fl_flags;
@@ -6174,7 +6055,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
goto out_release_open_seqid;
}
data->timestamp = jiffies;
- if (nfs4_setup_sequence(data->server,
+ if (nfs4_setup_sequence(data->server->nfs_client,
&data->arg.seq_args,
&data->res.seq_res,
task) == 0)
@@ -6314,7 +6195,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
- ret = nfs4_wait_for_completion_rpc_task(task);
+ ret = rpc_wait_for_completion_task(task);
if (ret == 0) {
ret = data->rpc_status;
if (ret)
@@ -6393,8 +6274,7 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) ||
test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
return 0;
- status = nfs4_lock_expired(state, request);
- return status;
+ return nfs4_lock_expired(state, request);
}
#endif
@@ -6640,8 +6520,8 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata
{
struct nfs_release_lockowner_data *data = calldata;
struct nfs_server *server = data->server;
- nfs40_setup_sequence(server->nfs_client->cl_slot_tbl,
- &data->args.seq_args, &data->res.seq_res, task);
+ nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
+ &data->res.seq_res, task);
data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
data->timestamp = jiffies;
}
@@ -7232,11 +7112,9 @@ static bool
nfs41_same_server_scope(struct nfs41_server_scope *a,
struct nfs41_server_scope *b)
{
- if (a->server_scope_sz == b->server_scope_sz &&
- memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
- return true;
-
- return false;
+ if (a->server_scope_sz != b->server_scope_sz)
+ return false;
+ return memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0;
}
static void
@@ -7831,7 +7709,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
dprintk("--> %s\n", __func__);
/* just setup sequence, do not trigger session recovery
since we're invoked within one */
- nfs41_setup_sequence(data->clp->cl_session,
+ nfs4_setup_sequence(data->clp,
&data->args->la_seq_args,
&data->res->lr_seq_res,
task);
@@ -8202,7 +8080,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
args = task->tk_msg.rpc_argp;
res = task->tk_msg.rpc_resp;
- nfs41_setup_sequence(clp->cl_session, args, res, task);
+ nfs4_setup_sequence(clp, args, res, task);
}
static const struct rpc_call_ops nfs41_sequence_ops = {
@@ -8290,7 +8168,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
{
struct nfs4_reclaim_complete_data *calldata = data;
- nfs41_setup_sequence(calldata->clp->cl_session,
+ nfs4_setup_sequence(calldata->clp,
&calldata->arg.seq_args,
&calldata->res.seq_res,
task);
@@ -8382,7 +8260,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
status = PTR_ERR(task);
goto out;
}
- status = nfs4_wait_for_completion_rpc_task(task);
+ status = rpc_wait_for_completion_task(task);
if (status == 0)
status = task->tk_status;
rpc_put_task(task);
@@ -8397,10 +8275,9 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
struct nfs_server *server = NFS_SERVER(lgp->args.inode);
- struct nfs4_session *session = nfs4_get_session(server);
dprintk("--> %s\n", __func__);
- nfs41_setup_sequence(session, &lgp->args.seq_args,
+ nfs4_setup_sequence(server->nfs_client, &lgp->args.seq_args,
&lgp->res.seq_res, task);
dprintk("<-- %s\n", __func__);
}
@@ -8615,7 +8492,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return ERR_CAST(task);
- status = nfs4_wait_for_completion_rpc_task(task);
+ status = rpc_wait_for_completion_task(task);
if (status == 0) {
status = nfs4_layoutget_handle_exception(task, lgp, &exception);
*timeout = exception.timeout;
@@ -8644,7 +8521,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
dprintk("--> %s\n", __func__);
- nfs41_setup_sequence(lrp->clp->cl_session,
+ nfs4_setup_sequence(lrp->clp,
&lrp->args.seq_args,
&lrp->res.seq_res,
task);
@@ -8794,9 +8671,8 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
struct nfs_server *server = NFS_SERVER(data->args.inode);
- struct nfs4_session *session = nfs4_get_session(server);
- nfs41_setup_sequence(session,
+ nfs4_setup_sequence(server->nfs_client,
&data->args.seq_args,
&data->res.seq_res,
task);
@@ -9120,7 +8996,7 @@ struct nfs_free_stateid_data {
static void nfs41_free_stateid_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_free_stateid_data *data = calldata;
- nfs41_setup_sequence(nfs4_get_session(data->server),
+ nfs4_setup_sequence(data->server->nfs_client,
&data->args.seq_args,
&data->res.seq_res,
task);
@@ -9232,10 +9108,8 @@ static bool nfs41_match_stateid(const nfs4_stateid *s1,
if (s1->seqid == s2->seqid)
return true;
- if (s1->seqid == 0 || s2->seqid == 0)
- return true;
- return false;
+ return s1->seqid == 0 || s2->seqid == 0;
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 82e7719..1f8c2ae 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -153,7 +153,7 @@ void nfs4_set_lease_period(struct nfs_client *clp,
spin_unlock(&clp->cl_lock);
/* Cap maximum reconnect timeout at 1/2 lease period */
- rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1);
+ rpc_set_connect_timeout(clp->cl_rpcclient, lease, lease >> 1);
}
/*
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index dae3855..dfae488 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -103,6 +103,11 @@ static inline bool nfs4_test_locked_slot(const struct nfs4_slot_table *tbl,
return !!test_bit(slotid, tbl->used_slots);
}
+static inline struct nfs4_session *nfs4_get_session(const struct nfs_client *clp)
+{
+ return clp->cl_session;
+}
+
#if defined(CONFIG_NFS_V4_1)
extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
u32 target_highest_slotid);
@@ -170,6 +175,8 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
return 0;
}
+#define nfs_session_id_hash(session) (0)
+
#endif /* defined(CONFIG_NFS_V4_1) */
#endif /* IS_ENABLED(CONFIG_NFS_V4) */
#endif /* __LINUX_FS_NFS_NFS4SESSION_H */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index daeb94e..8156bad 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -868,7 +868,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
for(;;) {
spin_lock(&state->state_lock);
- lsp = __nfs4_find_lock_state(state, owner, 0);
+ lsp = __nfs4_find_lock_state(state, owner, NULL);
if (lsp != NULL)
break;
if (new != NULL) {
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index cfb8f7c..845d0ea 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -241,38 +241,6 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session);
DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence);
DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete);
-TRACE_EVENT(nfs4_setup_sequence,
- TP_PROTO(
- const struct nfs4_session *session,
- const struct nfs4_sequence_args *args
- ),
- TP_ARGS(session, args),
-
- TP_STRUCT__entry(
- __field(unsigned int, session)
- __field(unsigned int, slot_nr)
- __field(unsigned int, seq_nr)
- __field(unsigned int, highest_used_slotid)
- ),
-
- TP_fast_assign(
- const struct nfs4_slot *sa_slot = args->sa_slot;
- __entry->session = nfs_session_id_hash(&session->sess_id);
- __entry->slot_nr = sa_slot->slot_nr;
- __entry->seq_nr = sa_slot->seq_nr;
- __entry->highest_used_slotid =
- sa_slot->table->highest_used_slotid;
- ),
- TP_printk(
- "session=0x%08x slot_nr=%u seq_nr=%u "
- "highest_used_slotid=%u",
- __entry->session,
- __entry->slot_nr,
- __entry->seq_nr,
- __entry->highest_used_slotid
- )
-);
-
#define show_nfs4_sequence_status_flags(status) \
__print_flags((unsigned long)status, "|", \
{ SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
@@ -382,6 +350,38 @@ TRACE_EVENT(nfs4_cb_sequence,
);
#endif /* CONFIG_NFS_V4_1 */
+TRACE_EVENT(nfs4_setup_sequence,
+ TP_PROTO(
+ const struct nfs4_session *session,
+ const struct nfs4_sequence_args *args
+ ),
+ TP_ARGS(session, args),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, session)
+ __field(unsigned int, slot_nr)
+ __field(unsigned int, seq_nr)
+ __field(unsigned int, highest_used_slotid)
+ ),
+
+ TP_fast_assign(
+ const struct nfs4_slot *sa_slot = args->sa_slot;
+ __entry->session = session ? nfs_session_id_hash(&session->sess_id) : 0;
+ __entry->slot_nr = sa_slot->slot_nr;
+ __entry->seq_nr = sa_slot->seq_nr;
+ __entry->highest_used_slotid =
+ sa_slot->table->highest_used_slotid;
+ ),
+ TP_printk(
+ "session=0x%08x slot_nr=%u seq_nr=%u "
+ "highest_used_slotid=%u",
+ __entry->session,
+ __entry->slot_nr,
+ __entry->seq_nr,
+ __entry->highest_used_slotid
+ )
+);
+
DECLARE_EVENT_CLASS(nfs4_open_event,
TP_PROTO(
const struct nfs_open_context *ctx,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e9255cb..f0369e3 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -169,8 +169,10 @@ static int nfs4_stat_to_errno(int);
open_owner_id_maxsz + \
encode_opentype_maxsz + \
encode_claim_null_maxsz)
+#define decode_space_limit_maxsz (3)
#define decode_ace_maxsz (3 + nfs4_owner_maxsz)
#define decode_delegation_maxsz (1 + decode_stateid_maxsz + 1 + \
+ decode_space_limit_maxsz + \
decode_ace_maxsz)
#define decode_change_info_maxsz (5)
#define decode_open_maxsz (op_decode_hdr_maxsz + \
@@ -924,34 +926,22 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
{
- __be32 *p;
-
- p = xdr_reserve_space(xdr, len);
- xdr_encode_opaque_fixed(p, buf, len);
+ WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
}
static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4 + len);
- xdr_encode_opaque(p, str, len);
+ WARN_ON_ONCE(xdr_stream_encode_opaque(xdr, str, len) < 0);
}
static void encode_uint32(struct xdr_stream *xdr, u32 n)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(n);
+ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0);
}
static void encode_uint64(struct xdr_stream *xdr, u64 n)
{
- __be32 *p;
-
- p = reserve_space(xdr, 8);
- xdr_encode_hyper(p, n);
+ WARN_ON_ONCE(xdr_stream_encode_u64(xdr, n) < 0);
}
static void encode_nfs4_seqid(struct xdr_stream *xdr,
@@ -2524,7 +2514,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen + op_decode_hdr_maxsz + 1;
+ replen = hdr.replen + op_decode_hdr_maxsz;
encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
@@ -3062,20 +3052,15 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
{
- __be32 *p;
-
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- *len = be32_to_cpup(p);
- p = xdr_inline_decode(xdr, *len);
- if (unlikely(!p))
- goto out_overflow;
- *string = (char *)p;
+ ssize_t ret = xdr_stream_decode_opaque_inline(xdr, (void **)string,
+ NFS4_OPAQUE_LIMIT);
+ if (unlikely(ret < 0)) {
+ if (ret == -EBADMSG)
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+ }
+ *len = ret;
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}
static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
@@ -3142,7 +3127,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
}
/* Dummy routine */
-static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
+static int decode_ace(struct xdr_stream *xdr, void *ace)
{
__be32 *p;
unsigned int strlen;
@@ -3890,45 +3875,50 @@ out_overflow:
return -EIO;
}
+static ssize_t decode_nfs4_string(struct xdr_stream *xdr,
+ struct nfs4_string *name, gfp_t gfp_flags)
+{
+ ssize_t ret;
+
+ ret = xdr_stream_decode_string_dup(xdr, &name->data,
+ XDR_MAX_NETOBJ, gfp_flags);
+ name->len = 0;
+ if (ret > 0)
+ name->len = ret;
+ return ret;
+}
+
static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
const struct nfs_server *server, kuid_t *uid,
struct nfs4_string *owner_name)
{
- uint32_t len;
- __be32 *p;
- int ret = 0;
+ ssize_t len;
+ char *p;
*uid = make_kuid(&init_user_ns, -2);
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
return -EIO;
- if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- len = be32_to_cpup(p);
- p = xdr_inline_decode(xdr, len);
- if (unlikely(!p))
- goto out_overflow;
- if (owner_name != NULL) {
- owner_name->data = kmemdup(p, len, GFP_NOWAIT);
- if (owner_name->data != NULL) {
- owner_name->len = len;
- ret = NFS_ATTR_FATTR_OWNER_NAME;
- }
- } else if (len < XDR_MAX_NETOBJ) {
- if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
- ret = NFS_ATTR_FATTR_OWNER;
- else
- dprintk("%s: nfs_map_name_to_uid failed!\n",
- __func__);
- } else
- dprintk("%s: name too long (%u)!\n",
- __func__, len);
- bitmap[1] &= ~FATTR4_WORD1_OWNER;
+ if (!(bitmap[1] & FATTR4_WORD1_OWNER))
+ return 0;
+ bitmap[1] &= ~FATTR4_WORD1_OWNER;
+
+ if (owner_name != NULL) {
+ len = decode_nfs4_string(xdr, owner_name, GFP_NOWAIT);
+ if (len <= 0)
+ goto out;
+ dprintk("%s: name=%s\n", __func__, owner_name->data);
+ return NFS_ATTR_FATTR_OWNER_NAME;
+ } else {
+ len = xdr_stream_decode_opaque_inline(xdr, (void **)&p,
+ XDR_MAX_NETOBJ);
+ if (len <= 0 || nfs_map_name_to_uid(server, p, len, uid) != 0)
+ goto out;
+ dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid));
+ return NFS_ATTR_FATTR_OWNER;
}
- dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid));
- return ret;
-out_overflow:
+out:
+ if (len != -EBADMSG)
+ return 0;
print_overflow_msg(__func__, xdr);
return -EIO;
}
@@ -3937,41 +3927,33 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
const struct nfs_server *server, kgid_t *gid,
struct nfs4_string *group_name)
{
- uint32_t len;
- __be32 *p;
- int ret = 0;
+ ssize_t len;
+ char *p;
*gid = make_kgid(&init_user_ns, -2);
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
return -EIO;
- if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- len = be32_to_cpup(p);
- p = xdr_inline_decode(xdr, len);
- if (unlikely(!p))
- goto out_overflow;
- if (group_name != NULL) {
- group_name->data = kmemdup(p, len, GFP_NOWAIT);
- if (group_name->data != NULL) {
- group_name->len = len;
- ret = NFS_ATTR_FATTR_GROUP_NAME;
- }
- } else if (len < XDR_MAX_NETOBJ) {
- if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
- ret = NFS_ATTR_FATTR_GROUP;
- else
- dprintk("%s: nfs_map_group_to_gid failed!\n",
- __func__);
- } else
- dprintk("%s: name too long (%u)!\n",
- __func__, len);
- bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
+ if (!(bitmap[1] & FATTR4_WORD1_OWNER_GROUP))
+ return 0;
+ bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
+
+ if (group_name != NULL) {
+ len = decode_nfs4_string(xdr, group_name, GFP_NOWAIT);
+ if (len <= 0)
+ goto out;
+ dprintk("%s: name=%s\n", __func__, group_name->data);
+ return NFS_ATTR_FATTR_OWNER_NAME;
+ } else {
+ len = xdr_stream_decode_opaque_inline(xdr, (void **)&p,
+ XDR_MAX_NETOBJ);
+ if (len <= 0 || nfs_map_group_to_gid(server, p, len, gid) != 0)
+ goto out;
+ dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid));
+ return NFS_ATTR_FATTR_GROUP;
}
- dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid));
- return ret;
-out_overflow:
+out:
+ if (len != -EBADMSG)
+ return 0;
print_overflow_msg(__func__, xdr);
return -EIO;
}
@@ -4294,15 +4276,12 @@ out_overflow:
static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
{
- __be32 *p;
-
- p = xdr_inline_decode(xdr, len);
- if (likely(p)) {
- memcpy(buf, p, len);
- return 0;
+ ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len);
+ if (unlikely(ret < 0)) {
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
- print_overflow_msg(__func__, xdr);
- return -EIO;
+ return 0;
}
static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
@@ -5093,7 +5072,7 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
if (decode_space_limit(xdr, &res->pagemod_limit) < 0)
return -EIO;
}
- return decode_ace(xdr, NULL, res->server->nfs_client);
+ return decode_ace(xdr, NULL);
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
@@ -5660,8 +5639,6 @@ static int decode_exchange_id(struct xdr_stream *xdr,
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
- if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
- return -EIO;
memcpy(res->server_owner->major_id, dummy_str, dummy);
res->server_owner->major_id_sz = dummy;
@@ -5669,8 +5646,6 @@ static int decode_exchange_id(struct xdr_stream *xdr,
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
- if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
- return -EIO;
memcpy(res->server_scope->server_scope, dummy_str, dummy);
res->server_scope->server_scope_sz = dummy;
@@ -5685,16 +5660,12 @@ static int decode_exchange_id(struct xdr_stream *xdr,
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
- if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
- return -EIO;
memcpy(res->impl_id->domain, dummy_str, dummy);
/* nii_name */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
- if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
- return -EIO;
memcpy(res->impl_id->name, dummy_str, dummy);
/* nii_date */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 6bca178..54e0f9f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -531,39 +531,32 @@ static void nfs_show_mountd_netid(struct seq_file *m, struct nfs_server *nfss,
int showdefaults)
{
struct sockaddr *sap = (struct sockaddr *) &nfss->mountd_address;
+ char *proto = NULL;
- seq_printf(m, ",mountproto=");
switch (sap->sa_family) {
case AF_INET:
switch (nfss->mountd_protocol) {
case IPPROTO_UDP:
- seq_printf(m, RPCBIND_NETID_UDP);
+ proto = RPCBIND_NETID_UDP;
break;
case IPPROTO_TCP:
- seq_printf(m, RPCBIND_NETID_TCP);
+ proto = RPCBIND_NETID_TCP;
break;
- default:
- if (showdefaults)
- seq_printf(m, "auto");
}
break;
case AF_INET6:
switch (nfss->mountd_protocol) {
case IPPROTO_UDP:
- seq_printf(m, RPCBIND_NETID_UDP6);
+ proto = RPCBIND_NETID_UDP6;
break;
case IPPROTO_TCP:
- seq_printf(m, RPCBIND_NETID_TCP6);
+ proto = RPCBIND_NETID_TCP6;
break;
- default:
- if (showdefaults)
- seq_printf(m, "auto");
}
break;
- default:
- if (showdefaults)
- seq_printf(m, "auto");
}
+ if (proto || showdefaults)
+ seq_printf(m, ",mountproto=%s", proto ?: "auto");
}
static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0060685..e75b056 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1785,7 +1785,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
if (status < 0) {
nfs_context_set_write_error(req->wb_context, status);
nfs_inode_remove_request(req);
- dprintk(", error = %d\n", status);
+ dprintk_cont(", error = %d\n", status);
goto next;
}
@@ -1794,11 +1794,11 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
/* We have a match */
nfs_inode_remove_request(req);
- dprintk(" OK\n");
+ dprintk_cont(" OK\n");
goto next;
}
/* We have a mismatch. Write the page again */
- dprintk(" mismatch\n");
+ dprintk_cont(" mismatch\n");
nfs_mark_request_dirty(req);
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
next:
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 43e109c..e71f11b 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1102,6 +1102,7 @@ static struct flags {
{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
{ NFSEXP_V4ROOT, {"v4root", ""}},
{ NFSEXP_PNFS, {"pnfs", ""}},
+ { NFSEXP_SECURITY_LABEL, {"security_label", ""}},
{ 0, {"", ""}}
};
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index d08cd88..838f90f 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -376,5 +376,4 @@ struct svc_version nfsd_acl_version2 = {
.vs_proc = nfsd_acl_procedures2,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
- .vs_hidden = 0,
};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 0c89034..dcb5f79 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -266,6 +266,5 @@ struct svc_version nfsd_acl_version3 = {
.vs_proc = nfsd_acl_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
- .vs_hidden = 0,
};
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index d818e4f..045c908 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
- nfserr = nfsd_write(rqstp, &resp->fh, NULL,
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &cnt,
- &resp->committed);
+ nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
+ rqstp->rq_vec, argp->vlen,
+ &cnt, resp->committed);
resp->count = cnt;
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index eb78109..0274db6 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -303,6 +303,7 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, length + 4);
if (unlikely(p == NULL))
goto out_overflow;
+ p += XDR_QUADLEN(length);
hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
@@ -396,13 +397,10 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
struct nfsd4_callback *cb)
{
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
- struct nfs4_sessionid id;
- int status;
+ int status = -ESERVERFAULT;
__be32 *p;
u32 dummy;
- status = -ESERVERFAULT;
-
/*
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
@@ -410,9 +408,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
- memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
- if (memcmp(id.data, session->se_sessionid.data,
- NFS4_MAX_SESSIONID_LEN) != 0) {
+
+ if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
dprintk("NFS: %s Invalid session id\n", __func__);
goto out;
}
@@ -753,6 +750,14 @@ int set_callback_cred(void)
return 0;
}
+void cleanup_callback_cred(void)
+{
+ if (callback_cred) {
+ put_rpccred(callback_cred);
+ callback_cred = NULL;
+ }
+}
+
static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5b20577..6b9b6cc 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -628,6 +628,10 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
{
__be32 status;
u32 id = -1;
+
+ if (name == NULL || namelen == 0)
+ return nfserr_inval;
+
status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
*uid = make_kuid(&init_user_ns, id);
if (!uid_valid(*uid))
@@ -641,6 +645,10 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
{
__be32 status;
u32 id = -1;
+
+ if (name == NULL || namelen == 0)
+ return nfserr_inval;
+
status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
*gid = make_kgid(&init_user_ns, id);
if (!gid_valid(*gid))
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 74a6e57..cbeeda1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
u32 *bmval, u32 *writable)
{
struct dentry *dentry = cstate->current_fh.fh_dentry;
+ struct svc_export *exp = cstate->current_fh.fh_export;
if (!nfsd_attrs_supported(cstate->minorversion, bmval))
return nfserr_attrnotsupp;
if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry)))
return nfserr_attrnotsupp;
+ if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) &&
+ !(exp->ex_flags & NFSEXP_SECURITY_LABEL))
+ return nfserr_attrnotsupp;
if (writable && !bmval_is_subset(bmval, writable))
return nfserr_inval;
if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
@@ -983,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- &write->wr_how_written);
+ write->wr_how_written);
fput(filp);
write->wr_bytes_written = cnt;
@@ -1838,6 +1842,12 @@ static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd
return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
}
+static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ /* ac_supported, ac_resp_access */
+ return (op_encode_hdr_size + 2)* sizeof(__be32);
+}
+
static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
@@ -1892,6 +1902,11 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
return ret;
}
+static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
+}
+
static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1933,6 +1948,11 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
XDR_QUADLEN(rlen)) * sizeof(__be32);
}
+static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
+}
+
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1952,11 +1972,23 @@ static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
}
+static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
+ * sizeof(__be32);
+}
+
static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
}
+static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
+ (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
+}
+
static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
@@ -2011,6 +2043,19 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
}
#ifdef CONFIG_NFSD_PNFS
+static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 maxcount = 0, rlen = 0;
+
+ maxcount = svc_max_payload(rqstp);
+ rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
+
+ return (op_encode_hdr_size +
+ 1 /* gd_layout_type*/ +
+ XDR_QUADLEN(rlen) +
+ 2 /* gd_notify_types */) * sizeof(__be32);
+}
+
/*
* At this stage we don't really know what layout driver will handle the request,
* so we need to define an arbitrary upper bound here.
@@ -2040,10 +2085,17 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
}
#endif /* CONFIG_NFSD_PNFS */
+
+static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 3) * sizeof(__be32);
+}
+
static struct nfsd4_operation nfsd4_ops[] = {
[OP_ACCESS] = {
.op_func = (nfsd4op_func)nfsd4_access,
.op_name = "OP_ACCESS",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
},
[OP_CLOSE] = {
.op_func = (nfsd4op_func)nfsd4_close,
@@ -2081,6 +2133,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_GETFH] = {
.op_func = (nfsd4op_func)nfsd4_getfh,
.op_name = "OP_GETFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
},
[OP_LINK] = {
.op_func = (nfsd4op_func)nfsd4_link,
@@ -2099,6 +2152,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
.op_name = "OP_LOCKT",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
},
[OP_LOCKU] = {
.op_func = (nfsd4op_func)nfsd4_locku,
@@ -2111,15 +2165,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUP",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUPP",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_NVERIFY] = {
.op_func = (nfsd4op_func)nfsd4_nverify,
.op_name = "OP_NVERIFY",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
@@ -2177,6 +2234,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_READLINK] = {
.op_func = (nfsd4op_func)nfsd4_readlink,
.op_name = "OP_READLINK",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
},
[OP_REMOVE] = {
.op_func = (nfsd4op_func)nfsd4_remove,
@@ -2215,6 +2273,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
},
[OP_SETATTR] = {
.op_func = (nfsd4op_func)nfsd4_setattr,
@@ -2240,6 +2299,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_VERIFY] = {
.op_func = (nfsd4op_func)nfsd4_verify,
.op_name = "OP_VERIFY",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_WRITE] = {
.op_func = (nfsd4op_func)nfsd4_write,
@@ -2314,11 +2374,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
},
[OP_TEST_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_test_stateid,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_TEST_STATEID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
},
[OP_FREE_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_free_stateid,
@@ -2332,6 +2394,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_GETDEVICEINFO",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
},
[OP_LAYOUTGET] = {
.op_func = (nfsd4op_func)nfsd4_layoutget,
@@ -2381,6 +2444,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
},
};
@@ -2425,14 +2489,11 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
- struct nfsd4_operation *opdesc;
- nfsd4op_rsize estimator;
-
if (op->opnum == OP_ILLEGAL)
return op_encode_hdr_size * sizeof(__be32);
- opdesc = OPDESC(op);
- estimator = opdesc->op_rsize_bop;
- return estimator ? estimator(rqstp, op) : PAGE_SIZE;
+
+ BUG_ON(OPDESC(op)->op_rsize_bop == NULL);
+ return OPDESC(op)->op_rsize_bop(rqstp, op);
}
void warn_on_nonidempotent_op(struct nfsd4_op *op)
@@ -2476,12 +2537,13 @@ static struct svc_procedure nfsd_procedures4[2] = {
};
struct svc_version nfsd_version4 = {
- .vs_vers = 4,
- .vs_nproc = 2,
- .vs_proc = nfsd_procedures4,
- .vs_dispatch = nfsd_dispatch,
- .vs_xdrsize = NFS4_SVC_XDRSIZE,
- .vs_rpcb_optnl = 1,
+ .vs_vers = 4,
+ .vs_nproc = 2,
+ .vs_proc = nfsd_procedures4,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS4_SVC_XDRSIZE,
+ .vs_rpcb_optnl = true,
+ .vs_need_cong_ctrl = true,
};
/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a0dee8a..e9ef50a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2281,7 +2281,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
out_err:
conn->cb_addr.ss_family = AF_UNSPEC;
conn->cb_addrlen = 0;
- dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+ dprintk("NFSD: this client (clientid %08x/%08x) "
"will not receive delegations\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -7012,23 +7012,24 @@ nfs4_state_start(void)
ret = set_callback_cred();
if (ret)
- return -ENOMEM;
+ return ret;
+
laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
if (laundry_wq == NULL) {
ret = -ENOMEM;
- goto out_recovery;
+ goto out_cleanup_cred;
}
ret = nfsd4_create_callback_queue();
if (ret)
goto out_free_laundry;
set_max_delegations();
-
return 0;
out_free_laundry:
destroy_workqueue(laundry_wq);
-out_recovery:
+out_cleanup_cred:
+ cleanup_callback_cred();
return ret;
}
@@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void)
{
destroy_workqueue(laundry_wq);
nfsd4_destroy_callback_queue();
+ cleanup_callback_cred();
}
static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 8fae53c..33017d6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -58,7 +58,7 @@
#define NFSDDBG_FACILITY NFSDDBG_XDR
-u32 nfsd_suppattrs[3][3] = {
+const u32 nfsd_suppattrs[3][3] = {
{NFSD4_SUPPORTED_ATTRS_WORD0,
NFSD4_SUPPORTED_ATTRS_WORD1,
NFSD4_SUPPORTED_ATTRS_WORD2},
@@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
READ_BUF(16);
p = xdr_decode_hyper(p, &write->wr_offset);
write->wr_stable_how = be32_to_cpup(p++);
- if (write->wr_stable_how > 2)
+ if (write->wr_stable_how > NFS_FILE_SYNC)
goto xdr_error;
write->wr_buflen = be32_to_cpup(p++);
@@ -1941,12 +1941,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
} else
max_reply += nfsd4_max_reply(argp->rqstp, op);
/*
- * OP_LOCK may return a conflicting lock. (Special case
- * because it will just skip encoding this if it runs
- * out of xdr buffer space, and it is the only operation
- * that behaves this way.)
+ * OP_LOCK and OP_LOCKT may return a conflicting lock.
+ * (Special case because it will just skip encoding this
+ * if it runs out of xdr buffer space, and it is the only
+ * operation that behaves this way.)
*/
- if (op->opnum == OP_LOCK)
+ if (op->opnum == OP_LOCK || op->opnum == OP_LOCKT)
max_reply += NFS4_OPAQUE_LIMIT;
if (op->status) {
@@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
DECODE_TAIL;
}
-static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
+static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+ struct svc_export *exp)
{
- if (IS_I_VERSION(inode)) {
+ if (exp->ex_flags & NFSEXP_V4ROOT) {
+ *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
+ *p++ = 0;
+ } else if (IS_I_VERSION(inode)) {
p = xdr_encode_hyper(p, inode->i_version);
} else {
*p++ = cpu_to_be32(stat->ctime.tv_sec);
@@ -2297,7 +2301,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
if (path.dentry != path.mnt->mnt_root)
break;
}
- err = vfs_getattr(&path, stat);
+ err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
path_put(&path);
return err;
}
@@ -2381,7 +2385,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
goto out;
}
- err = vfs_getattr(&path, &stat);
+ err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
@@ -2417,8 +2421,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
- err = security_inode_getsecctx(d_inode(dentry),
+ if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
+ err = security_inode_getsecctx(d_inode(dentry),
&context, &contextlen);
+ else
+ err = -EOPNOTSUPP;
contextsupport = (err == 0);
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
@@ -2490,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
p = xdr_reserve_space(xdr, 8);
if (!p)
goto out_resource;
- p = encode_change(p, &stat, d_inode(dentry));
+ p = encode_change(p, &stat, d_inode(dentry), exp);
}
if (bmval0 & FATTR4_WORD0_SIZE) {
p = xdr_reserve_space(xdr, 8);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f3b2f34..73e75ac 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -536,6 +536,19 @@ out_free:
return rv;
}
+static ssize_t
+nfsd_print_version_support(char *buf, int remaining, const char *sep,
+ unsigned vers, unsigned minor)
+{
+ const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";
+ bool supported = !!nfsd_vers(vers, NFSD_TEST);
+
+ if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST))
+ supported = false;
+ return snprintf(buf, remaining, format, sep,
+ supported ? '+' : '-', vers, minor);
+}
+
static ssize_t __write_versions(struct file *file, char *buf, size_t size)
{
char *mesg = buf;
@@ -561,6 +574,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
len = qword_get(&mesg, vers, size);
if (len <= 0) return -EINVAL;
do {
+ enum vers_op cmd;
sign = *vers;
if (sign == '+' || sign == '-')
num = simple_strtol((vers+1), &minorp, 0);
@@ -569,24 +583,22 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
if (*minorp == '.') {
if (num != 4)
return -EINVAL;
- minor = simple_strtoul(minorp+1, NULL, 0);
- if (minor == 0)
- return -EINVAL;
- if (nfsd_minorversion(minor, sign == '-' ?
- NFSD_CLEAR : NFSD_SET) < 0)
+ if (kstrtouint(minorp+1, 0, &minor) < 0)
return -EINVAL;
- goto next;
- }
+ } else
+ minor = 0;
+ cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
switch(num) {
case 2:
case 3:
- case 4:
- nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
+ nfsd_vers(num, cmd);
break;
+ case 4:
+ if (nfsd_minorversion(minor, cmd) >= 0)
+ break;
default:
return -EINVAL;
}
- next:
vers += len + 1;
} while ((len = qword_get(&mesg, vers, size)) > 0);
/* If all get turned off, turn them back on, as
@@ -599,35 +611,23 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
len = 0;
sep = "";
remaining = SIMPLE_TRANSACTION_LIMIT;
- for (num=2 ; num <= 4 ; num++)
- if (nfsd_vers(num, NFSD_AVAIL)) {
- len = snprintf(buf, remaining, "%s%c%d", sep,
- nfsd_vers(num, NFSD_TEST)?'+':'-',
- num);
- sep = " ";
-
- if (len >= remaining)
- break;
- remaining -= len;
- buf += len;
- tlen += len;
- }
- if (nfsd_vers(4, NFSD_AVAIL))
- for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
- minor++) {
- len = snprintf(buf, remaining, " %c4.%u",
- (nfsd_vers(4, NFSD_TEST) &&
- nfsd_minorversion(minor, NFSD_TEST)) ?
- '+' : '-',
- minor);
-
+ for (num=2 ; num <= 4 ; num++) {
+ if (!nfsd_vers(num, NFSD_AVAIL))
+ continue;
+ minor = 0;
+ do {
+ len = nfsd_print_version_support(buf, remaining,
+ sep, num, minor);
if (len >= remaining)
- break;
+ goto out;
remaining -= len;
buf += len;
tlen += len;
- }
-
+ minor++;
+ sep = " ";
+ } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION);
+ }
+out:
len = snprintf(buf, remaining, "\n");
if (len >= remaining)
return -EINVAL;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d74c8c4..d966068 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -362,16 +362,16 @@ void nfsd_lockd_shutdown(void);
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS)
-extern u32 nfsd_suppattrs[3][3];
+extern const u32 nfsd_suppattrs[3][3];
-static inline bool bmval_is_subset(u32 *bm1, u32 *bm2)
+static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
{
return !((bm1[0] & ~bm2[0]) ||
(bm1[1] & ~bm2[1]) ||
(bm1[2] & ~bm2[2]));
}
-static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
+static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
{
return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]);
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 010aff5..fa82b77 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -204,18 +204,14 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
struct nfsd_attrstat *resp)
{
__be32 nfserr;
- int stable = 1;
unsigned long cnt = argp->len;
dprintk("nfsd: WRITE %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &cnt,
- &stable);
+ nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
+ rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
return nfsd_return_attrs(nfserr, resp);
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e6bfd96..786a4a2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -6,7 +6,7 @@
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/freezer.h>
#include <linux/module.h>
#include <linux/fs_struct.h>
@@ -153,6 +153,18 @@ int nfsd_vers(int vers, enum vers_op change)
return 0;
}
+static void
+nfsd_adjust_nfsd_versions4(void)
+{
+ unsigned i;
+
+ for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
+ if (nfsd_supported_minorversions[i])
+ return;
+ }
+ nfsd_vers(4, NFSD_CLEAR);
+}
+
int nfsd_minorversion(u32 minorversion, enum vers_op change)
{
if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
@@ -160,9 +172,11 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
switch(change) {
case NFSD_SET:
nfsd_supported_minorversions[minorversion] = true;
+ nfsd_vers(4, NFSD_SET);
break;
case NFSD_CLEAR:
nfsd_supported_minorversions[minorversion] = false;
+ nfsd_adjust_nfsd_versions4();
break;
case NFSD_TEST:
return nfsd_supported_minorversions[minorversion];
@@ -354,6 +368,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4516e8b..005c911 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
extern int set_callback_cred(void);
+extern void cleanup_callback_cred(void);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 26c6fdb..19d50f6 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
__be32 err;
int host_err;
bool get_write_count;
- int size_change = 0;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/* Get inode */
err = fh_verify(rqstp, fhp, ftype, accmode);
if (err)
- goto out;
+ return err;
if (get_write_count) {
host_err = fh_want_write(fhp);
if (host_err)
- return nfserrno(host_err);
+ goto out;
}
dentry = fhp->fh_dentry;
@@ -405,20 +405,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~ATTR_MODE;
if (!iap->ia_valid)
- goto out;
+ return 0;
nfsd_sanitize_attrs(inode, iap);
+ if (check_guard && guardtime != inode->i_ctime.tv_sec)
+ return nfserr_notsync;
+
/*
* The size case is special, it changes the file in addition to the
- * attributes.
+ * attributes, and file systems don't expect it to be mixed with
+ * "random" attribute changes. We thus split out the size change
+ * into a separate call to ->setattr, and do the rest as a separate
+ * setattr call.
*/
- if (iap->ia_valid & ATTR_SIZE) {
+ if (size_change) {
err = nfsd_get_write_access(rqstp, fhp, iap);
if (err)
- goto out;
- size_change = 1;
+ return err;
+ }
+ fh_lock(fhp);
+ if (size_change) {
/*
* RFC5661, Section 18.30.4:
* Changing the size of a file with SETATTR indirectly
@@ -426,29 +434,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
*
* (and similar for the older RFCs)
*/
- if (iap->ia_size != i_size_read(inode))
- iap->ia_valid |= ATTR_MTIME;
- }
+ struct iattr size_attr = {
+ .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+ .ia_size = iap->ia_size,
+ };
- iap->ia_valid |= ATTR_CTIME;
+ host_err = notify_change(dentry, &size_attr, NULL);
+ if (host_err)
+ goto out_unlock;
+ iap->ia_valid &= ~ATTR_SIZE;
- if (check_guard && guardtime != inode->i_ctime.tv_sec) {
- err = nfserr_notsync;
- goto out_put_write_access;
+ /*
+ * Avoid the additional setattr call below if the only other
+ * attribute that the client sends is the mtime, as we update
+ * it as part of the size change above.
+ */
+ if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+ goto out_unlock;
}
- fh_lock(fhp);
+ iap->ia_valid |= ATTR_CTIME;
host_err = notify_change(dentry, iap, NULL);
- fh_unlock(fhp);
- err = nfserrno(host_err);
-out_put_write_access:
+out_unlock:
+ fh_unlock(fhp);
if (size_change)
put_write_access(inode);
- if (!err)
- err = nfserrno(commit_metadata(fhp));
out:
- return err;
+ if (!host_err)
+ host_err = commit_metadata(fhp);
+ return nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
@@ -940,14 +955,12 @@ static int wait_for_concurrent_writes(struct file *file)
__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int *stablep)
+ unsigned long *cnt, int stable)
{
struct svc_export *exp;
- struct inode *inode;
mm_segment_t oldfs;
__be32 err = 0;
int host_err;
- int stable = *stablep;
int use_wgather;
loff_t pos = offset;
unsigned int pflags = current->flags;
@@ -962,13 +975,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
*/
current->flags |= PF_LESS_THROTTLE;
- inode = file_inode(file);
- exp = fhp->fh_export;
-
+ exp = fhp->fh_export;
use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
if (!EX_ISSYNC(exp))
- stable = 0;
+ stable = NFS_UNSTABLE;
if (stable && !use_wgather)
flags |= RWF_SYNC;
@@ -1035,35 +1046,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
* N.B. After this call fhp needs an fh_put
*/
__be32
-nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
- int *stablep)
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+ struct kvec *vec, int vlen, unsigned long *cnt, int stable)
{
- __be32 err = 0;
+ struct file *file = NULL;
+ __be32 err = 0;
trace_write_start(rqstp, fhp, offset, vlen);
- if (file) {
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- trace_write_opened(rqstp, fhp, offset, vlen);
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
- stablep);
- trace_write_io_done(rqstp, fhp, offset, vlen);
- } else {
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
- if (err)
- goto out;
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+ if (err)
+ goto out;
- trace_write_opened(rqstp, fhp, offset, vlen);
- if (cnt)
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
- cnt, stablep);
- trace_write_io_done(rqstp, fhp, offset, vlen);
- fput(file);
- }
+ trace_write_opened(rqstp, fhp, offset, vlen);
+ err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
+ trace_write_io_done(rqstp, fhp, offset, vlen);
+ fput(file);
out:
trace_write_done(rqstp, fhp, offset, vlen);
return err;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 0bf9e7b..1bbdcce 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -83,12 +83,12 @@ __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
- loff_t, struct kvec *,int, unsigned long *, int *);
+__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
+ struct kvec *, int, unsigned long *, int);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt,
- int *stablep);
+ int stable);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
@@ -135,7 +135,8 @@ static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat)
{
struct path p = {.mnt = fh->fh_export->ex_path.mnt,
.dentry = fh->fh_dentry};
- return nfserrno(vfs_getattr(&p, stat));
+ return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT));
}
static inline int nfsd_create_is_exclusive(int createmode)
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 7d18d62..febed12 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -30,6 +30,8 @@
#include <linux/crc32.h>
#include <linux/pagevec.h>
#include <linux/slab.h>
+#include <linux/sched/signal.h>
+
#include "nilfs.h"
#include "btnode.h"
#include "page.h"
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index a4c4622..e5f7e47 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -6,6 +6,7 @@
#include <linux/kernel.h> /* UINT_MAX */
#include <linux/mount.h>
#include <linux/sched.h>
+#include <linux/sched/user.h>
#include <linux/types.h>
#include <linux/wait.h>
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 7ebfca6..2b37f27 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
+#include <linux/sched/signal.h>
#include <asm/ioctls.h>
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index f36c293..1aeb837 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -30,6 +30,7 @@
#include <linux/slab.h> /* kmem_* */
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/user.h>
#include "inotify.h"
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1cf41c6..498d609 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -30,7 +30,7 @@
#include <linux/inotify.h>
#include <linux/kernel.h> /* roundup() */
#include <linux/namei.h> /* LOOKUP_FOLLOW */
-#include <linux/sched.h> /* struct user */
+#include <linux/sched/signal.h>
#include <linux/slab.h> /* struct kmem_cache */
#include <linux/syscalls.h>
#include <linux/types.h>
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 358ed7e..c4f68c3 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -24,7 +24,7 @@
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d4ec0d8..fb15a96 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -30,6 +30,7 @@
#include <linux/swap.h>
#include <linux/quotaops.h>
#include <linux/blkdev.h>
+#include <linux/sched/signal.h>
#include <cluster/masklog.h>
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ec00057..4348027 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -54,6 +54,7 @@
*/
#include <linux/kernel.h>
+#include <linux/sched/mm.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/idr.h>
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 32fd261..a2b19fb 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -33,6 +33,7 @@
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/debugfs.h>
+#include <linux/sched/signal.h>
#include "cluster/heartbeat.h"
#include "cluster/nodemanager.h"
diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c
index f70cda2..9cecf48 100644
--- a/fs/ocfs2/dlmfs/userdlm.c
+++ b/fs/ocfs2/dlmfs/userdlm.c
@@ -28,6 +28,7 @@
*/
#include <linux/signal.h>
+#include <linux/sched/signal.h>
#include <linux/module.h>
#include <linux/fs.h>
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 8dce409..3b7c937 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -33,6 +33,7 @@
#include <linux/seq_file.h>
#include <linux/time.h>
#include <linux/quotaops.h>
+#include <linux/sched/signal.h>
#define MLOG_MASK_PREFIX ML_DLM_GLUE
#include <cluster/masklog.h>
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8836305..bfeb647 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1306,16 +1306,15 @@ bail:
return status;
}
-int ocfs2_getattr(struct vfsmount *mnt,
- struct dentry *dentry,
- struct kstat *stat)
+int ocfs2_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
- struct inode *inode = d_inode(dentry);
- struct super_block *sb = dentry->d_sb;
+ struct inode *inode = d_inode(path->dentry);
+ struct super_block *sb = path->dentry->d_sb;
struct ocfs2_super *osb = sb->s_fs_info;
int err;
- err = ocfs2_inode_revalidate(dentry);
+ err = ocfs2_inode_revalidate(path->dentry);
if (err) {
if (err != -ENOENT)
mlog_errno(err);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 897fd9a..1fdc983 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -68,8 +68,8 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
u32 clusters_to_add, int mark_unwritten);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
-int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat);
+int ocfs2_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags);
int ocfs2_permission(struct inode *inode, int mask);
int ocfs2_should_update_atime(struct inode *inode,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a24e42f..ca1646f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -42,6 +42,7 @@
#include <linux/seq_file.h>
#include <linux/quotaops.h>
#include <linux/cleancache.h>
+#include <linux/signal.h>
#define CREATE_TRACE_POINTS
#include "ocfs2_trace.h"
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index df7ea85..8c9034e 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/vfs.h>
+#include <linux/cred.h>
#include <linux/parser.h>
#include <linux/buffer_head.h>
#include <linux/vmalloc.h>
diff --git a/fs/open.c b/fs/open.c
index 9921f70..949cef2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -301,12 +301,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (S_ISFIFO(inode->i_mode))
return -ESPIPE;
- /*
- * Let individual file system decide if it supports preallocation
- * for directories or not.
- */
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
- !S_ISBLK(inode->i_mode))
+ if (S_ISDIR(inode->i_mode))
+ return -EISDIR;
+
+ if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
return -ENODEV;
/* Check for wrap through zero too */
@@ -316,7 +314,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!file->f_op->fallocate)
return -EOPNOTSUPP;
- sb_start_write(inode->i_sb);
+ file_start_write(file);
ret = file->f_op->fallocate(file, mode, offset, len);
/*
@@ -329,7 +327,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (ret == 0)
fsnotify_modify(file);
- sb_end_write(inode->i_sb);
+ file_end_write(file);
return ret;
}
EXPORT_SYMBOL_GPL(vfs_fallocate);
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 5cd6179..a304bf3 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -245,25 +245,24 @@ out:
/*
* Obtain attributes of an object given a dentry
*/
-int orangefs_getattr(struct vfsmount *mnt,
- struct dentry *dentry,
- struct kstat *kstat)
+int orangefs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
int ret = -ENOENT;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = path->dentry->d_inode;
struct orangefs_inode_s *orangefs_inode = NULL;
gossip_debug(GOSSIP_INODE_DEBUG,
"orangefs_getattr: called on %pd\n",
- dentry);
+ path->dentry);
ret = orangefs_inode_getattr(inode, 0, 0);
if (ret == 0) {
- generic_fillattr(inode, kstat);
+ generic_fillattr(inode, stat);
/* override block size reported to stat */
orangefs_inode = ORANGEFS_I(inode);
- kstat->blksize = orangefs_inode->blksize;
+ stat->blksize = orangefs_inode->blksize;
}
return ret;
}
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 70355a9..5e48a0b 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -41,7 +41,7 @@
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/uio.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/mm.h>
#include <linux/wait.h>
#include <linux/dcache.h>
@@ -439,9 +439,8 @@ struct inode *orangefs_new_inode(struct super_block *sb,
int orangefs_setattr(struct dentry *dentry, struct iattr *iattr);
-int orangefs_getattr(struct vfsmount *mnt,
- struct dentry *dentry,
- struct kstat *kstat);
+int orangefs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags);
int orangefs_permission(struct inode *inode, int mask);
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index c48859f..67c2435 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -115,6 +115,13 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb)
return &orangefs_inode->vfs_inode;
}
+static void orangefs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ kmem_cache_free(orangefs_inode_cache, orangefs_inode);
+}
+
static void orangefs_destroy_inode(struct inode *inode)
{
struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
@@ -123,7 +130,7 @@ static void orangefs_destroy_inode(struct inode *inode)
"%s: deallocated %p destroying inode %pU\n",
__func__, orangefs_inode, get_khandle_from_ino(inode));
- kmem_cache_free(orangefs_inode_cache, orangefs_inode);
+ call_rcu(&inode->i_rcu, orangefs_i_callback);
}
/*
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index f57043d..906ea6c 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -15,11 +15,13 @@
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/uaccess.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/fdtable.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
+#include "ovl_entry.h"
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
@@ -232,12 +234,14 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
struct dentry *dentry, struct path *lowerpath,
- struct kstat *stat, const char *link)
+ struct kstat *stat, const char *link,
+ struct kstat *pstat, bool tmpfile)
{
struct inode *wdir = workdir->d_inode;
struct inode *udir = upperdir->d_inode;
struct dentry *newdentry = NULL;
struct dentry *upper = NULL;
+ struct dentry *temp = NULL;
int err;
const struct cred *old_creds = NULL;
struct cred *new_creds = NULL;
@@ -248,25 +252,30 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
.link = link
};
- newdentry = ovl_lookup_temp(workdir, dentry);
- err = PTR_ERR(newdentry);
- if (IS_ERR(newdentry))
- goto out;
-
upper = lookup_one_len(dentry->d_name.name, upperdir,
dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
- goto out1;
+ goto out;
err = security_inode_copy_up(dentry, &new_creds);
if (err < 0)
- goto out2;
+ goto out1;
if (new_creds)
old_creds = override_creds(new_creds);
- err = ovl_create_real(wdir, newdentry, &cattr, NULL, true);
+ if (tmpfile)
+ temp = ovl_do_tmpfile(upperdir, stat->mode);
+ else
+ temp = ovl_lookup_temp(workdir, dentry);
+ err = PTR_ERR(temp);
+ if (IS_ERR(temp))
+ goto out1;
+
+ err = 0;
+ if (!tmpfile)
+ err = ovl_create_real(wdir, temp, &cattr, NULL, true);
if (new_creds) {
revert_creds(old_creds);
@@ -281,39 +290,55 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
ovl_path_upper(dentry, &upperpath);
BUG_ON(upperpath.dentry != NULL);
- upperpath.dentry = newdentry;
+ upperpath.dentry = temp;
+
+ if (tmpfile) {
+ inode_unlock(udir);
+ err = ovl_copy_up_data(lowerpath, &upperpath,
+ stat->size);
+ inode_lock_nested(udir, I_MUTEX_PARENT);
+ } else {
+ err = ovl_copy_up_data(lowerpath, &upperpath,
+ stat->size);
+ }
- err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
if (err)
goto out_cleanup;
}
- err = ovl_copy_xattr(lowerpath->dentry, newdentry);
+ err = ovl_copy_xattr(lowerpath->dentry, temp);
if (err)
goto out_cleanup;
- inode_lock(newdentry->d_inode);
- err = ovl_set_attr(newdentry, stat);
- inode_unlock(newdentry->d_inode);
+ inode_lock(temp->d_inode);
+ err = ovl_set_attr(temp, stat);
+ inode_unlock(temp->d_inode);
if (err)
goto out_cleanup;
- err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+ if (tmpfile)
+ err = ovl_do_link(temp, udir, upper, true);
+ else
+ err = ovl_do_rename(wdir, temp, udir, upper, 0);
if (err)
goto out_cleanup;
+ newdentry = dget(tmpfile ? upper : temp);
ovl_dentry_update(dentry, newdentry);
ovl_inode_update(d_inode(dentry), d_inode(newdentry));
- newdentry = NULL;
+
+ /* Restore timestamps on parent (best effort) */
+ ovl_set_timestamps(upperdir, pstat);
out2:
- dput(upper);
+ dput(temp);
out1:
- dput(newdentry);
+ dput(upper);
out:
return err;
out_cleanup:
- ovl_cleanup(wdir, newdentry);
+ if (!tmpfile)
+ ovl_cleanup(wdir, temp);
goto out2;
}
@@ -337,6 +362,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
struct dentry *lowerdentry = lowerpath->dentry;
struct dentry *upperdir;
const char *link = NULL;
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
if (WARN_ON(!workdir))
return -EROFS;
@@ -346,7 +372,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
ovl_path_upper(parent, &parentpath);
upperdir = parentpath.dentry;
- err = vfs_getattr(&parentpath, &pstat);
+ err = vfs_getattr(&parentpath, &pstat,
+ STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
@@ -356,6 +383,25 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return PTR_ERR(link);
}
+ /* Should we copyup with O_TMPFILE or with workdir? */
+ if (S_ISREG(stat->mode) && ofs->tmpfile) {
+ err = ovl_copy_up_start(dentry);
+ /* err < 0: interrupted, err > 0: raced with another copy-up */
+ if (unlikely(err)) {
+ pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err);
+ if (err > 0)
+ err = 0;
+ goto out_done;
+ }
+
+ inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT);
+ err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
+ stat, link, &pstat, true);
+ inode_unlock(upperdir->d_inode);
+ ovl_copy_up_end(dentry);
+ goto out_done;
+ }
+
err = -EIO;
if (lock_rename(workdir, upperdir) != NULL) {
pr_err("overlayfs: failed to lock workdir+upperdir\n");
@@ -368,13 +414,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
}
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
- stat, link);
- if (!err) {
- /* Restore timestamps on parent (best effort) */
- ovl_set_timestamps(upperdir, &pstat);
- }
+ stat, link, &pstat, false);
out_unlock:
unlock_rename(workdir, upperdir);
+out_done:
do_delayed_call(&done);
return err;
@@ -409,7 +452,8 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
}
ovl_path_lower(next, &lowerpath);
- err = vfs_getattr(&lowerpath, &stat);
+ err = vfs_getattr(&lowerpath, &stat,
+ STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
/* maybe truncate regular file. this has no effect on dirs */
if (flags & O_TRUNC)
stat.size = 0;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 16e06dd..6515796 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -138,9 +138,10 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
return err;
}
-static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int ovl_dir_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
+ struct dentry *dentry = path->dentry;
int err;
enum ovl_path_type type;
struct path realpath;
@@ -148,7 +149,7 @@ static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
type = ovl_path_real(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_getattr(&realpath, stat);
+ err = vfs_getattr(&realpath, stat, request_mask, flags);
revert_creds(old_cred);
if (err)
return err;
@@ -264,7 +265,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
goto out;
ovl_path_upper(dentry, &upperpath);
- err = vfs_getattr(&upperpath, &stat);
+ err = vfs_getattr(&upperpath, &stat,
+ STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
goto out_unlock;
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 08643ac..f8fe6bf 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -9,6 +9,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include "overlayfs.h"
@@ -56,16 +57,17 @@ out:
return err;
}
-static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int ovl_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
+ struct dentry *dentry = path->dentry;
struct path realpath;
const struct cred *old_cred;
int err;
ovl_path_real(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_getattr(&realpath, stat);
+ err = vfs_getattr(&realpath, stat, request_mask, flags);
revert_creds(old_cred);
return err;
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 023bb0b..b8b0778 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -8,6 +8,7 @@
*/
#include <linux/fs.h>
+#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/ratelimit.h>
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 8af450b..741dc0b 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -127,6 +127,15 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
return err;
}
+static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
+{
+ struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
+ int err = IS_ERR(ret) ? PTR_ERR(ret) : 0;
+
+ pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
+ return ret;
+}
+
static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
{
unsigned long x = (unsigned long) READ_ONCE(inode->i_private);
@@ -169,6 +178,8 @@ void ovl_dentry_version_inc(struct dentry *dentry);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
+int ovl_copy_up_start(struct dentry *dentry);
+void ovl_copy_up_end(struct dentry *dentry);
/* namei.c */
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index d14bca1..59614fa 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -27,6 +27,8 @@ struct ovl_fs {
struct ovl_config config;
/* creds of process who forced instantiation of super block */
const struct cred *creator_cred;
+ bool tmpfile;
+ wait_queue_head_t copyup_wq;
};
/* private information held for every overlayfs dentry */
@@ -38,6 +40,7 @@ struct ovl_entry {
u64 version;
const char *redirect;
bool opaque;
+ bool copying;
};
struct rcu_head rcu;
};
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 20f48ab..c9e70d3 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -7,6 +7,7 @@
* the Free Software Foundation.
*/
+#include <uapi/linux/magic.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
@@ -160,6 +161,25 @@ static void ovl_put_super(struct super_block *sb)
kfree(ufs);
}
+static int ovl_sync_fs(struct super_block *sb, int wait)
+{
+ struct ovl_fs *ufs = sb->s_fs_info;
+ struct super_block *upper_sb;
+ int ret;
+
+ if (!ufs->upper_mnt)
+ return 0;
+ upper_sb = ufs->upper_mnt->mnt_sb;
+ if (!upper_sb->s_op->sync_fs)
+ return 0;
+
+ /* real inodes have already been synced by sync_filesystem(ovl_sb) */
+ down_read(&upper_sb->s_umount);
+ ret = upper_sb->s_op->sync_fs(upper_sb, wait);
+ up_read(&upper_sb->s_umount);
+ return ret;
+}
+
/**
* ovl_statfs
* @sb: The overlayfs super block
@@ -222,6 +242,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations ovl_super_operations = {
.put_super = ovl_put_super,
+ .sync_fs = ovl_sync_fs,
.statfs = ovl_statfs,
.show_options = ovl_show_options,
.remount_fs = ovl_remount,
@@ -701,6 +722,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
unsigned int stacklen = 0;
unsigned int i;
bool remote = false;
+ struct cred *cred;
int err;
err = -ENOMEM;
@@ -708,6 +730,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!ufs)
goto out;
+ init_waitqueue_head(&ufs->copyup_wq);
ufs->config.redirect_dir = ovl_redirect_dir_def;
err = ovl_parse_opt((char *) data, &ufs->config);
if (err)
@@ -825,6 +848,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
* creation of workdir in previous step.
*/
if (ufs->workdir) {
+ struct dentry *temp;
+
err = ovl_check_d_type_supported(&workpath);
if (err < 0)
goto out_put_workdir;
@@ -836,6 +861,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
*/
if (!err)
pr_warn("overlayfs: upper fs needs to support d_type.\n");
+
+ /* Check if upper/work fs supports O_TMPFILE */
+ temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0);
+ ufs->tmpfile = !IS_ERR(temp);
+ if (ufs->tmpfile)
+ dput(temp);
+ else
+ pr_warn("overlayfs: upper fs does not support tmpfile.\n");
}
}
@@ -870,10 +903,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
else
sb->s_d_op = &ovl_dentry_operations;
- ufs->creator_cred = prepare_creds();
- if (!ufs->creator_cred)
+ ufs->creator_cred = cred = prepare_creds();
+ if (!cred)
goto out_put_lower_mnt;
+ /* Never override disk quota limits or use reserved space */
+ cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
+
err = -ENOMEM;
oe = ovl_alloc_entry(numlower);
if (!oe)
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 952286f..1953986 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -10,7 +10,9 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/slab.h>
+#include <linux/cred.h>
#include <linux/xattr.h>
+#include <linux/sched/signal.h>
#include "overlayfs.h"
#include "ovl_entry.h"
@@ -263,3 +265,33 @@ struct file *ovl_path_open(struct path *path, int flags)
{
return dentry_open(path, flags | O_NOATIME, current_cred());
}
+
+int ovl_copy_up_start(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_entry *oe = dentry->d_fsdata;
+ int err;
+
+ spin_lock(&ofs->copyup_wq.lock);
+ err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying);
+ if (!err) {
+ if (oe->__upperdentry)
+ err = 1; /* Already copied up */
+ else
+ oe->copying = true;
+ }
+ spin_unlock(&ofs->copyup_wq.lock);
+
+ return err;
+}
+
+void ovl_copy_up_end(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ spin_lock(&ofs->copyup_wq.lock);
+ oe->copying = false;
+ wake_up_locked(&ofs->copyup_wq);
+ spin_unlock(&ofs->copyup_wq.lock);
+}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index c9d48dc..eebf5f6 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -15,6 +15,7 @@
#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
diff --git a/fs/proc/array.c b/fs/proc/array.c
index fe12b51..88c3555 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -60,6 +60,10 @@
#include <linux/tty.h>
#include <linux/string.h>
#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/numa_balancing.h>
+#include <linux/sched/task.h>
+#include <linux/sched/cputime.h>
#include <linux/proc_fs.h>
#include <linux/ioport.h>
#include <linux/uaccess.h>
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1e1e182..c87b6b9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -85,6 +85,11 @@
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
+#include <linux/sched/autogroup.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/stat.h>
#include <linux/flex_array.h>
#include <linux/posix-timers.h>
#ifdef CONFIG_HARDWALL
@@ -1724,11 +1729,12 @@ out_unlock:
return NULL;
}
-int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int pid_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct task_struct *task;
- struct pid_namespace *pid = dentry->d_sb->s_fs_info;
+ struct pid_namespace *pid = path->dentry->d_sb->s_fs_info;
generic_fillattr(inode, stat);
@@ -3511,9 +3517,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
return 0;
}
-static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+static int proc_task_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct task_struct *p = get_proc_task(inode);
generic_fillattr(inode, stat);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 00ce153..c330495 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -1,4 +1,4 @@
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/dcache.h>
#include <linux/path.h>
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 06c7390..ee27feb 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -118,10 +118,10 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
return 0;
}
-static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int proc_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct proc_dir_entry *de = PDE(inode);
if (de && de->nlink)
set_nlink(inode, de->nlink);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5d6960f..c5ae09b 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -14,6 +14,8 @@
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/binfmts.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/task.h>
struct ctl_table_header;
struct mempolicy;
@@ -149,7 +151,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
* base.c
*/
extern const struct dentry_operations pid_dentry_operations;
-extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int proc_setattr(struct dentry *, struct iattr *);
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
extern int pid_revalidate(struct dentry *, unsigned int);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index ea9f3d1..4ee5527 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -28,6 +28,7 @@
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
+#include <linux/sched/task.h>
#include <asm/sections.h>
#include "internal.h"
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index aec66e6..983fce5 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -3,6 +3,8 @@
#include <linux/pid_namespace.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/stat.h>
#include <linux/seq_file.h>
#include <linux/seqlock.h>
#include <linux/time.h>
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index ffd72a6..d72fc40 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/sched/task.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/mount.h>
@@ -140,10 +141,10 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
return de;
}
-static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct net *net;
net = get_proc_task_net(inode);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 3e64c65..8f91ec6 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -8,6 +8,7 @@
#include <linux/printk.h>
#include <linux/security.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
@@ -801,9 +802,10 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
}
-static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+static int proc_sys_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index b90da88..deecb39 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -14,12 +14,14 @@
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/sched/stat.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/user_namespace.h>
#include <linux/mount.h>
#include <linux/pid_namespace.h>
#include <linux/parser.h>
+#include <linux/cred.h>
#include "internal.h"
@@ -149,10 +151,10 @@ void __init proc_root_init(void)
proc_sys_init();
}
-static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
-)
+static int proc_root_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- generic_fillattr(d_inode(dentry), stat);
+ generic_fillattr(d_inode(path->dentry), stat);
stat->nlink = proc_root.nlink + nr_processes();
return 0;
}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index e47c3e8..bd4e55f 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -5,11 +5,12 @@
#include <linux/kernel_stat.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
+#include <linux/sched/stat.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/irqnr.h>
-#include <linux/cputime.h>
+#include <linux/sched/cputime.h>
#include <linux/tick.h>
#ifndef arch_irq_stat_cpu
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ee3efb2..f08bd31 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -11,6 +11,7 @@
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/swap.h>
+#include <linux/sched/mm.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 1ef97cf..23266694 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -7,6 +7,8 @@
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
+#include <linux/sched/mm.h>
+
#include "internal.h"
/*
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 3f1190d..b5713fe 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -10,6 +10,8 @@
#include <linux/nsproxy.h>
#include <linux/security.h>
#include <linux/fs_struct.h>
+#include <linux/sched/task.h>
+
#include "proc/internal.h" /* only for get_proc_task() in ->open() */
#include "pnode.h"
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 406fed9..74b489e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -72,6 +72,7 @@
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/kmod.h>
#include <linux/namei.h>
#include <linux/capability.h>
diff --git a/fs/read_write.c b/fs/read_write.c
index 5816d4c..c4f88af 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -4,8 +4,9 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#include <linux/slab.h>
+#include <linux/slab.h>
#include <linux/stat.h>
+#include <linux/sched/xacct.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
@@ -23,9 +24,6 @@
#include <linux/uaccess.h>
#include <asm/unistd.h>
-typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
-typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
-
const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
@@ -370,7 +368,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
kiocb.ki_pos = *ppos;
iter->type |= READ;
- ret = file->f_op->read_iter(&kiocb, iter);
+ ret = call_read_iter(file, &kiocb, iter);
BUG_ON(ret == -EIOCBQUEUED);
if (ret > 0)
*ppos = kiocb.ki_pos;
@@ -390,7 +388,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
kiocb.ki_pos = *ppos;
iter->type |= WRITE;
- ret = file->f_op->write_iter(&kiocb, iter);
+ ret = call_write_iter(file, &kiocb, iter);
BUG_ON(ret == -EIOCBQUEUED);
if (ret > 0)
*ppos = kiocb.ki_pos;
@@ -439,7 +437,7 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
kiocb.ki_pos = *ppos;
iov_iter_init(&iter, READ, &iov, 1, len);
- ret = filp->f_op->read_iter(&kiocb, &iter);
+ ret = call_read_iter(filp, &kiocb, &iter);
BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
@@ -496,7 +494,7 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
kiocb.ki_pos = *ppos;
iov_iter_init(&iter, WRITE, &iov, 1, len);
- ret = filp->f_op->write_iter(&kiocb, &iter);
+ ret = call_write_iter(filp, &kiocb, &iter);
BUG_ON(ret == -EIOCBQUEUED);
if (ret > 0)
*ppos = kiocb.ki_pos;
@@ -675,7 +673,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
EXPORT_SYMBOL(iov_shorten);
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, iter_fn_t fn, int flags)
+ loff_t *ppos, int type, int flags)
{
struct kiocb kiocb;
ssize_t ret;
@@ -692,7 +690,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
kiocb.ki_pos = *ppos;
- ret = fn(&kiocb, iter);
+ if (type == READ)
+ ret = call_read_iter(filp, &kiocb, iter);
+ else
+ ret = call_write_iter(filp, &kiocb, iter);
BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
@@ -700,7 +701,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, io_fn_t fn, int flags)
+ loff_t *ppos, int type, int flags)
{
ssize_t ret = 0;
@@ -711,7 +712,13 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
struct iovec iovec = iov_iter_iovec(iter);
ssize_t nr;
- nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
+ if (type == READ) {
+ nr = filp->f_op->read(filp, iovec.iov_base,
+ iovec.iov_len, ppos);
+ } else {
+ nr = filp->f_op->write(filp, iovec.iov_base,
+ iovec.iov_len, ppos);
+ }
if (nr < 0) {
if (!ret)
@@ -834,50 +841,32 @@ out:
return ret;
}
-static ssize_t do_readv_writev(int type, struct file *file,
- const struct iovec __user * uvector,
- unsigned long nr_segs, loff_t *pos,
- int flags)
+static ssize_t __do_readv_writev(int type, struct file *file,
+ struct iov_iter *iter, loff_t *pos, int flags)
{
size_t tot_len;
- struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov = iovstack;
- struct iov_iter iter;
- ssize_t ret;
- io_fn_t fn;
- iter_fn_t iter_fn;
-
- ret = import_iovec(type, uvector, nr_segs,
- ARRAY_SIZE(iovstack), &iov, &iter);
- if (ret < 0)
- return ret;
+ ssize_t ret = 0;
- tot_len = iov_iter_count(&iter);
+ tot_len = iov_iter_count(iter);
if (!tot_len)
goto out;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
- if (type == READ) {
- fn = file->f_op->read;
- iter_fn = file->f_op->read_iter;
- } else {
- fn = (io_fn_t)file->f_op->write;
- iter_fn = file->f_op->write_iter;
+ if (type != READ)
file_start_write(file);
- }
- if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
+ if ((type == READ && file->f_op->read_iter) ||
+ (type == WRITE && file->f_op->write_iter))
+ ret = do_iter_readv_writev(file, iter, pos, type, flags);
else
- ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
+ ret = do_loop_readv_writev(file, iter, pos, type, flags);
if (type != READ)
file_end_write(file);
out:
- kfree(iov);
if ((ret + (type == READ)) > 0) {
if (type == READ)
fsnotify_access(file);
@@ -887,6 +876,27 @@ out:
return ret;
}
+static ssize_t do_readv_writev(int type, struct file *file,
+ const struct iovec __user *uvector,
+ unsigned long nr_segs, loff_t *pos,
+ int flags)
+{
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ ssize_t ret;
+
+ ret = import_iovec(type, uvector, nr_segs,
+ ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret < 0)
+ return ret;
+
+ ret = __do_readv_writev(type, file, &iter, pos, flags);
+ kfree(iov);
+
+ return ret;
+}
+
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, int flags)
{
@@ -1064,51 +1074,19 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
unsigned long nr_segs, loff_t *pos,
int flags)
{
- compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
- io_fn_t fn;
- iter_fn_t iter_fn;
ret = compat_import_iovec(type, uvector, nr_segs,
UIO_FASTIOV, &iov, &iter);
if (ret < 0)
return ret;
- tot_len = iov_iter_count(&iter);
- if (!tot_len)
- goto out;
- ret = rw_verify_area(type, file, pos, tot_len);
- if (ret < 0)
- goto out;
-
- if (type == READ) {
- fn = file->f_op->read;
- iter_fn = file->f_op->read_iter;
- } else {
- fn = (io_fn_t)file->f_op->write;
- iter_fn = file->f_op->write_iter;
- file_start_write(file);
- }
-
- if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
- else
- ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
-
- if (type != READ)
- file_end_write(file);
-
-out:
+ ret = __do_readv_writev(type, file, &iter, pos, flags);
kfree(iov);
- if ((ret + (type == READ)) > 0) {
- if (type == READ)
- fsnotify_access(file);
- else
- fsnotify_modify(file);
- }
+
return ret;
}
@@ -1518,6 +1496,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (flags != 0)
return -EINVAL;
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ return -EINVAL;
+
ret = rw_verify_area(READ, file_in, &pos_in, len);
if (unlikely(ret))
return ret;
@@ -1538,7 +1521,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (len == 0)
return 0;
- sb_start_write(inode_out->i_sb);
+ file_start_write(file_out);
/*
* Try cloning first, this is supported by more file systems, and
@@ -1574,7 +1557,7 @@ done:
inc_syscr(current);
inc_syscw(current);
- sb_end_write(inode_out->i_sb);
+ file_end_write(file_out);
return ret;
}
diff --git a/fs/select.c b/fs/select.c
index 305c0da..e211227 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -15,7 +15,8 @@
*/
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/rt.h>
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/slab.h>
@@ -26,7 +27,6 @@
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
-#include <linux/sched/rt.h>
#include <linux/freezer.h>
#include <net/busy_poll.h>
#include <linux/vmalloc.h>
diff --git a/fs/splice.c b/fs/splice.c
index 4ef78aa..006ba50 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -33,6 +33,8 @@
#include <linux/gfp.h>
#include <linux/socket.h>
#include <linux/compat.h>
+#include <linux/sched/signal.h>
+
#include "internal.h"
/*
@@ -307,7 +309,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
idx = to.idx;
init_sync_kiocb(&kiocb, in);
kiocb.ki_pos = *ppos;
- ret = in->f_op->read_iter(&kiocb, &to);
+ ret = call_read_iter(in, &kiocb, &to);
if (ret > 0) {
*ppos = kiocb.ki_pos;
file_accessed(in);
diff --git a/fs/stat.c b/fs/stat.c
index 3f14d1e..fa0be59 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -12,12 +12,22 @@
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/security.h>
+#include <linux/cred.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
+/**
+ * generic_fillattr - Fill in the basic attributes from the inode struct
+ * @inode: Inode to use as the source
+ * @stat: Where to fill in the attributes
+ *
+ * Fill in the basic attributes in the kstat structure from data that's to be
+ * found on the VFS inode structure. This is the default if no getattr inode
+ * operation is supplied.
+ */
void generic_fillattr(struct inode *inode, struct kstat *stat)
{
stat->dev = inode->i_sb->s_dev;
@@ -33,81 +43,147 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
stat->ctime = inode->i_ctime;
stat->blksize = i_blocksize(inode);
stat->blocks = inode->i_blocks;
-}
+ if (IS_NOATIME(inode))
+ stat->result_mask &= ~STATX_ATIME;
+ if (IS_AUTOMOUNT(inode))
+ stat->attributes |= STATX_ATTR_AUTOMOUNT;
+}
EXPORT_SYMBOL(generic_fillattr);
/**
* vfs_getattr_nosec - getattr without security checks
* @path: file to get attributes from
* @stat: structure to return attributes in
+ * @request_mask: STATX_xxx flags indicating what the caller wants
+ * @query_flags: Query mode (KSTAT_QUERY_FLAGS)
*
* Get attributes without calling security_inode_getattr.
*
* Currently the only caller other than vfs_getattr is internal to the
- * filehandle lookup code, which uses only the inode number and returns
- * no attributes to any user. Any other code probably wants
- * vfs_getattr.
+ * filehandle lookup code, which uses only the inode number and returns no
+ * attributes to any user. Any other code probably wants vfs_getattr.
*/
-int vfs_getattr_nosec(struct path *path, struct kstat *stat)
+int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_backing_inode(path->dentry);
+ memset(stat, 0, sizeof(*stat));
+ stat->result_mask |= STATX_BASIC_STATS;
+ request_mask &= STATX_ALL;
+ query_flags &= KSTAT_QUERY_FLAGS;
if (inode->i_op->getattr)
- return inode->i_op->getattr(path->mnt, path->dentry, stat);
+ return inode->i_op->getattr(path, stat, request_mask,
+ query_flags);
generic_fillattr(inode, stat);
return 0;
}
-
EXPORT_SYMBOL(vfs_getattr_nosec);
-int vfs_getattr(struct path *path, struct kstat *stat)
+/*
+ * vfs_getattr - Get the enhanced basic attributes of a file
+ * @path: The file of interest
+ * @stat: Where to return the statistics
+ * @request_mask: STATX_xxx flags indicating what the caller wants
+ * @query_flags: Query mode (KSTAT_QUERY_FLAGS)
+ *
+ * Ask the filesystem for a file's attributes. The caller must indicate in
+ * request_mask and query_flags to indicate what they want.
+ *
+ * If the file is remote, the filesystem can be forced to update the attributes
+ * from the backing store by passing AT_STATX_FORCE_SYNC in query_flags or can
+ * suppress the update by passing AT_STATX_DONT_SYNC.
+ *
+ * Bits must have been set in request_mask to indicate which attributes the
+ * caller wants retrieving. Any such attribute not requested may be returned
+ * anyway, but the value may be approximate, and, if remote, may not have been
+ * synchronised with the server.
+ *
+ * 0 will be returned on success, and a -ve error code if unsuccessful.
+ */
+int vfs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
int retval;
retval = security_inode_getattr(path);
if (retval)
return retval;
- return vfs_getattr_nosec(path, stat);
+ return vfs_getattr_nosec(path, stat, request_mask, query_flags);
}
-
EXPORT_SYMBOL(vfs_getattr);
-int vfs_fstat(unsigned int fd, struct kstat *stat)
+/**
+ * vfs_statx_fd - Get the enhanced basic attributes by file descriptor
+ * @fd: The file descriptor referring to the file of interest
+ * @stat: The result structure to fill in.
+ * @request_mask: STATX_xxx flags indicating what the caller wants
+ * @query_flags: Query mode (KSTAT_QUERY_FLAGS)
+ *
+ * This function is a wrapper around vfs_getattr(). The main difference is
+ * that it uses a file descriptor to determine the file location.
+ *
+ * 0 will be returned on success, and a -ve error code if unsuccessful.
+ */
+int vfs_statx_fd(unsigned int fd, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
struct fd f = fdget_raw(fd);
int error = -EBADF;
if (f.file) {
- error = vfs_getattr(&f.file->f_path, stat);
+ error = vfs_getattr(&f.file->f_path, stat,
+ request_mask, query_flags);
fdput(f);
}
return error;
}
-EXPORT_SYMBOL(vfs_fstat);
+EXPORT_SYMBOL(vfs_statx_fd);
-int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
- int flag)
+/**
+ * vfs_statx - Get basic and extra attributes by filename
+ * @dfd: A file descriptor representing the base dir for a relative filename
+ * @filename: The name of the file of interest
+ * @flags: Flags to control the query
+ * @stat: The result structure to fill in.
+ * @request_mask: STATX_xxx flags indicating what the caller wants
+ *
+ * This function is a wrapper around vfs_getattr(). The main difference is
+ * that it uses a filename and base directory to determine the file location.
+ * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink
+ * at the given name from being referenced.
+ *
+ * The caller must have preset stat->request_mask as for vfs_getattr(). The
+ * flags are also used to load up stat->query_flags.
+ *
+ * 0 will be returned on success, and a -ve error code if unsuccessful.
+ */
+int vfs_statx(int dfd, const char __user *filename, int flags,
+ struct kstat *stat, u32 request_mask)
{
struct path path;
int error = -EINVAL;
- unsigned int lookup_flags = 0;
+ unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
- if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
- AT_EMPTY_PATH)) != 0)
- goto out;
+ if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
+ AT_EMPTY_PATH | KSTAT_QUERY_FLAGS)) != 0)
+ return -EINVAL;
- if (!(flag & AT_SYMLINK_NOFOLLOW))
- lookup_flags |= LOOKUP_FOLLOW;
- if (flag & AT_EMPTY_PATH)
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_NO_AUTOMOUNT)
+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
+ if (flags & AT_EMPTY_PATH)
lookup_flags |= LOOKUP_EMPTY;
+
retry:
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
goto out;
- error = vfs_getattr(&path, stat);
+ error = vfs_getattr(&path, stat, request_mask, flags);
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -116,19 +192,7 @@ retry:
out:
return error;
}
-EXPORT_SYMBOL(vfs_fstatat);
-
-int vfs_stat(const char __user *name, struct kstat *stat)
-{
- return vfs_fstatat(AT_FDCWD, name, stat, 0);
-}
-EXPORT_SYMBOL(vfs_stat);
-
-int vfs_lstat(const char __user *name, struct kstat *stat)
-{
- return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
-}
-EXPORT_SYMBOL(vfs_lstat);
+EXPORT_SYMBOL(vfs_statx);
#ifdef __ARCH_WANT_OLD_STAT
@@ -141,7 +205,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
{
static int warncount = 5;
struct __old_kernel_stat tmp;
-
+
if (warncount > 0) {
warncount--;
printk(KERN_WARNING "VFS: Warning: %s using old stat() call. Recompile your binary.\n",
@@ -166,7 +230,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
#if BITS_PER_LONG == 32
if (stat->size > MAX_NON_LFS)
return -EOVERFLOW;
-#endif
+#endif
tmp.st_size = stat->size;
tmp.st_atime = stat->atime.tv_sec;
tmp.st_mtime = stat->mtime.tv_sec;
@@ -445,6 +509,81 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
}
#endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
+static inline int __put_timestamp(struct timespec *kts,
+ struct statx_timestamp __user *uts)
+{
+ return (__put_user(kts->tv_sec, &uts->tv_sec ) ||
+ __put_user(kts->tv_nsec, &uts->tv_nsec ) ||
+ __put_user(0, &uts->__reserved ));
+}
+
+/*
+ * Set the statx results.
+ */
+static long statx_set_result(struct kstat *stat, struct statx __user *buffer)
+{
+ uid_t uid = from_kuid_munged(current_user_ns(), stat->uid);
+ gid_t gid = from_kgid_munged(current_user_ns(), stat->gid);
+
+ if (__put_user(stat->result_mask, &buffer->stx_mask ) ||
+ __put_user(stat->mode, &buffer->stx_mode ) ||
+ __clear_user(&buffer->__spare0, sizeof(buffer->__spare0)) ||
+ __put_user(stat->nlink, &buffer->stx_nlink ) ||
+ __put_user(uid, &buffer->stx_uid ) ||
+ __put_user(gid, &buffer->stx_gid ) ||
+ __put_user(stat->attributes, &buffer->stx_attributes ) ||
+ __put_user(stat->blksize, &buffer->stx_blksize ) ||
+ __put_user(MAJOR(stat->rdev), &buffer->stx_rdev_major ) ||
+ __put_user(MINOR(stat->rdev), &buffer->stx_rdev_minor ) ||
+ __put_user(MAJOR(stat->dev), &buffer->stx_dev_major ) ||
+ __put_user(MINOR(stat->dev), &buffer->stx_dev_minor ) ||
+ __put_timestamp(&stat->atime, &buffer->stx_atime ) ||
+ __put_timestamp(&stat->btime, &buffer->stx_btime ) ||
+ __put_timestamp(&stat->ctime, &buffer->stx_ctime ) ||
+ __put_timestamp(&stat->mtime, &buffer->stx_mtime ) ||
+ __put_user(stat->ino, &buffer->stx_ino ) ||
+ __put_user(stat->size, &buffer->stx_size ) ||
+ __put_user(stat->blocks, &buffer->stx_blocks ) ||
+ __clear_user(&buffer->__spare1, sizeof(buffer->__spare1)) ||
+ __clear_user(&buffer->__spare2, sizeof(buffer->__spare2)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/**
+ * sys_statx - System call to get enhanced stats
+ * @dfd: Base directory to pathwalk from *or* fd to stat.
+ * @filename: File to stat *or* NULL.
+ * @flags: AT_* flags to control pathwalk.
+ * @mask: Parts of statx struct actually required.
+ * @buffer: Result buffer.
+ *
+ * Note that if filename is NULL, then it does the equivalent of fstat() using
+ * dfd to indicate the file of interest.
+ */
+SYSCALL_DEFINE5(statx,
+ int, dfd, const char __user *, filename, unsigned, flags,
+ unsigned int, mask,
+ struct statx __user *, buffer)
+{
+ struct kstat stat;
+ int error;
+
+ if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
+ return -EINVAL;
+ if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer)))
+ return -EFAULT;
+
+ if (filename)
+ error = vfs_statx(dfd, filename, flags, &stat, mask);
+ else
+ error = vfs_statx_fd(dfd, &stat, mask, flags);
+ if (error)
+ return error;
+ return statx_set_result(&stat, buffer);
+}
+
/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
void __inode_add_bytes(struct inode *inode, loff_t bytes)
{
diff --git a/fs/sync.c b/fs/sync.c
index 2a54c1f..11ba023 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -192,7 +192,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
spin_unlock(&inode->i_lock);
mark_inode_dirty_sync(inode);
}
- return file->f_op->fsync(file, start, end, datasync);
+ return call_fsync(file, start, end, datasync);
}
EXPORT_SYMBOL(vfs_fsync_range);
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 08d3e63..83809f5 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -440,10 +440,11 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
return blocks;
}
-int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int sysv_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
- struct super_block *s = dentry->d_sb;
- generic_fillattr(d_inode(dentry), stat);
+ struct super_block *s = path->dentry->d_sb;
+ generic_fillattr(d_inode(path->dentry), stat);
stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
stat->blksize = s->s_blocksize;
return 0;
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 6c21228..1e7e27c 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -142,7 +142,7 @@ extern struct inode *sysv_iget(struct super_block *, unsigned int);
extern int sysv_write_inode(struct inode *, struct writeback_control *wbc);
extern int sysv_sync_inode(struct inode *);
extern void sysv_set_inode(struct inode *, dev_t);
-extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int sysv_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int sysv_init_icache(void);
extern void sysv_destroy_icache(void);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 528369f..30825d88 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1622,11 +1622,11 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
-int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+int ubifs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
loff_t size;
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct ubifs_inode *ui = ubifs_inode(inode);
mutex_lock(&ui->ui_mutex);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index f0c86f0..4d57e48 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1749,8 +1749,8 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, int flags);
/* dir.c */
struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
umode_t mode);
-int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat);
+int ubifs_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags);
int ubifs_check_dir_empty(struct inode *dir);
/* xattr.c */
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index f7dfef5..6023c97 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -152,9 +152,10 @@ out_unmap:
return err;
}
-static int udf_symlink_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int udf_symlink_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
+ struct dentry *dentry = path->dentry;
struct inode *inode = d_backing_inode(dentry);
struct page *page;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 3c421d0..973607d 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -14,7 +14,8 @@
#include <linux/list.h>
#include <linux/hashtable.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/mm.h>
#include <linux/poll.h>
#include <linux/slab.h>
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 339c696..2dfdc62 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -16,6 +16,7 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/mm.h>
+#include <linux/sched/mm.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/swap.h>
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8c7d01b..b620872 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -33,6 +33,7 @@
#include <linux/migrate.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
+#include <linux/sched/mm.h>
#include "xfs_format.h"
#include "xfs_log_format.h"
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index cf1363db..2fd7fdf 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -43,6 +43,7 @@
#include "xfs_acl.h"
#include <linux/capability.h>
+#include <linux/cred.h>
#include <linux/dcache.h>
#include <linux/mount.h>
#include <linux/namei.h>
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 22c1615..229cc6a 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -489,11 +489,12 @@ xfs_vn_get_link_inline(
STATIC int
xfs_vn_getattr(
- struct vfsmount *mnt,
- struct dentry *dentry,
- struct kstat *stat)
+ const struct path *path,
+ struct kstat *stat,
+ u32 request_mask,
+ unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 7a989de..592fdf7 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -55,7 +55,7 @@ typedef __u32 xfs_nlink_t;
#include <linux/file.h>
#include <linux/swap.h>
#include <linux/errno.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/bitops.h>
#include <linux/major.h>
#include <linux/pagemap.h>
OpenPOWER on IntegriCloud