summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c28
-rw-r--r--fs/9p/cache.c204
-rw-r--r--fs/9p/cache.h64
-rw-r--r--fs/9p/fid.c114
-rw-r--r--fs/9p/fid.h5
-rw-r--r--fs/9p/v9fs.c108
-rw-r--r--fs/9p/v9fs.h53
-rw-r--r--fs/9p/v9fs_vfs.h26
-rw-r--r--fs/9p/vfs_addr.c194
-rw-r--r--fs/9p/vfs_dentry.c47
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/9p/vfs_file.c316
-rw-r--r--fs/9p/vfs_inode.c307
-rw-r--r--fs/9p/vfs_inode_dotl.c198
-rw-r--r--fs/9p/vfs_super.c65
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c56
-rw-r--r--fs/block_dev.c30
-rw-r--r--fs/btrfs/acl.c6
-rw-r--r--fs/btrfs/compression.c27
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/export.c10
-rw-r--r--fs/btrfs/extent-tree.c130
-rw-r--r--fs/btrfs/extent_io.c219
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file-item.c5
-rw-r--r--fs/btrfs/file.c117
-rw-r--r--fs/btrfs/free-space-cache.c162
-rw-r--r--fs/btrfs/inode.c173
-rw-r--r--fs/btrfs/ioctl.c36
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/print-tree.c1
-rw-r--r--fs/btrfs/relocation.c43
-rw-r--r--fs/btrfs/super.c16
-rw-r--r--fs/btrfs/transaction.c5
-rw-r--r--fs/btrfs/tree-log.c35
-rw-r--r--fs/btrfs/volumes.c34
-rw-r--r--fs/ceph/caps.c43
-rw-r--r--fs/ceph/dir.c27
-rw-r--r--fs/ceph/inode.c12
-rw-r--r--fs/ceph/mds_client.c10
-rw-r--r--fs/ceph/snap.c14
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/xattr.c3
-rw-r--r--fs/cifs/Kconfig1
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README5
-rw-r--r--fs/cifs/cifs_dfs_ref.c10
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifs_unicode.c127
-rw-r--r--fs/cifs/cifsacl.c4
-rw-r--r--fs/cifs/cifsencrypt.c38
-rw-r--r--fs/cifs/cifsencrypt.h33
-rw-r--r--fs/cifs/cifsfs.c49
-rw-r--r--fs/cifs/cifsfs.h19
-rw-r--r--fs/cifs/cifsglob.h73
-rw-r--r--fs/cifs/cifspdu.h47
-rw-r--r--fs/cifs/cifsproto.h11
-rw-r--r--fs/cifs/cifssmb.c68
-rw-r--r--fs/cifs/connect.c113
-rw-r--r--fs/cifs/file.c382
-rw-r--r--fs/cifs/inode.c8
-rw-r--r--fs/cifs/link.c59
-rw-r--r--fs/cifs/md4.c205
-rw-r--r--fs/cifs/md5.c366
-rw-r--r--fs/cifs/md5.h38
-rw-r--r--fs/cifs/misc.c187
-rw-r--r--fs/cifs/netmisc.c12
-rw-r--r--fs/cifs/readdir.c3
-rw-r--r--fs/cifs/sess.c21
-rw-r--r--fs/cifs/smbdes.c1
-rw-r--r--fs/cifs/smbencrypt.c92
-rw-r--r--fs/cifs/transport.c78
-rw-r--r--fs/compat.c69
-rw-r--r--fs/dcache.c125
-rw-r--r--fs/dlm/lowcomms.c6
-rw-r--r--fs/ecryptfs/dentry.c22
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h3
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/ecryptfs/inode.c138
-rw-r--r--fs/eventfd.c12
-rw-r--r--fs/eventpoll.c111
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext2/namei.c9
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c26
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/file.c60
-rw-r--r--fs/ext4/mballoc.c100
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/page-io.c36
-rw-r--r--fs/ext4/super.c100
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c57
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/glock.c4
-rw-r--r--fs/gfs2/main.c11
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/hfsplus/extents.c4
-rw-r--r--fs/hfsplus/part_tbl.c4
-rw-r--r--fs/hfsplus/super.c106
-rw-r--r--fs/hfsplus/wrapper.c4
-rw-r--r--fs/inode.c63
-rw-r--r--fs/internal.h15
-rw-r--r--fs/ioctl.c7
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jbd2/journal.c9
-rw-r--r--fs/jbd2/transaction.c21
-rw-r--r--fs/jfs/namei.c5
-rw-r--r--fs/lockd/host.c9
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c1542
-rw-r--r--fs/namespace.c18
-rw-r--r--fs/nfs/callback.c109
-rw-r--r--fs/nfs/callback.h4
-rw-r--r--fs/nfs/callback_proc.c12
-rw-r--r--fs/nfs/callback_xdr.c5
-rw-r--r--fs/nfs/client.c15
-rw-r--r--fs/nfs/delegation.c6
-rw-r--r--fs/nfs/direct.c34
-rw-r--r--fs/nfs/inode.c35
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/nfs3acl.c4
-rw-r--r--fs/nfs/nfs3xdr.c5
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c13
-rw-r--r--fs/nfs/nfs4proc.c161
-rw-r--r--fs/nfs/nfs4state.c35
-rw-r--r--fs/nfs/nfs4xdr.c13
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nfs_common/nfsacl.c54
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/nfs4callback.c8
-rw-r--r--fs/nfsd/nfs4state.c187
-rw-r--r--fs/nfsd/nfs4xdr.c12
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/vfs.c21
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c5
-rw-r--r--fs/ntfs/mft.c11
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/quota.h3
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/refcounttree.c9
-rw-r--r--fs/ocfs2/super.c40
-rw-r--r--fs/open.c136
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/mac.c17
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/posix_acl.c17
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/consoles.c4
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/quota/dquot.c18
-rw-r--r--fs/quota/quota.c41
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/namei.c6
-rw-r--r--fs/reiserfs/super.c17
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/squashfs/block.c8
-rw-r--r--fs/squashfs/xz_wrapper.c6
-rw-r--r--fs/squashfs/zlib_wrapper.c6
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c18
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c31
-rw-r--r--fs/xfs/quota/xfs_qm.c46
-rw-r--r--fs/xfs/xfs_alloc.h16
-rw-r--r--fs/xfs/xfs_bmap.c61
-rw-r--r--fs/xfs/xfs_buf_item.c12
-rw-r--r--fs/xfs/xfs_extfree_item.c3
-rw-r--r--fs/xfs/xfs_fsops.c3
-rw-r--r--fs/xfs/xfs_iomap.c7
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_cil.c15
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--fs/xfs/xfs_trans.c41
216 files changed, 6001 insertions, 4086 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 02a2cf6..5154552 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -21,8 +21,8 @@
#include <linux/posix_acl_xattr.h>
#include "xattr.h"
#include "acl.h"
-#include "v9fs_vfs.h"
#include "v9fs.h"
+#include "v9fs_vfs.h"
static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
{
@@ -59,7 +59,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
struct v9fs_session_info *v9ses;
v9ses = v9fs_inode2v9ses(inode);
- if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+ if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
+ ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
return 0;
@@ -71,11 +72,15 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
- posix_acl_release(dacl);
- posix_acl_release(pacl);
} else
retval = -EIO;
+ if (!IS_ERR(dacl))
+ posix_acl_release(dacl);
+
+ if (!IS_ERR(pacl))
+ posix_acl_release(pacl);
+
return retval;
}
@@ -100,9 +105,10 @@ int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
return -ECHILD;
v9ses = v9fs_inode2v9ses(inode);
- if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+ if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
+ ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
/*
- * On access = client mode get the acl
+ * On access = client and acl = on mode get the acl
* values from the server
*/
return 0;
@@ -128,6 +134,10 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
struct inode *inode = dentry->d_inode;
set_cached_acl(inode, type, acl);
+
+ if (!acl)
+ return 0;
+
/* Set a setxattr request to server */
size = posix_acl_xattr_size(acl->a_count);
buffer = kmalloc(size, GFP_KERNEL);
@@ -177,10 +187,8 @@ int v9fs_acl_chmod(struct dentry *dentry)
int v9fs_set_create_acl(struct dentry *dentry,
struct posix_acl *dpacl, struct posix_acl *pacl)
{
- if (dpacl)
- v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
- if (pacl)
- v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
+ v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
+ v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
posix_acl_release(dpacl);
posix_acl_release(pacl);
return 0;
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 0dbe0d1..5b335c5 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -33,67 +33,11 @@
#define CACHETAG_LEN 11
-struct kmem_cache *vcookie_cache;
-
struct fscache_netfs v9fs_cache_netfs = {
.name = "9p",
.version = 0,
};
-static void init_once(void *foo)
-{
- struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
- vcookie->fscache = NULL;
- vcookie->qid = NULL;
- inode_init_once(&vcookie->inode);
-}
-
-/**
- * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
- * vcookie to inode mapping
- *
- * Returns 0 on success.
- */
-
-static int v9fs_init_vcookiecache(void)
-{
- vcookie_cache = kmem_cache_create("vcookie_cache",
- sizeof(struct v9fs_cookie),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
- init_once);
- if (!vcookie_cache)
- return -ENOMEM;
-
- return 0;
-}
-
-/**
- * v9fs_destroy_vcookiecache - destroy the cache of vcookies
- *
- */
-
-static void v9fs_destroy_vcookiecache(void)
-{
- kmem_cache_destroy(vcookie_cache);
-}
-
-int __v9fs_cache_register(void)
-{
- int ret;
- ret = v9fs_init_vcookiecache();
- if (ret < 0)
- return ret;
-
- return fscache_register_netfs(&v9fs_cache_netfs);
-}
-
-void __v9fs_cache_unregister(void)
-{
- v9fs_destroy_vcookiecache();
- fscache_unregister_netfs(&v9fs_cache_netfs);
-}
-
/**
* v9fs_random_cachetag - Generate a random tag to be associated
* with a new cache session.
@@ -133,9 +77,9 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
}
const struct fscache_cookie_def v9fs_cache_session_index_def = {
- .name = "9P.session",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = v9fs_cache_session_get_key,
+ .name = "9P.session",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = v9fs_cache_session_get_key,
};
void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
@@ -163,33 +107,33 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
- memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path));
-
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode,
- vcookie->qid->path);
- return sizeof(vcookie->qid->path);
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
+ memcpy(buffer, &v9inode->fscache_key->path,
+ sizeof(v9inode->fscache_key->path));
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
+ v9inode->fscache_key->path);
+ return sizeof(v9inode->fscache_key->path);
}
static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
uint64_t *size)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
- *size = i_size_read(&vcookie->inode);
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
+ *size = i_size_read(&v9inode->vfs_inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode,
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode,
*size);
}
static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
void *buffer, uint16_t buflen)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
- memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version));
-
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode,
- vcookie->qid->version);
- return sizeof(vcookie->qid->version);
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
+ memcpy(buffer, &v9inode->fscache_key->version,
+ sizeof(v9inode->fscache_key->version));
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
+ v9inode->fscache_key->version);
+ return sizeof(v9inode->fscache_key->version);
}
static enum
@@ -197,13 +141,13 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
const void *buffer,
uint16_t buflen)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
- if (buflen != sizeof(vcookie->qid->version))
+ if (buflen != sizeof(v9inode->fscache_key->version))
return FSCACHE_CHECKAUX_OBSOLETE;
- if (memcmp(buffer, &vcookie->qid->version,
- sizeof(vcookie->qid->version)))
+ if (memcmp(buffer, &v9inode->fscache_key->version,
+ sizeof(v9inode->fscache_key->version)))
return FSCACHE_CHECKAUX_OBSOLETE;
return FSCACHE_CHECKAUX_OKAY;
@@ -211,7 +155,7 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
{
- struct v9fs_cookie *vcookie = cookie_netfs_data;
+ struct v9fs_inode *v9inode = cookie_netfs_data;
struct pagevec pvec;
pgoff_t first;
int loop, nr_pages;
@@ -220,7 +164,7 @@ static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
first = 0;
for (;;) {
- nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping,
+ nr_pages = pagevec_lookup(&pvec, v9inode->vfs_inode.i_mapping,
first,
PAGEVEC_SIZE - pagevec_count(&pvec));
if (!nr_pages)
@@ -249,115 +193,114 @@ const struct fscache_cookie_def v9fs_cache_inode_index_def = {
void v9fs_cache_inode_get_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie;
+ struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
if (!S_ISREG(inode->i_mode))
return;
- vcookie = v9fs_inode2cookie(inode);
- if (vcookie->fscache)
+ v9inode = V9FS_I(inode);
+ if (v9inode->fscache)
return;
v9ses = v9fs_inode2v9ses(inode);
- vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- vcookie);
+ v9inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
- vcookie->fscache);
+ v9inode->fscache);
}
void v9fs_cache_inode_put_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
- vcookie->fscache);
+ v9inode->fscache);
- fscache_relinquish_cookie(vcookie->fscache, 0);
- vcookie->fscache = NULL;
+ fscache_relinquish_cookie(v9inode->fscache, 0);
+ v9inode->fscache = NULL;
}
void v9fs_cache_inode_flush_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
- vcookie->fscache);
+ v9inode->fscache);
- fscache_relinquish_cookie(vcookie->fscache, 1);
- vcookie->fscache = NULL;
+ fscache_relinquish_cookie(v9inode->fscache, 1);
+ v9inode->fscache = NULL;
}
void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
struct p9_fid *fid;
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
- spin_lock(&vcookie->lock);
+ spin_lock(&v9inode->fscache_lock);
fid = filp->private_data;
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
v9fs_cache_inode_flush_cookie(inode);
else
v9fs_cache_inode_get_cookie(inode);
- spin_unlock(&vcookie->lock);
+ spin_unlock(&v9inode->fscache_lock);
}
void v9fs_cache_inode_reset_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
struct v9fs_session_info *v9ses;
struct fscache_cookie *old;
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
- old = vcookie->fscache;
+ old = v9inode->fscache;
- spin_lock(&vcookie->lock);
- fscache_relinquish_cookie(vcookie->fscache, 1);
+ spin_lock(&v9inode->fscache_lock);
+ fscache_relinquish_cookie(v9inode->fscache, 1);
v9ses = v9fs_inode2v9ses(inode);
- vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- vcookie);
-
+ v9inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
- inode, old, vcookie->fscache);
+ inode, old, v9inode->fscache);
- spin_unlock(&vcookie->lock);
+ spin_unlock(&v9inode->fscache_lock);
}
int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
{
struct inode *inode = page->mapping->host;
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- BUG_ON(!vcookie->fscache);
+ BUG_ON(!v9inode->fscache);
- return fscache_maybe_release_page(vcookie->fscache, page, gfp);
+ return fscache_maybe_release_page(v9inode->fscache, page, gfp);
}
void __v9fs_fscache_invalidate_page(struct page *page)
{
struct inode *inode = page->mapping->host;
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- BUG_ON(!vcookie->fscache);
+ BUG_ON(!v9inode->fscache);
if (PageFsCache(page)) {
- fscache_wait_on_page_write(vcookie->fscache, page);
+ fscache_wait_on_page_write(v9inode->fscache, page);
BUG_ON(!PageLocked(page));
- fscache_uncache_page(vcookie->fscache, page);
+ fscache_uncache_page(v9inode->fscache, page);
}
}
@@ -380,13 +323,13 @@ static void v9fs_vfs_readpage_complete(struct page *page, void *data,
int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
{
int ret;
- const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return -ENOBUFS;
- ret = fscache_read_or_alloc_page(vcookie->fscache,
+ ret = fscache_read_or_alloc_page(v9inode->fscache,
page,
v9fs_vfs_readpage_complete,
NULL,
@@ -418,13 +361,13 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
unsigned *nr_pages)
{
int ret;
- const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return -ENOBUFS;
- ret = fscache_read_or_alloc_pages(vcookie->fscache,
+ ret = fscache_read_or_alloc_pages(v9inode->fscache,
mapping, pages, nr_pages,
v9fs_vfs_readpage_complete,
NULL,
@@ -453,11 +396,22 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
{
int ret;
- const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
- ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL);
+ ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret);
if (ret != 0)
v9fs_uncache_page(inode, page);
}
+
+/*
+ * wait for a page to complete writing to the cache
+ */
+void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
+{
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+ if (PageFsCache(page))
+ fscache_wait_on_page_write(v9inode->fscache, page);
+}
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index a94192b..049507a 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -25,20 +25,6 @@
#include <linux/fscache.h>
#include <linux/spinlock.h>
-extern struct kmem_cache *vcookie_cache;
-
-struct v9fs_cookie {
- spinlock_t lock;
- struct inode inode;
- struct fscache_cookie *fscache;
- struct p9_qid *qid;
-};
-
-static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
-{
- return container_of(inode, struct v9fs_cookie, inode);
-}
-
extern struct fscache_netfs v9fs_cache_netfs;
extern const struct fscache_cookie_def v9fs_cache_session_index_def;
extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
@@ -64,23 +50,8 @@ extern int __v9fs_readpages_from_fscache(struct inode *inode,
struct list_head *pages,
unsigned *nr_pages);
extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
-
-
-/**
- * v9fs_cache_register - Register v9fs file system with the cache
- */
-static inline int v9fs_cache_register(void)
-{
- return __v9fs_cache_register();
-}
-
-/**
- * v9fs_cache_unregister - Unregister v9fs from the cache
- */
-static inline void v9fs_cache_unregister(void)
-{
- __v9fs_cache_unregister();
-}
+extern void __v9fs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page);
static inline int v9fs_fscache_release_page(struct page *page,
gfp_t gfp)
@@ -117,28 +88,27 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
- fscache_uncache_page(vcookie->fscache, page);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ fscache_uncache_page(v9inode->fscache, page);
BUG_ON(PageFsCache(page));
}
-static inline void v9fs_vcookie_set_qid(struct inode *inode,
+static inline void v9fs_fscache_set_key(struct inode *inode,
struct p9_qid *qid)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
- spin_lock(&vcookie->lock);
- vcookie->qid = qid;
- spin_unlock(&vcookie->lock);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ spin_lock(&v9inode->fscache_lock);
+ v9inode->fscache_key = qid;
+ spin_unlock(&v9inode->fscache_lock);
}
-#else /* CONFIG_9P_FSCACHE */
-
-static inline int v9fs_cache_register(void)
+static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page)
{
- return 1;
+ return __v9fs_fscache_wait_on_page_write(inode, page);
}
-static inline void v9fs_cache_unregister(void) {}
+#else /* CONFIG_9P_FSCACHE */
static inline int v9fs_fscache_release_page(struct page *page,
gfp_t gfp) {
@@ -168,9 +138,11 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
{}
-static inline void v9fs_vcookie_set_qid(struct inode *inode,
- struct p9_qid *qid)
-{}
+static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page)
+{
+ return;
+}
#endif /* CONFIG_9P_FSCACHE */
#endif /* _9P_CACHE_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index b00223c..cd63e00 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -125,46 +125,17 @@ err_out:
return -ENOMEM;
}
-/**
- * v9fs_fid_lookup - lookup for a fid, try to walk if not found
- * @dentry: dentry to look for fid in
- *
- * Look for a fid in the specified dentry for the current user.
- * If no fid is found, try to create one walking from a fid from the parent
- * dentry (if it has one), or the root dentry. If the user haven't accessed
- * the fs yet, attach now and walk from the root.
- */
-
-struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
+static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
+ uid_t uid, int any)
{
- int i, n, l, clone, any, access;
- u32 uid;
- struct p9_fid *fid, *old_fid = NULL;
struct dentry *ds;
- struct v9fs_session_info *v9ses;
char **wnames, *uname;
+ int i, n, l, clone, access;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid, *old_fid = NULL;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
access = v9ses->flags & V9FS_ACCESS_MASK;
- switch (access) {
- case V9FS_ACCESS_SINGLE:
- case V9FS_ACCESS_USER:
- case V9FS_ACCESS_CLIENT:
- uid = current_fsuid();
- any = 0;
- break;
-
- case V9FS_ACCESS_ANY:
- uid = v9ses->uid;
- any = 1;
- break;
-
- default:
- uid = ~0;
- any = 0;
- break;
- }
-
fid = v9fs_fid_find(dentry, uid, any);
if (fid)
return fid;
@@ -250,6 +221,45 @@ err_out:
return fid;
}
+/**
+ * v9fs_fid_lookup - lookup for a fid, try to walk if not found
+ * @dentry: dentry to look for fid in
+ *
+ * Look for a fid in the specified dentry for the current user.
+ * If no fid is found, try to create one walking from a fid from the parent
+ * dentry (if it has one), or the root dentry. If the user haven't accessed
+ * the fs yet, attach now and walk from the root.
+ */
+
+struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
+{
+ uid_t uid;
+ int any, access;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ access = v9ses->flags & V9FS_ACCESS_MASK;
+ switch (access) {
+ case V9FS_ACCESS_SINGLE:
+ case V9FS_ACCESS_USER:
+ case V9FS_ACCESS_CLIENT:
+ uid = current_fsuid();
+ any = 0;
+ break;
+
+ case V9FS_ACCESS_ANY:
+ uid = v9ses->uid;
+ any = 1;
+ break;
+
+ default:
+ uid = ~0;
+ any = 0;
+ break;
+ }
+ return v9fs_fid_lookup_with_uid(dentry, uid, any);
+}
+
struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
{
struct p9_fid *fid, *ret;
@@ -261,3 +271,39 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
ret = p9_client_walk(fid, 0, NULL, 1);
return ret;
}
+
+static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
+{
+ struct p9_fid *fid, *ret;
+
+ fid = v9fs_fid_lookup_with_uid(dentry, uid, 0);
+ if (IS_ERR(fid))
+ return fid;
+
+ ret = p9_client_walk(fid, 0, NULL, 1);
+ return ret;
+}
+
+struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
+{
+ int err;
+ struct p9_fid *fid;
+
+ fid = v9fs_fid_clone_with_uid(dentry, 0);
+ if (IS_ERR(fid))
+ goto error_out;
+ /*
+ * writeback fid will only be used to write back the
+ * dirty pages. We always request for the open fid in read-write
+ * mode so that a partial page write which result in page
+ * read can work.
+ */
+ err = p9_client_open(fid, O_RDWR);
+ if (err < 0) {
+ p9_client_clunk(fid);
+ fid = ERR_PTR(err);
+ goto error_out;
+ }
+error_out:
+ return fid;
+}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index c3bbd6a..bb0b6e7 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -19,7 +19,8 @@
* Boston, MA 02111-1301 USA
*
*/
-
+#ifndef FS_9P_FID_H
+#define FS_9P_FID_H
#include <linux/list.h>
/**
@@ -45,3 +46,5 @@ struct v9fs_dentry {
struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
+struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
+#endif
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2f77cd3..c82b017 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,6 +39,7 @@
static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
static LIST_HEAD(v9fs_sessionlist);
+struct kmem_cache *v9fs_inode_cache;
/*
* Option Parsing (code inspired by NFS code)
@@ -55,7 +56,7 @@ enum {
/* Cache options */
Opt_cache_loose, Opt_fscache,
/* Access options */
- Opt_access,
+ Opt_access, Opt_posixacl,
/* Error token */
Opt_err
};
@@ -73,6 +74,7 @@ static const match_table_t tokens = {
{Opt_fscache, "fscache"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
+ {Opt_posixacl, "posixacl"},
{Opt_err, NULL}
};
@@ -194,15 +196,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
else if (strcmp(s, "any") == 0)
v9ses->flags |= V9FS_ACCESS_ANY;
else if (strcmp(s, "client") == 0) {
-#ifdef CONFIG_9P_FS_POSIX_ACL
v9ses->flags |= V9FS_ACCESS_CLIENT;
-#else
- P9_DPRINTK(P9_DEBUG_ERROR,
- "access=client option not supported\n");
- kfree(s);
- ret = -EINVAL;
- goto free_and_return;
-#endif
} else {
v9ses->flags |= V9FS_ACCESS_SINGLE;
v9ses->uid = simple_strtoul(s, &e, 10);
@@ -212,6 +206,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
kfree(s);
break;
+ case Opt_posixacl:
+#ifdef CONFIG_9P_FS_POSIX_ACL
+ v9ses->flags |= V9FS_POSIX_ACL;
+#else
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "Not defined CONFIG_9P_FS_POSIX_ACL. "
+ "Ignoring posixacl option\n");
+#endif
+ break;
+
default:
continue;
}
@@ -260,19 +264,12 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
list_add(&v9ses->slist, &v9fs_sessionlist);
spin_unlock(&v9fs_sessionlist_lock);
- v9ses->flags = V9FS_ACCESS_USER;
strcpy(v9ses->uname, V9FS_DEFUSER);
strcpy(v9ses->aname, V9FS_DEFANAME);
v9ses->uid = ~0;
v9ses->dfltuid = V9FS_DEFUID;
v9ses->dfltgid = V9FS_DEFGID;
- rc = v9fs_parse_options(v9ses, data);
- if (rc < 0) {
- retval = rc;
- goto error;
- }
-
v9ses->clnt = p9_client_create(dev_name, data);
if (IS_ERR(v9ses->clnt)) {
retval = PTR_ERR(v9ses->clnt);
@@ -281,10 +278,20 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
goto error;
}
- if (p9_is_proto_dotl(v9ses->clnt))
+ v9ses->flags = V9FS_ACCESS_USER;
+
+ if (p9_is_proto_dotl(v9ses->clnt)) {
+ v9ses->flags = V9FS_ACCESS_CLIENT;
v9ses->flags |= V9FS_PROTO_2000L;
- else if (p9_is_proto_dotu(v9ses->clnt))
+ } else if (p9_is_proto_dotu(v9ses->clnt)) {
v9ses->flags |= V9FS_PROTO_2000U;
+ }
+
+ rc = v9fs_parse_options(v9ses, data);
+ if (rc < 0) {
+ retval = rc;
+ goto error;
+ }
v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
@@ -306,6 +313,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
v9ses->flags |= V9FS_ACCESS_ANY;
v9ses->uid = ~0;
}
+ if (!v9fs_proto_dotl(v9ses) ||
+ !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
+ /*
+ * We support ACL checks on clinet only if the protocol is
+ * 9P2000.L and access is V9FS_ACCESS_CLIENT.
+ */
+ v9ses->flags &= ~V9FS_ACL_MASK;
+ }
fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0,
v9ses->aname);
@@ -467,6 +482,63 @@ static void v9fs_sysfs_cleanup(void)
kobject_put(v9fs_kobj);
}
+static void v9fs_inode_init_once(void *foo)
+{
+ struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
+#ifdef CONFIG_9P_FSCACHE
+ v9inode->fscache = NULL;
+ v9inode->fscache_key = NULL;
+#endif
+ inode_init_once(&v9inode->vfs_inode);
+}
+
+/**
+ * v9fs_init_inode_cache - initialize a cache for 9P
+ * Returns 0 on success.
+ */
+static int v9fs_init_inode_cache(void)
+{
+ v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
+ sizeof(struct v9fs_inode),
+ 0, (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD),
+ v9fs_inode_init_once);
+ if (!v9fs_inode_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * v9fs_destroy_inode_cache - destroy the cache of 9P inode
+ *
+ */
+static void v9fs_destroy_inode_cache(void)
+{
+ kmem_cache_destroy(v9fs_inode_cache);
+}
+
+static int v9fs_cache_register(void)
+{
+ int ret;
+ ret = v9fs_init_inode_cache();
+ if (ret < 0)
+ return ret;
+#ifdef CONFIG_9P_FSCACHE
+ return fscache_register_netfs(&v9fs_cache_netfs);
+#else
+ return ret;
+#endif
+}
+
+static void v9fs_cache_unregister(void)
+{
+ v9fs_destroy_inode_cache();
+#ifdef CONFIG_9P_FSCACHE
+ fscache_unregister_netfs(&v9fs_cache_netfs);
+#endif
+}
+
/**
* init_v9fs - Initialize module
*
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c4b5d88..bd8496d 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,9 @@
* Boston, MA 02111-1301 USA
*
*/
+#ifndef FS_9P_V9FS_H
+#define FS_9P_V9FS_H
+
#include <linux/backing-dev.h>
/**
@@ -28,8 +31,10 @@
* @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions
* @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
* @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
+ * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client.
* @V9FS_ACCESS_ANY: use a single attach for all users
* @V9FS_ACCESS_MASK: bit mask of different ACCESS options
+ * @V9FS_POSIX_ACL: POSIX ACLs are enforced
*
* Session flags reflect options selected by users at mount time
*/
@@ -37,13 +42,15 @@
V9FS_ACCESS_USER | \
V9FS_ACCESS_CLIENT)
#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
+#define V9FS_ACL_MASK V9FS_POSIX_ACL
enum p9_session_flags {
V9FS_PROTO_2000U = 0x01,
V9FS_PROTO_2000L = 0x02,
V9FS_ACCESS_SINGLE = 0x04,
V9FS_ACCESS_USER = 0x08,
- V9FS_ACCESS_CLIENT = 0x10
+ V9FS_ACCESS_CLIENT = 0x10,
+ V9FS_POSIX_ACL = 0x20
};
/* possible values of ->cache */
@@ -109,8 +116,28 @@ struct v9fs_session_info {
struct list_head slist; /* list of sessions registered with v9fs */
struct backing_dev_info bdi;
struct rw_semaphore rename_sem;
+ struct p9_fid *root_fid; /* Used for file system sync */
+};
+
+/* cache_validity flags */
+#define V9FS_INO_INVALID_ATTR 0x01
+
+struct v9fs_inode {
+#ifdef CONFIG_9P_FSCACHE
+ spinlock_t fscache_lock;
+ struct fscache_cookie *fscache;
+ struct p9_qid *fscache_key;
+#endif
+ unsigned int cache_validity;
+ struct p9_fid *writeback_fid;
+ struct inode vfs_inode;
};
+static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
+{
+ return container_of(inode, struct v9fs_inode, vfs_inode);
+}
+
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
char *);
extern void v9fs_session_close(struct v9fs_session_info *v9ses);
@@ -124,16 +151,15 @@ extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
void *p);
-extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses,
- struct p9_fid *fid,
- struct super_block *sb);
-
+extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
extern const struct inode_operations v9fs_dir_inode_operations_dotl;
extern const struct inode_operations v9fs_file_inode_operations_dotl;
extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
-extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses,
- struct p9_fid *fid,
- struct super_block *sb);
+extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
/* other default globals */
#define V9FS_PORT 564
@@ -158,7 +184,7 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
}
/**
- * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * v9fs_get_inode_from_fid - Helper routine to populate an inode by
* issuing a attribute request
* @v9ses: session information
* @fid: fid to issue attribute request for
@@ -166,11 +192,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
*
*/
static inline struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
{
if (v9fs_proto_dotl(v9ses))
- return v9fs_inode_dotl(v9ses, fid, sb);
+ return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
else
- return v9fs_inode(v9ses, fid, sb);
+ return v9fs_inode_from_fid(v9ses, fid, sb);
}
+#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b789f8e..4014160 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -20,6 +20,8 @@
* Boston, MA 02111-1301 USA
*
*/
+#ifndef FS_9P_V9FS_VFS_H
+#define FS_9P_V9FS_VFS_H
/* plan9 semantics are that created files are implicitly opened.
* But linux semantics are that you call create, then open.
@@ -36,6 +38,7 @@
* unlink calls remove, which is an implicit clunk. So we have to track
* that kind of thing so that we don't try to clunk a dead fid.
*/
+#define P9_LOCK_TIMEOUT (30*HZ)
extern struct file_system_type v9fs_fs_type;
extern const struct address_space_operations v9fs_addr_operations;
@@ -45,13 +48,15 @@ extern const struct file_operations v9fs_dir_operations;
extern const struct file_operations v9fs_dir_operations_dotl;
extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
+extern const struct file_operations v9fs_cached_file_operations;
+extern const struct file_operations v9fs_cached_file_operations_dotl;
+extern struct kmem_cache *v9fs_inode_cache;
-#ifdef CONFIG_9P_FSCACHE
struct inode *v9fs_alloc_inode(struct super_block *sb);
void v9fs_destroy_inode(struct inode *inode);
-#endif
-
struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+int v9fs_init_inode(struct v9fs_session_info *v9ses,
+ struct inode *inode, int mode);
void v9fs_evict_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -62,8 +67,19 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
int v9fs_uflags2omode(int uflags, int extended);
ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
+ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
int v9fs_file_fsync_dotl(struct file *filp, int datasync);
-
-#define P9_LOCK_TIMEOUT (30*HZ)
+ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
+ const char __user *, size_t, loff_t *, int);
+int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
+int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
+static inline void v9fs_invalidate_inode_attr(struct inode *inode)
+{
+ struct v9fs_inode *v9inode;
+ v9inode = V9FS_I(inode);
+ v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
+ return;
+}
+#endif
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index b7f2a8e..2524e4c 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -39,16 +39,16 @@
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "cache.h"
+#include "fid.h"
/**
- * v9fs_vfs_readpage - read an entire page in from 9P
+ * v9fs_fid_readpage - read an entire page in from 9P
*
- * @filp: file being read
+ * @fid: fid being read
* @page: structure to page
*
*/
-
-static int v9fs_vfs_readpage(struct file *filp, struct page *page)
+static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
{
int retval;
loff_t offset;
@@ -67,7 +67,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
buffer = kmap(page);
offset = page_offset(page);
- retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset);
+ retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
if (retval < 0) {
v9fs_uncache_page(inode, page);
goto done;
@@ -87,6 +87,19 @@ done:
}
/**
+ * v9fs_vfs_readpage - read an entire page in from 9P
+ *
+ * @filp: file being read
+ * @page: structure to page
+ *
+ */
+
+static int v9fs_vfs_readpage(struct file *filp, struct page *page)
+{
+ return v9fs_fid_readpage(filp->private_data, page);
+}
+
+/**
* v9fs_vfs_readpages - read a set of pages from 9P
*
* @filp: file being read
@@ -124,7 +137,6 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
{
if (PagePrivate(page))
return 0;
-
return v9fs_fscache_release_page(page, gfp);
}
@@ -137,20 +149,89 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
static void v9fs_invalidate_page(struct page *page, unsigned long offset)
{
+ /*
+ * If called with zero offset, we should release
+ * the private state assocated with the page
+ */
if (offset == 0)
v9fs_fscache_invalidate_page(page);
}
+static int v9fs_vfs_writepage_locked(struct page *page)
+{
+ char *buffer;
+ int retval, len;
+ loff_t offset, size;
+ mm_segment_t old_fs;
+ struct v9fs_inode *v9inode;
+ struct inode *inode = page->mapping->host;
+
+ v9inode = V9FS_I(inode);
+ size = i_size_read(inode);
+ if (page->index == size >> PAGE_CACHE_SHIFT)
+ len = size & ~PAGE_CACHE_MASK;
+ else
+ len = PAGE_CACHE_SIZE;
+
+ set_page_writeback(page);
+
+ buffer = kmap(page);
+ offset = page_offset(page);
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* We should have writeback_fid always set */
+ BUG_ON(!v9inode->writeback_fid);
+
+ retval = v9fs_file_write_internal(inode,
+ v9inode->writeback_fid,
+ (__force const char __user *)buffer,
+ len, &offset, 0);
+ if (retval > 0)
+ retval = 0;
+
+ set_fs(old_fs);
+ kunmap(page);
+ end_page_writeback(page);
+ return retval;
+}
+
+static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+ int retval;
+
+ retval = v9fs_vfs_writepage_locked(page);
+ if (retval < 0) {
+ if (retval == -EAGAIN) {
+ redirty_page_for_writepage(wbc, page);
+ retval = 0;
+ } else {
+ SetPageError(page);
+ mapping_set_error(page->mapping, retval);
+ }
+ } else
+ retval = 0;
+
+ unlock_page(page);
+ return retval;
+}
+
/**
* v9fs_launder_page - Writeback a dirty page
- * Since the writes go directly to the server, we simply return a 0
- * here to indicate success.
- *
* Returns 0 on success.
*/
static int v9fs_launder_page(struct page *page)
{
+ int retval;
+ struct inode *inode = page->mapping->host;
+
+ v9fs_fscache_wait_on_page_write(inode, page);
+ if (clear_page_dirty_for_io(page)) {
+ retval = v9fs_vfs_writepage_locked(page);
+ if (retval)
+ return retval;
+ }
return 0;
}
@@ -173,9 +254,15 @@ static int v9fs_launder_page(struct page *page)
* with an error.
*
*/
-ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t pos, unsigned long nr_segs)
+static ssize_t
+v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t pos, unsigned long nr_segs)
{
+ /*
+ * FIXME
+ * Now that we do caching with cache mode enabled, We need
+ * to support direct IO
+ */
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
"off/no(%lld/%lu) EINVAL\n",
iocb->ki_filp->f_path.dentry->d_name.name,
@@ -183,11 +270,84 @@ ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
return -EINVAL;
}
+
+static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
+{
+ int retval = 0;
+ struct page *page;
+ struct v9fs_inode *v9inode;
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ struct inode *inode = mapping->host;
+
+ v9inode = V9FS_I(inode);
+start:
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page) {
+ retval = -ENOMEM;
+ goto out;
+ }
+ BUG_ON(!v9inode->writeback_fid);
+ if (PageUptodate(page))
+ goto out;
+
+ if (len == PAGE_CACHE_SIZE)
+ goto out;
+
+ retval = v9fs_fid_readpage(v9inode->writeback_fid, page);
+ page_cache_release(page);
+ if (!retval)
+ goto start;
+out:
+ *pagep = page;
+ return retval;
+}
+
+static int v9fs_write_end(struct file *filp, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ loff_t last_pos = pos + copied;
+ struct inode *inode = page->mapping->host;
+
+ if (unlikely(copied < len)) {
+ /*
+ * zero out the rest of the area
+ */
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+
+ zero_user(page, from + copied, len - copied);
+ flush_dcache_page(page);
+ }
+
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ /*
+ * No need to use i_size_read() here, the i_size
+ * cannot change under us because we hold the i_mutex.
+ */
+ if (last_pos > inode->i_size) {
+ inode_add_bytes(inode, last_pos - inode->i_size);
+ i_size_write(inode, last_pos);
+ }
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+
+ return copied;
+}
+
+
const struct address_space_operations v9fs_addr_operations = {
- .readpage = v9fs_vfs_readpage,
- .readpages = v9fs_vfs_readpages,
- .releasepage = v9fs_release_page,
- .invalidatepage = v9fs_invalidate_page,
- .launder_page = v9fs_launder_page,
- .direct_IO = v9fs_direct_IO,
+ .readpage = v9fs_vfs_readpage,
+ .readpages = v9fs_vfs_readpages,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+ .writepage = v9fs_vfs_writepage,
+ .write_begin = v9fs_write_begin,
+ .write_end = v9fs_write_end,
+ .releasepage = v9fs_release_page,
+ .invalidatepage = v9fs_invalidate_page,
+ .launder_page = v9fs_launder_page,
+ .direct_IO = v9fs_direct_IO,
};
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 233b7d4..b6a3b9f 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -63,20 +63,15 @@ static int v9fs_dentry_delete(const struct dentry *dentry)
* v9fs_cached_dentry_delete - called when dentry refcount equals 0
* @dentry: dentry in question
*
- * Only return 1 if our inode is invalid. Only non-synthetic files
- * (ones without mtime == 0) should be calling this function.
- *
*/
-
static int v9fs_cached_dentry_delete(const struct dentry *dentry)
{
- struct inode *inode = dentry->d_inode;
- P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
- dentry);
+ P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n",
+ dentry->d_name.name, dentry);
- if(!inode)
+ /* Don't cache negative dentries */
+ if (!dentry->d_inode)
return 1;
-
return 0;
}
@@ -105,7 +100,41 @@ static void v9fs_dentry_release(struct dentry *dentry)
}
}
+static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ struct p9_fid *fid;
+ struct inode *inode;
+ struct v9fs_inode *v9inode;
+
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ if (!inode)
+ goto out_valid;
+
+ v9inode = V9FS_I(inode);
+ if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
+ int retval;
+ struct v9fs_session_info *v9ses;
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ v9ses = v9fs_inode2v9ses(inode);
+ if (v9fs_proto_dotl(v9ses))
+ retval = v9fs_refresh_inode_dotl(fid, inode);
+ else
+ retval = v9fs_refresh_inode(fid, inode);
+ if (retval <= 0)
+ return retval;
+ }
+out_valid:
+ return 1;
+}
+
const struct dentry_operations v9fs_cached_dentry_operations = {
+ .d_revalidate = v9fs_lookup_revalidate,
.d_delete = v9fs_cached_dentry_delete,
.d_release = v9fs_dentry_release,
};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b84ebe8..9c2bdda 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -295,7 +295,6 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
P9_DPRINTK(P9_DEBUG_VFS,
"v9fs_dir_release: inode: %p filp: %p fid: %d\n",
inode, filp, fid ? fid->fid : -1);
- filemap_write_and_wait(inode->i_mapping);
if (fid)
p9_client_clunk(fid);
return 0;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 240c306..78bcb97 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -44,8 +44,7 @@
#include "fid.h"
#include "cache.h"
-static const struct file_operations v9fs_cached_file_operations;
-static const struct file_operations v9fs_cached_file_operations_dotl;
+static const struct vm_operations_struct v9fs_file_vm_ops;
/**
* v9fs_file_open - open a file (or directory)
@@ -57,11 +56,13 @@ static const struct file_operations v9fs_cached_file_operations_dotl;
int v9fs_file_open(struct inode *inode, struct file *file)
{
int err;
+ struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
int omode;
P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
+ v9inode = V9FS_I(inode);
v9ses = v9fs_inode2v9ses(inode);
if (v9fs_proto_dotl(v9ses))
omode = file->f_flags;
@@ -89,20 +90,30 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}
file->private_data = fid;
- if ((fid->qid.version) && (v9ses->cache)) {
- P9_DPRINTK(P9_DEBUG_VFS, "cached");
- /* enable cached file options */
- if(file->f_op == &v9fs_file_operations)
- file->f_op = &v9fs_cached_file_operations;
- else if (file->f_op == &v9fs_file_operations_dotl)
- file->f_op = &v9fs_cached_file_operations_dotl;
-
+ if (v9ses->cache && !v9inode->writeback_fid) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ fid = v9fs_writeback_fid(file->f_path.dentry);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ goto out_error;
+ }
+ v9inode->writeback_fid = (void *) fid;
+ }
#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
v9fs_cache_inode_set_cookie(inode, file);
#endif
- }
-
return 0;
+out_error:
+ p9_client_clunk(file->private_data);
+ file->private_data = NULL;
+ return err;
}
/**
@@ -335,25 +346,22 @@ out_err:
}
/**
- * v9fs_file_readn - read from a file
- * @filp: file pointer to read
+ * v9fs_fid_readn - read from a fid
+ * @fid: fid to read
* @data: data buffer to read data into
* @udata: user data buffer to read data into
* @count: size of buffer
* @offset: offset at which to read data
*
*/
-
ssize_t
-v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
+v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
u64 offset)
{
int n, total, size;
- struct p9_fid *fid = filp->private_data;
P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
- (long long unsigned) offset, count);
-
+ (long long unsigned) offset, count);
n = 0;
total = 0;
size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -379,6 +387,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
}
/**
+ * v9fs_file_readn - read from a file
+ * @filp: file pointer to read
+ * @data: data buffer to read data into
+ * @udata: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+ssize_t
+v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
+ u64 offset)
+{
+ return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
+}
+
+/**
* v9fs_file_read - read from a file
* @filp: file pointer to read
* @udata: user data buffer to read data into
@@ -410,45 +434,22 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
return ret;
}
-/**
- * v9fs_file_write - write to a file
- * @filp: file pointer to write
- * @data: data buffer to write data from
- * @count: size of buffer
- * @offset: offset at which to write data
- *
- */
-
-static ssize_t
-v9fs_file_write(struct file *filp, const char __user * data,
- size_t count, loff_t * offset)
+ssize_t
+v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
+ const char __user *data, size_t count,
+ loff_t *offset, int invalidate)
{
- ssize_t retval;
- size_t total = 0;
int n;
- struct p9_fid *fid;
+ loff_t i_size;
+ size_t total = 0;
struct p9_client *clnt;
- struct inode *inode = filp->f_path.dentry->d_inode;
loff_t origin = *offset;
unsigned long pg_start, pg_end;
P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
(int)count, (int)*offset);
- fid = filp->private_data;
clnt = fid->clnt;
-
- retval = generic_write_checks(filp, &origin, &count, 0);
- if (retval)
- goto out;
-
- retval = -EINVAL;
- if ((ssize_t) count < 0)
- goto out;
- retval = 0;
- if (!count)
- goto out;
-
do {
n = p9_client_write(fid, NULL, data+total, origin+total, count);
if (n <= 0)
@@ -457,25 +458,60 @@ v9fs_file_write(struct file *filp, const char __user * data,
total += n;
} while (count > 0);
- if (total > 0) {
+ if (invalidate && (total > 0)) {
pg_start = origin >> PAGE_CACHE_SHIFT;
pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
if (inode->i_mapping && inode->i_mapping->nrpages)
invalidate_inode_pages2_range(inode->i_mapping,
pg_start, pg_end);
*offset += total;
- i_size_write(inode, i_size_read(inode) + total);
- inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+ i_size = i_size_read(inode);
+ if (*offset > i_size) {
+ inode_add_bytes(inode, *offset - i_size);
+ i_size_write(inode, *offset);
+ }
}
-
if (n < 0)
- retval = n;
- else
- retval = total;
+ return n;
+
+ return total;
+}
+
+/**
+ * v9fs_file_write - write to a file
+ * @filp: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+static ssize_t
+v9fs_file_write(struct file *filp, const char __user * data,
+ size_t count, loff_t *offset)
+{
+ ssize_t retval = 0;
+ loff_t origin = *offset;
+
+
+ retval = generic_write_checks(filp, &origin, &count, 0);
+ if (retval)
+ goto out;
+
+ retval = -EINVAL;
+ if ((ssize_t) count < 0)
+ goto out;
+ retval = 0;
+ if (!count)
+ goto out;
+
+ return v9fs_file_write_internal(filp->f_path.dentry->d_inode,
+ filp->private_data,
+ data, count, offset, 1);
out:
return retval;
}
+
static int v9fs_file_fsync(struct file *filp, int datasync)
{
struct p9_fid *fid;
@@ -505,28 +541,182 @@ int v9fs_file_fsync_dotl(struct file *filp, int datasync)
return retval;
}
-static const struct file_operations v9fs_cached_file_operations = {
+static int
+v9fs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int retval;
+
+ retval = generic_file_mmap(file, vma);
+ if (!retval)
+ vma->vm_ops = &v9fs_file_vm_ops;
+
+ return retval;
+}
+
+static int
+v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct v9fs_inode *v9inode;
+ struct page *page = vmf->page;
+ struct file *filp = vma->vm_file;
+ struct inode *inode = filp->f_path.dentry->d_inode;
+
+
+ P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n",
+ page, (unsigned long)filp->private_data);
+
+ v9inode = V9FS_I(inode);
+ /* make sure the cache has finished storing the page */
+ v9fs_fscache_wait_on_page_write(inode, page);
+ BUG_ON(!v9inode->writeback_fid);
+ lock_page(page);
+ if (page->mapping != inode->i_mapping)
+ goto out_unlock;
+
+ return VM_FAULT_LOCKED;
+out_unlock:
+ unlock_page(page);
+ return VM_FAULT_NOPAGE;
+}
+
+static ssize_t
+v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
+ loff_t *offsetp)
+{
+ loff_t size, offset;
+ struct inode *inode;
+ struct address_space *mapping;
+
+ offset = *offsetp;
+ mapping = filp->f_mapping;
+ inode = mapping->host;
+ if (!count)
+ return 0;
+ size = i_size_read(inode);
+ if (offset < size)
+ filemap_write_and_wait_range(mapping, offset,
+ offset + count - 1);
+
+ return v9fs_file_read(filp, udata, count, offsetp);
+}
+
+/**
+ * v9fs_cached_file_read - read from a file
+ * @filp: file pointer to read
+ * @udata: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+static ssize_t
+v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
+ loff_t *offset)
+{
+ if (filp->f_flags & O_DIRECT)
+ return v9fs_direct_read(filp, data, count, offset);
+ return do_sync_read(filp, data, count, offset);
+}
+
+static ssize_t
+v9fs_direct_write(struct file *filp, const char __user * data,
+ size_t count, loff_t *offsetp)
+{
+ loff_t offset;
+ ssize_t retval;
+ struct inode *inode;
+ struct address_space *mapping;
+
+ offset = *offsetp;
+ mapping = filp->f_mapping;
+ inode = mapping->host;
+ if (!count)
+ return 0;
+
+ mutex_lock(&inode->i_mutex);
+ retval = filemap_write_and_wait_range(mapping, offset,
+ offset + count - 1);
+ if (retval)
+ goto err_out;
+ /*
+ * After a write we want buffered reads to be sure to go to disk to get
+ * the new data. We invalidate clean cached page from the region we're
+ * about to write. We do this *before* the write so that if we fail
+ * here we fall back to buffered write
+ */
+ if (mapping->nrpages) {
+ pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
+
+ retval = invalidate_inode_pages2_range(mapping,
+ pg_start, pg_end);
+ /*
+ * If a page can not be invalidated, fall back
+ * to buffered write.
+ */
+ if (retval) {
+ if (retval == -EBUSY)
+ goto buff_write;
+ goto err_out;
+ }
+ }
+ retval = v9fs_file_write(filp, data, count, offsetp);
+err_out:
+ mutex_unlock(&inode->i_mutex);
+ return retval;
+
+buff_write:
+ mutex_unlock(&inode->i_mutex);
+ return do_sync_write(filp, data, count, offsetp);
+}
+
+/**
+ * v9fs_cached_file_write - write to a file
+ * @filp: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+static ssize_t
+v9fs_cached_file_write(struct file *filp, const char __user * data,
+ size_t count, loff_t *offset)
+{
+
+ if (filp->f_flags & O_DIRECT)
+ return v9fs_direct_write(filp, data, count, offset);
+ return do_sync_write(filp, data, count, offset);
+}
+
+static const struct vm_operations_struct v9fs_file_vm_ops = {
+ .fault = filemap_fault,
+ .page_mkwrite = v9fs_vm_page_mkwrite,
+};
+
+
+const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
+ .read = v9fs_cached_file_read,
+ .write = v9fs_cached_file_write,
.aio_read = generic_file_aio_read,
- .write = v9fs_file_write,
+ .aio_write = generic_file_aio_write,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
- .mmap = generic_file_readonly_mmap,
+ .mmap = v9fs_file_mmap,
.fsync = v9fs_file_fsync,
};
-static const struct file_operations v9fs_cached_file_operations_dotl = {
+const struct file_operations v9fs_cached_file_operations_dotl = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
+ .read = v9fs_cached_file_read,
+ .write = v9fs_cached_file_write,
.aio_read = generic_file_aio_read,
- .write = v9fs_file_write,
+ .aio_write = generic_file_aio_write,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
- .mmap = generic_file_readonly_mmap,
+ .mmap = v9fs_file_mmap,
.fsync = v9fs_file_fsync_dotl,
};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b76a40b..8a2c232 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -203,26 +203,25 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
wstat->extension = NULL;
}
-#ifdef CONFIG_9P_FSCACHE
/**
* v9fs_alloc_inode - helper function to allocate an inode
- * This callback is executed before setting up the inode so that we
- * can associate a vcookie with each inode.
*
*/
-
struct inode *v9fs_alloc_inode(struct super_block *sb)
{
- struct v9fs_cookie *vcookie;
- vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache,
- GFP_KERNEL);
- if (!vcookie)
+ struct v9fs_inode *v9inode;
+ v9inode = (struct v9fs_inode *)kmem_cache_alloc(v9fs_inode_cache,
+ GFP_KERNEL);
+ if (!v9inode)
return NULL;
-
- vcookie->fscache = NULL;
- vcookie->qid = NULL;
- spin_lock_init(&vcookie->lock);
- return &vcookie->inode;
+#ifdef CONFIG_9P_FSCACHE
+ v9inode->fscache = NULL;
+ v9inode->fscache_key = NULL;
+ spin_lock_init(&v9inode->fscache_lock);
+#endif
+ v9inode->writeback_fid = NULL;
+ v9inode->cache_validity = 0;
+ return &v9inode->vfs_inode;
}
/**
@@ -234,35 +233,18 @@ static void v9fs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
+ kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
}
void v9fs_destroy_inode(struct inode *inode)
{
call_rcu(&inode->i_rcu, v9fs_i_callback);
}
-#endif
-/**
- * v9fs_get_inode - helper function to setup an inode
- * @sb: superblock
- * @mode: mode to setup inode with
- *
- */
-
-struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+int v9fs_init_inode(struct v9fs_session_info *v9ses,
+ struct inode *inode, int mode)
{
- int err;
- struct inode *inode;
- struct v9fs_session_info *v9ses = sb->s_fs_info;
-
- P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
-
- inode = new_inode(sb);
- if (!inode) {
- P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
- return ERR_PTR(-ENOMEM);
- }
+ int err = 0;
inode_init_owner(inode, NULL, mode);
inode->i_blocks = 0;
@@ -292,14 +274,20 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
case S_IFREG:
if (v9fs_proto_dotl(v9ses)) {
inode->i_op = &v9fs_file_inode_operations_dotl;
- inode->i_fop = &v9fs_file_operations_dotl;
+ if (v9ses->cache)
+ inode->i_fop =
+ &v9fs_cached_file_operations_dotl;
+ else
+ inode->i_fop = &v9fs_file_operations_dotl;
} else {
inode->i_op = &v9fs_file_inode_operations;
- inode->i_fop = &v9fs_file_operations;
+ if (v9ses->cache)
+ inode->i_fop = &v9fs_cached_file_operations;
+ else
+ inode->i_fop = &v9fs_file_operations;
}
break;
-
case S_IFLNK:
if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
@@ -335,12 +323,37 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
err = -EINVAL;
goto error;
}
+error:
+ return err;
- return inode;
+}
-error:
- iput(inode);
- return ERR_PTR(err);
+/**
+ * v9fs_get_inode - helper function to setup an inode
+ * @sb: superblock
+ * @mode: mode to setup inode with
+ *
+ */
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+{
+ int err;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+
+ inode = new_inode(sb);
+ if (!inode) {
+ P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ err = v9fs_init_inode(v9ses, inode, mode);
+ if (err) {
+ iput(inode);
+ return ERR_PTR(err);
+ }
+ return inode;
}
/*
@@ -403,6 +416,8 @@ error:
*/
void v9fs_evict_inode(struct inode *inode)
{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+
truncate_inode_pages(inode->i_mapping, 0);
end_writeback(inode);
filemap_fdatawrite(inode->i_mapping);
@@ -410,41 +425,67 @@ void v9fs_evict_inode(struct inode *inode)
#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_put_cookie(inode);
#endif
+ /* clunk the fid stashed in writeback_fid */
+ if (v9inode->writeback_fid) {
+ p9_client_clunk(v9inode->writeback_fid);
+ v9inode->writeback_fid = NULL;
+ }
}
-struct inode *
-v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+static struct inode *v9fs_qid_iget(struct super_block *sb,
+ struct p9_qid *qid,
+ struct p9_wstat *st)
{
- int err, umode;
- struct inode *ret = NULL;
- struct p9_wstat *st;
-
- st = p9_client_stat(fid);
- if (IS_ERR(st))
- return ERR_CAST(st);
+ int retval, umode;
+ unsigned long i_ino;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+ i_ino = v9fs_qid2ino(qid);
+ inode = iget_locked(sb, i_ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+ /*
+ * initialize the inode with the stat info
+ * FIXME!! we may need support for stale inodes
+ * later.
+ */
umode = p9mode2unixmode(v9ses, st->mode);
- ret = v9fs_get_inode(sb, umode);
- if (IS_ERR(ret)) {
- err = PTR_ERR(ret);
+ retval = v9fs_init_inode(v9ses, inode, umode);
+ if (retval)
goto error;
- }
-
- v9fs_stat2inode(st, ret, sb);
- ret->i_ino = v9fs_qid2ino(&st->qid);
+ v9fs_stat2inode(st, inode, sb);
#ifdef CONFIG_9P_FSCACHE
- v9fs_vcookie_set_qid(ret, &st->qid);
- v9fs_cache_inode_get_cookie(ret);
+ v9fs_fscache_set_key(inode, &st->qid);
+ v9fs_cache_inode_get_cookie(inode);
#endif
- p9stat_free(st);
- kfree(st);
- return ret;
+ unlock_new_inode(inode);
+ return inode;
error:
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(retval);
+
+}
+
+struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ struct p9_wstat *st;
+ struct inode *inode = NULL;
+
+ st = p9_client_stat(fid);
+ if (IS_ERR(st))
+ return ERR_CAST(st);
+
+ inode = v9fs_qid_iget(sb, &st->qid, st);
p9stat_free(st);
kfree(st);
- return ERR_PTR(err);
+ return inode;
}
/**
@@ -458,8 +499,8 @@ error:
static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
{
int retval;
- struct inode *file_inode;
struct p9_fid *v9fid;
+ struct inode *file_inode;
P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
rmdir);
@@ -470,8 +511,20 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
return PTR_ERR(v9fid);
retval = p9_client_remove(v9fid);
- if (!retval)
- drop_nlink(file_inode);
+ if (!retval) {
+ /*
+ * directories on unlink should have zero
+ * link count
+ */
+ if (rmdir) {
+ clear_nlink(file_inode);
+ drop_nlink(dir);
+ } else
+ drop_nlink(file_inode);
+
+ v9fs_invalidate_inode_attr(file_inode);
+ v9fs_invalidate_inode_attr(dir);
+ }
return retval;
}
@@ -531,7 +584,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
}
/* instantiate inode and assign the unopened fid to the dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -570,9 +623,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
int err;
u32 perm;
int flags;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid;
struct file *filp;
+ struct v9fs_inode *v9inode;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid, *inode_fid;
err = 0;
fid = NULL;
@@ -592,8 +646,25 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
goto error;
}
+ v9fs_invalidate_inode_attr(dir);
/* if we are opening a file, assign the open fid to the file */
if (nd && nd->flags & LOOKUP_OPEN) {
+ v9inode = V9FS_I(dentry->d_inode);
+ if (v9ses->cache && !v9inode->writeback_fid) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ inode_fid = v9fs_writeback_fid(dentry);
+ if (IS_ERR(inode_fid)) {
+ err = PTR_ERR(inode_fid);
+ goto error;
+ }
+ v9inode->writeback_fid = (void *) inode_fid;
+ }
filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
@@ -601,6 +672,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
}
filp->private_data = fid;
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
+ v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
+#endif
} else
p9_client_clunk(fid);
@@ -625,8 +700,8 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
int err;
u32 perm;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid;
+ struct v9fs_session_info *v9ses;
P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
err = 0;
@@ -636,6 +711,9 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
fid = NULL;
+ } else {
+ inc_nlink(dir);
+ v9fs_invalidate_inode_attr(dir);
}
if (fid)
@@ -687,7 +765,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(result);
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
result = PTR_ERR(inode);
inode = NULL;
@@ -747,17 +825,19 @@ int
v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
+ int retval;
struct inode *old_inode;
+ struct inode *new_inode;
struct v9fs_session_info *v9ses;
struct p9_fid *oldfid;
struct p9_fid *olddirfid;
struct p9_fid *newdirfid;
struct p9_wstat wstat;
- int retval;
P9_DPRINTK(P9_DEBUG_VFS, "\n");
retval = 0;
old_inode = old_dentry->d_inode;
+ new_inode = new_dentry->d_inode;
v9ses = v9fs_inode2v9ses(old_inode);
oldfid = v9fs_fid_lookup(old_dentry);
if (IS_ERR(oldfid))
@@ -798,9 +878,30 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = p9_client_wstat(oldfid, &wstat);
clunk_newdir:
- if (!retval)
+ if (!retval) {
+ if (new_inode) {
+ if (S_ISDIR(new_inode->i_mode))
+ clear_nlink(new_inode);
+ else
+ drop_nlink(new_inode);
+ /*
+ * Work around vfs rename rehash bug with
+ * FS_RENAME_DOES_D_MOVE
+ */
+ v9fs_invalidate_inode_attr(new_inode);
+ }
+ if (S_ISDIR(old_inode->i_mode)) {
+ if (!new_inode)
+ inc_nlink(new_dir);
+ drop_nlink(old_dir);
+ }
+ v9fs_invalidate_inode_attr(old_inode);
+ v9fs_invalidate_inode_attr(old_dir);
+ v9fs_invalidate_inode_attr(new_dir);
+
/* successful rename */
d_move(old_dentry, new_dentry);
+ }
up_write(&v9ses->rename_sem);
p9_client_clunk(newdirfid);
@@ -831,9 +932,10 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- return simple_getattr(mnt, dentry, stat);
-
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ generic_fillattr(dentry->d_inode, stat);
+ return 0;
+ }
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -891,17 +993,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
if (iattr->ia_valid & ATTR_GID)
wstat.n_gid = iattr->ia_gid;
}
-
- retval = p9_client_wstat(fid, &wstat);
- if (retval < 0)
- return retval;
-
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(dentry->d_inode)) {
retval = vmtruncate(dentry->d_inode, iattr->ia_size);
if (retval)
return retval;
}
+ /* Write all dirty data */
+ if (S_ISREG(dentry->d_inode->i_mode))
+ filemap_write_and_wait(dentry->d_inode->i_mapping);
+
+ retval = p9_client_wstat(fid, &wstat);
+ if (retval < 0)
+ return retval;
+ v9fs_invalidate_inode_attr(dentry->d_inode);
setattr_copy(dentry->d_inode, iattr);
mark_inode_dirty(dentry->d_inode);
@@ -924,6 +1029,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
char tag_name[14];
unsigned int i_nlink;
struct v9fs_session_info *v9ses = sb->s_fs_info;
+ struct v9fs_inode *v9inode = V9FS_I(inode);
inode->i_nlink = 1;
@@ -983,6 +1089,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
/* not real number of blocks, but 512 byte ones ... */
inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+ v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}
/**
@@ -1115,8 +1222,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
int mode, const char *extension)
{
u32 perm;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid;
+ struct v9fs_session_info *v9ses;
v9ses = v9fs_inode2v9ses(dir);
if (!v9fs_proto_dotu(v9ses)) {
@@ -1130,6 +1237,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
if (IS_ERR(fid))
return PTR_ERR(fid);
+ v9fs_invalidate_inode_attr(dir);
p9_client_clunk(fid);
return 0;
}
@@ -1166,8 +1274,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
int retval;
- struct p9_fid *oldfid;
char *name;
+ struct p9_fid *oldfid;
P9_DPRINTK(P9_DEBUG_VFS,
" %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
@@ -1186,7 +1294,10 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
sprintf(name, "%d\n", oldfid->fid);
retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
__putname(name);
-
+ if (!retval) {
+ v9fs_refresh_inode(oldfid, old_dentry->d_inode);
+ v9fs_invalidate_inode_attr(dir);
+ }
clunk_fid:
p9_client_clunk(oldfid);
return retval;
@@ -1237,6 +1348,32 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return retval;
}
+int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
+{
+ loff_t i_size;
+ struct p9_wstat *st;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ st = p9_client_stat(fid);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ spin_lock(&inode->i_lock);
+ /*
+ * We don't want to refresh inode->i_size,
+ * because we may have cached data
+ */
+ i_size = inode->i_size;
+ v9fs_stat2inode(st, inode, inode->i_sb);
+ if (v9ses->cache)
+ inode->i_size = i_size;
+ spin_unlock(&inode->i_lock);
+ p9stat_free(st);
+ kfree(st);
+ return 0;
+}
+
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index fe3ffa9..67c138e 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,40 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
return dentry;
}
+static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
+ struct p9_qid *qid,
+ struct p9_fid *fid,
+ struct p9_stat_dotl *st)
+{
+ int retval;
+ unsigned long i_ino;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ i_ino = v9fs_qid2ino(qid);
+ inode = iget_locked(sb, i_ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+ /*
+ * initialize the inode with the stat info
+ * FIXME!! we may need support for stale inodes
+ * later.
+ */
+ retval = v9fs_init_inode(v9ses, inode, st->st_mode);
+ if (retval)
+ goto error;
+
+ v9fs_stat2inode_dotl(st, inode);
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_fscache_set_key(inode, &st->qid);
+ v9fs_cache_inode_get_cookie(inode);
+#endif
+ retval = v9fs_get_acl(inode, fid);
+ if (retval)
+ goto error;
+
+ unlock_new_inode(inode);
+ return inode;
+error:
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(retval);
+
+}
+
struct inode *
-v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
{
- struct inode *ret = NULL;
- int err;
struct p9_stat_dotl *st;
+ struct inode *inode = NULL;
st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
if (IS_ERR(st))
return ERR_CAST(st);
- ret = v9fs_get_inode(sb, st->st_mode);
- if (IS_ERR(ret)) {
- err = PTR_ERR(ret);
- goto error;
- }
-
- v9fs_stat2inode_dotl(st, ret);
- ret->i_ino = v9fs_qid2ino(&st->qid);
-#ifdef CONFIG_9P_FSCACHE
- v9fs_vcookie_set_qid(ret, &st->qid);
- v9fs_cache_inode_get_cookie(ret);
-#endif
- err = v9fs_get_acl(ret, fid);
- if (err) {
- iput(ret);
- goto error;
- }
- kfree(st);
- return ret;
-error:
+ inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
kfree(st);
- return ERR_PTR(err);
+ return inode;
}
/**
@@ -136,16 +159,17 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
struct nameidata *nd)
{
int err = 0;
- char *name = NULL;
gid_t gid;
int flags;
mode_t mode;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid = NULL;
- struct p9_fid *dfid, *ofid;
+ char *name = NULL;
struct file *filp;
struct p9_qid qid;
struct inode *inode;
+ struct p9_fid *fid = NULL;
+ struct v9fs_inode *v9inode;
+ struct p9_fid *dfid, *ofid, *inode_fid;
+ struct v9fs_session_info *v9ses;
struct posix_acl *pacl = NULL, *dacl = NULL;
v9ses = v9fs_inode2v9ses(dir);
@@ -196,6 +220,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
err);
goto error;
}
+ v9fs_invalidate_inode_attr(dir);
/* instantiate inode and assign the unopened fid to the dentry */
fid = p9_client_walk(dfid, 1, &name, 1);
@@ -205,7 +230,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
fid = NULL;
goto error;
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -219,6 +244,22 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
/* Now set the ACL based on the default value */
v9fs_set_create_acl(dentry, dacl, pacl);
+ v9inode = V9FS_I(inode);
+ if (v9ses->cache && !v9inode->writeback_fid) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ inode_fid = v9fs_writeback_fid(dentry);
+ if (IS_ERR(inode_fid)) {
+ err = PTR_ERR(inode_fid);
+ goto error;
+ }
+ v9inode->writeback_fid = (void *) inode_fid;
+ }
/* Since we are opening a file, assign the open fid to the file */
filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
@@ -226,6 +267,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
return PTR_ERR(filp);
}
filp->private_data = ofid;
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
+ v9fs_cache_inode_set_cookie(inode, filp);
+#endif
return 0;
error:
@@ -300,7 +345,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
goto error;
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -327,7 +372,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
}
/* Now set the ACL based on the default value */
v9fs_set_create_acl(dentry, dacl, pacl);
-
+ inc_nlink(dir);
+ v9fs_invalidate_inode_attr(dir);
error:
if (fid)
p9_client_clunk(fid);
@@ -346,9 +392,10 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- return simple_getattr(mnt, dentry, stat);
-
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ generic_fillattr(dentry->d_inode, stat);
+ return 0;
+ }
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -406,16 +453,20 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
if (IS_ERR(fid))
return PTR_ERR(fid);
- retval = p9_client_setattr(fid, &p9attr);
- if (retval < 0)
- return retval;
-
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(dentry->d_inode)) {
retval = vmtruncate(dentry->d_inode, iattr->ia_size);
if (retval)
return retval;
}
+ /* Write all dirty data */
+ if (S_ISREG(dentry->d_inode->i_mode))
+ filemap_write_and_wait(dentry->d_inode->i_mapping);
+
+ retval = p9_client_setattr(fid, &p9attr);
+ if (retval < 0)
+ return retval;
+ v9fs_invalidate_inode_attr(dentry->d_inode);
setattr_copy(dentry->d_inode, iattr);
mark_inode_dirty(dentry->d_inode);
@@ -439,6 +490,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
void
v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
inode->i_atime.tv_sec = stat->st_atime_sec;
@@ -497,20 +549,21 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
/* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
* because the inode structure does not have fields for them.
*/
+ v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}
static int
v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
const char *symname)
{
- struct v9fs_session_info *v9ses;
- struct p9_fid *dfid;
- struct p9_fid *fid = NULL;
- struct inode *inode;
- struct p9_qid qid;
- char *name;
int err;
gid_t gid;
+ char *name;
+ struct p9_qid qid;
+ struct inode *inode;
+ struct p9_fid *dfid;
+ struct p9_fid *fid = NULL;
+ struct v9fs_session_info *v9ses;
name = (char *) dentry->d_name.name;
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
@@ -534,6 +587,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
goto error;
}
+ v9fs_invalidate_inode_attr(dir);
if (v9ses->cache) {
/* Now walk from the parent so we can get an unopened fid. */
fid = p9_client_walk(dfid, 1, &name, 1);
@@ -546,7 +600,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
}
/* instantiate inode and assign the unopened fid to dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -588,10 +642,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
int err;
- struct p9_fid *dfid, *oldfid;
char *name;
- struct v9fs_session_info *v9ses;
struct dentry *dir_dentry;
+ struct p9_fid *dfid, *oldfid;
+ struct v9fs_session_info *v9ses;
P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
dir->i_ino, old_dentry->d_name.name,
@@ -616,29 +670,17 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
return err;
}
+ v9fs_invalidate_inode_attr(dir);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
/* Get the latest stat info from server. */
struct p9_fid *fid;
- struct p9_stat_dotl *st;
-
fid = v9fs_fid_lookup(old_dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
- st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
- if (IS_ERR(st))
- return PTR_ERR(st);
-
- v9fs_stat2inode_dotl(st, old_dentry->d_inode);
-
- kfree(st);
- } else {
- /* Caching disabled. No need to get upto date stat info.
- * This dentry will be released immediately. So, just hold the
- * inode
- */
- ihold(old_dentry->d_inode);
+ v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
}
+ ihold(old_dentry->d_inode);
d_instantiate(dentry, old_dentry->d_inode);
return err;
@@ -657,12 +699,12 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
dev_t rdev)
{
int err;
+ gid_t gid;
char *name;
mode_t mode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
struct inode *inode;
- gid_t gid;
struct p9_qid qid;
struct dentry *dir_dentry;
struct posix_acl *dacl = NULL, *pacl = NULL;
@@ -699,6 +741,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
if (err < 0)
goto error;
+ v9fs_invalidate_inode_attr(dir);
/* instantiate inode and assign the unopened fid to the dentry */
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
fid = p9_client_walk(dfid, 1, &name, 1);
@@ -710,7 +753,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
goto error;
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -782,6 +825,31 @@ ndset:
return NULL;
}
+int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
+{
+ loff_t i_size;
+ struct p9_stat_dotl *st;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ spin_lock(&inode->i_lock);
+ /*
+ * We don't want to refresh inode->i_size,
+ * because we may have cached data
+ */
+ i_size = inode->i_size;
+ v9fs_stat2inode_dotl(st, inode);
+ if (v9ses->cache)
+ inode->i_size = i_size;
+ spin_unlock(&inode->i_lock);
+ kfree(st);
+ return 0;
+}
+
const struct inode_operations v9fs_dir_inode_operations_dotl = {
.create = v9fs_vfs_create_dotl,
.lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index dbaabe3..09fd08d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -86,12 +86,15 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
} else
sb->s_op = &v9fs_super_ops;
sb->s_bdi = &v9ses->bdi;
+ if (v9ses->cache)
+ sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
- sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
- MS_NOATIME;
+ sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
+ if (!v9ses->cache)
+ sb->s_flags |= MS_SYNCHRONOUS;
#ifdef CONFIG_9P_FS_POSIX_ACL
- if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)
+ if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
sb->s_flags |= MS_POSIXACL;
#endif
@@ -151,7 +154,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
retval = PTR_ERR(inode);
goto release_sb;
}
-
root = d_alloc_root(inode);
if (!root) {
iput(inode);
@@ -166,7 +168,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
retval = PTR_ERR(st);
goto release_sb;
}
-
+ root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
v9fs_stat2inode_dotl(st, root->d_inode);
kfree(st);
} else {
@@ -183,10 +185,21 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
p9stat_free(st);
kfree(st);
}
+ v9fs_fid_add(root, fid);
retval = v9fs_get_acl(inode, fid);
if (retval)
goto release_sb;
- v9fs_fid_add(root, fid);
+ /*
+ * Add the root fid to session info. This is used
+ * for file system sync. We want a cloned fid here
+ * so that we can do a sync_filesystem after a
+ * shrink_dcache_for_umount
+ */
+ v9ses->root_fid = v9fs_fid_clone(root);
+ if (IS_ERR(v9ses->root_fid)) {
+ retval = PTR_ERR(v9ses->root_fid);
+ goto release_sb;
+ }
P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
return dget(sb->s_root);
@@ -197,15 +210,11 @@ close_session:
v9fs_session_close(v9ses);
kfree(v9ses);
return ERR_PTR(retval);
-
release_sb:
/*
- * we will do the session_close and root dentry release
- * in the below call. But we need to clunk fid, because we haven't
- * attached the fid to dentry so it won't get clunked
- * automatically.
+ * we will do the session_close and root dentry
+ * release in the below call.
*/
- p9_client_clunk(fid);
deactivate_locked_super(sb);
return ERR_PTR(retval);
}
@@ -223,7 +232,7 @@ static void v9fs_kill_super(struct super_block *s)
P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
kill_anon_super(s);
-
+ p9_client_clunk(v9ses->root_fid);
v9fs_session_cancel(v9ses);
v9fs_session_close(v9ses);
kfree(v9ses);
@@ -276,11 +285,31 @@ done:
return res;
}
+static int v9fs_sync_fs(struct super_block *sb, int wait)
+{
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
+ return p9_client_sync_fs(v9ses->root_fid);
+}
+
+static int v9fs_drop_inode(struct inode *inode)
+{
+ struct v9fs_session_info *v9ses;
+ v9ses = v9fs_inode2v9ses(inode);
+ if (v9ses->cache)
+ return generic_drop_inode(inode);
+ /*
+ * in case of non cached mode always drop the
+ * the inode because we want the inode attribute
+ * to always match that on the server.
+ */
+ return 1;
+}
+
static const struct super_operations v9fs_super_ops = {
-#ifdef CONFIG_9P_FSCACHE
.alloc_inode = v9fs_alloc_inode,
.destroy_inode = v9fs_destroy_inode,
-#endif
.statfs = simple_statfs,
.evict_inode = v9fs_evict_inode,
.show_options = generic_show_options,
@@ -288,11 +317,11 @@ static const struct super_operations v9fs_super_ops = {
};
static const struct super_operations v9fs_super_ops_dotl = {
-#ifdef CONFIG_9P_FSCACHE
.alloc_inode = v9fs_alloc_inode,
.destroy_inode = v9fs_destroy_inode,
-#endif
+ .sync_fs = v9fs_sync_fs,
.statfs = v9fs_statfs,
+ .drop_inode = v9fs_drop_inode,
.evict_inode = v9fs_evict_inode,
.show_options = generic_show_options,
.umount_begin = v9fs_umount_begin,
@@ -303,5 +332,5 @@ struct file_system_type v9fs_fs_type = {
.mount = v9fs_mount,
.kill_sb = v9fs_kill_super,
.owner = THIS_MODULE,
- .fs_flags = FS_RENAME_DOES_D_MOVE,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT,
};
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa..7cb53aa 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
def_bool n
config EXPORTFS
- tristate
+ bool
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef..ba01202 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
+obj-$(CONFIG_FHANDLE) += fhandle.o
+
obj-y += quota/
obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb..789b3af 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
candidate->first = candidate->last = index;
candidate->offset_first = from;
candidate->to_last = to;
+ INIT_LIST_HEAD(&candidate->link);
candidate->usage = 1;
candidate->state = AFS_WBACK_PENDING;
init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3..7f54f43 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -85,7 +85,7 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
- aio_wq = create_workqueue("aio");
+ aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
BUG_ON(!aio_wq || !abe_pool);
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
-#define get_ioctx(kioctx) do { \
- BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
- atomic_inc(&(kioctx)->users); \
-} while (0)
-#define put_ioctx(kioctx) do { \
- BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
- if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \
- __put_ioctx(kioctx); \
-} while (0)
+static inline void get_ioctx(struct kioctx *kioctx)
+{
+ BUG_ON(atomic_read(&kioctx->users) <= 0);
+ atomic_inc(&kioctx->users);
+}
+
+static inline int try_get_ioctx(struct kioctx *kioctx)
+{
+ return atomic_inc_not_zero(&kioctx->users);
+}
+
+static inline void put_ioctx(struct kioctx *kioctx)
+{
+ BUG_ON(atomic_read(&kioctx->users) <= 0);
+ if (unlikely(atomic_dec_and_test(&kioctx->users)))
+ __put_ioctx(kioctx);
+}
/* ioctx_alloc
* Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -569,7 +577,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
spin_unlock(&fput_lock);
- queue_work(aio_wq, &fput_work);
+ schedule_work(&fput_work);
} else {
req->ki_filp = NULL;
really_put_req(ctx, req);
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
rcu_read_lock();
hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
- if (ctx->user_id == ctx_id && !ctx->dead) {
- get_ioctx(ctx);
+ /*
+ * RCU protects us against accessing freed memory but
+ * we have to be careful not to get a reference when the
+ * reference count already dropped to 0 (ctx->dead test
+ * is unreliable because of races).
+ */
+ if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
ret = ctx;
break;
}
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
spin_lock_irq(&ctx->ctx_lock);
+ /*
+ * We could have raced with io_destroy() and are currently holding a
+ * reference to ctx which should be destroyed. We cannot submit IO
+ * since ctx gets freed as soon as io_submit() puts its reference. The
+ * check here is reliable: io_destroy() sets ctx->dead before waiting
+ * for outstanding IO and the barrier between these two is realized by
+ * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
+ * increment ctx->reqs_active before checking for ctx->dead and the
+ * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
+ * don't see ctx->dead set here, io_destroy() waits for our IO to
+ * finish.
+ */
+ if (ctx->dead) {
+ spin_unlock_irq(&ctx->ctx_lock);
+ ret = -EINVAL;
+ goto out_put_req;
+ }
aio_run_iocb(req);
if (!list_empty(&ctx->run_list)) {
/* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb..8892870 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
if (ret)
goto out_del;
+ /*
+ * bdev could be deleted beneath us which would implicitly destroy
+ * the holder directory. Hold on to it.
+ */
+ kobject_get(bdev->bd_part->holder_dir);
list_add(&holder->list, &bdev->bd_holder_disks);
goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
del_symlink(bdev->bd_part->holder_dir,
&disk_to_dev(disk)->kobj);
+ kobject_put(bdev->bd_part->holder_dir);
list_del_init(&holder->list);
kfree(holder);
}
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
* flush_disk - invalidates all buffer-cache entries on a disk
*
* @bdev: struct block device to be flushed
+ * @kill_dirty: flag to guide handling of dirty inodes
*
* Invalidates all buffer-cache entries on a disk. It should be called
* when a disk has been changed -- either by a media change or online
* resize.
*/
-static void flush_disk(struct block_device *bdev)
+static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
- if (__invalidate_device(bdev)) {
+ if (__invalidate_device(bdev, kill_dirty)) {
char name[BDEVNAME_SIZE] = "";
if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
"%s: detected capacity change from %lld to %lld\n",
name, bdev_size, disk_size);
i_size_write(bdev->bd_inode, disk_size);
- flush_disk(bdev);
+ flush_disk(bdev, false);
}
}
EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
if (!(events & DISK_EVENT_MEDIA_CHANGE))
return 0;
- flush_disk(bdev);
+ flush_disk(bdev, true);
if (bdops->revalidate_disk)
bdops->revalidate_disk(bdev->bd_disk);
return 1;
@@ -1215,12 +1222,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
res = __blkdev_get(bdev, mode, 0);
- /* __blkdev_get() may alter read only status, check it afterwards */
- if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
- __blkdev_put(bdev, mode, 0);
- res = -EACCES;
- }
-
if (whole) {
/* finish claiming */
mutex_lock(&bdev->bd_mutex);
@@ -1298,6 +1299,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
if (err)
return ERR_PTR(err);
+ if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ blkdev_put(bdev, mode);
+ return ERR_PTR(-EACCES);
+ }
+
return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);
@@ -1601,7 +1607,7 @@ fail:
}
EXPORT_SYMBOL(lookup_bdev);
-int __invalidate_device(struct block_device *bdev)
+int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
struct super_block *sb = get_super(bdev);
int res = 0;
@@ -1614,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
* hold).
*/
shrink_dcache_sb(sb);
- res = invalidate_inodes(sb);
+ res = invalidate_inodes(sb, kill_dirty);
drop_super(sb);
}
invalidate_bdev(bdev);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15b5ca2..9c94934 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
char *value = NULL;
struct posix_acl *acl;
+ if (!IS_POSIXACL(inode))
+ return NULL;
+
acl = get_cached_acl(inode, type);
if (acl != ACL_NOT_CACHED)
return acl;
@@ -84,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
struct posix_acl *acl;
int ret = 0;
+ if (!IS_POSIXACL(dentry->d_inode))
+ return -EOPNOTSUPP;
+
acl = btrfs_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f745287..4d2110e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -562,7 +562,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
- int ret;
+ int ret = -ENOMEM;
u32 *sums;
tree = &BTRFS_I(inode)->io_tree;
@@ -577,6 +577,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+ if (!cb)
+ goto out;
+
atomic_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
@@ -597,13 +600,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE;
- cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
+ cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
GFP_NOFS);
+ if (!cb->compressed_pages)
+ goto fail1;
+
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
for (page_index = 0; page_index < nr_pages; page_index++) {
cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
__GFP_HIGHMEM);
+ if (!cb->compressed_pages[page_index])
+ goto fail2;
}
cb->nr_pages = nr_pages;
@@ -614,6 +622,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->len = uncompressed_len;
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
+ if (!comp_bio)
+ goto fail2;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
atomic_inc(&cb->pending_bios);
@@ -681,6 +691,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_put(comp_bio);
return 0;
+
+fail2:
+ for (page_index = 0; page_index < nr_pages; page_index++)
+ free_page((unsigned long)cb->compressed_pages[page_index]);
+
+ kfree(cb->compressed_pages);
+fail1:
+ kfree(cb);
+out:
+ free_extent_map(em);
+ return ret;
}
static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
@@ -900,7 +921,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
return ret;
}
-void __exit btrfs_exit_compress(void)
+void btrfs_exit_compress(void)
{
free_workspaces();
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3a..7f78cc7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
+ /*
+ * we bump reservation progress every time we decrement
+ * bytes_reserved. This way people waiting for reservations
+ * know something good has happened and they can check
+ * for progress. The number here isn't to be trusted, it
+ * just shows reclaim activity
+ */
+ unsigned long reservation_progress;
+
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
@@ -1254,6 +1263,7 @@ struct btrfs_root {
#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
+#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2228,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
u64 start, u64 end);
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
u64 num_bytes);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b531c36..e1aa8d6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (page->private == EXTENT_PAGE_PRIVATE)
+ if (page->private == EXTENT_PAGE_PRIVATE) {
+ WARN_ON(1);
goto out;
- if (!page->private)
+ }
+ if (!page->private) {
+ WARN_ON(1);
goto out;
+ }
len = page->private >> 2;
WARN_ON(len == 0);
@@ -1550,6 +1554,7 @@ static int transaction_kthread(void *arg)
spin_unlock(&root->fs_info->new_trans_lock);
trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
if (transid == trans->transid) {
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
@@ -2453,10 +2458,14 @@ int btrfs_commit_super(struct btrfs_root *root)
up_write(&root->fs_info->cleanup_work_sem);
trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
/* run commit again to drop the original snapshot */
trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
btrfs_commit_transaction(trans, root);
ret = btrfs_write_and_wait_transaction(NULL, root);
BUG_ON(ret);
@@ -2554,6 +2563,8 @@ int close_ctree(struct btrfs_root *root)
kfree(fs_info->chunk_root);
kfree(fs_info->dev_root);
kfree(fs_info->csum_root);
+ kfree(fs_info);
+
return 0;
}
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9786963..b4ffad8 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
int len = *max_len;
int type;
- if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
- (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+ if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
+ *max_len = BTRFS_FID_SIZE_CONNECTABLE;
return 255;
+ } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
+ *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
+ return 255;
+ }
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
@@ -171,6 +175,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
int ret;
path = btrfs_alloc_path();
+ if (!path)
+ return ERR_PTR(-ENOMEM);
if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = root->root_key.objectid;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b552693..7b3089b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,11 +320,6 @@ static int caching_kthread(void *data)
if (!path)
return -ENOMEM;
- exclude_super_stripes(extent_root, block_group);
- spin_lock(&block_group->space_info->lock);
- block_group->space_info->bytes_readonly += block_group->bytes_super;
- spin_unlock(&block_group->space_info->lock);
-
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
/*
@@ -467,8 +462,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
cache->cached = BTRFS_CACHE_NO;
}
spin_unlock(&cache->lock);
- if (ret == 1)
+ if (ret == 1) {
+ free_excluded_extents(fs_info->extent_root, cache);
return 0;
+ }
}
if (load_cache_only)
@@ -3344,21 +3341,24 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
- int pause = 1;
+ long time_left;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+ int loops = 0;
+ unsigned long progress;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
reserved = space_info->bytes_reserved;
+ progress = space_info->reservation_progress;
if (reserved == 0)
return 0;
max_reclaim = min(reserved, to_reclaim);
- while (1) {
+ while (loops < 1024) {
/* have the flusher threads jump in and do some IO */
smp_mb();
nr_pages = min_t(unsigned long, nr_pages,
@@ -3371,17 +3371,31 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock);
+ loops++;
+
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
- __set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(pause);
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
+ time_left = schedule_timeout_interruptible(1);
+
+ /* We were interrupted, exit */
+ if (time_left)
+ break;
+
+ /* we've kicked the IO a few times, if anything has been freed,
+ * exit. There is no sense in looping here for a long time
+ * when we really need to commit the transaction, or there are
+ * just too many writers without enough free space
+ */
+
+ if (loops > 3) {
+ smp_mb();
+ if (progress != space_info->reservation_progress)
+ break;
+ }
}
return reclaimed >= to_reclaim;
@@ -3588,10 +3602,23 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes > 0) {
if (dest) {
- block_rsv_add_bytes(dest, num_bytes, 0);
- } else {
+ spin_lock(&dest->lock);
+ if (!dest->full) {
+ u64 bytes_to_add;
+
+ bytes_to_add = dest->size - dest->reserved;
+ bytes_to_add = min(num_bytes, bytes_to_add);
+ dest->reserved += bytes_to_add;
+ if (dest->reserved >= dest->size)
+ dest->full = 1;
+ num_bytes -= bytes_to_add;
+ }
+ spin_unlock(&dest->lock);
+ }
+ if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -3824,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_reserved -= num_bytes;
+ sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -3985,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
to_reserve = 0;
}
spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
if (ret)
@@ -4012,6 +4039,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, root->sectorsize);
atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+ WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
spin_lock(&BTRFS_I(inode)->accounting_lock);
nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
@@ -4112,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_block_group_used(&cache->item, old_val);
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock);
@@ -4163,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -4213,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -4691,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (ret) {
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len;
+ cache->space_info->reservation_progress++;
spin_unlock(&cache->space_info->lock);
}
goto out;
@@ -5355,7 +5387,7 @@ again:
num_bytes, data, 1);
goto again;
}
- if (ret == -ENOSPC) {
+ if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, data);
@@ -5633,6 +5665,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize)
{
struct btrfs_block_rsv *block_rsv;
+ struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
int ret;
block_rsv = get_block_rsv(trans, root);
@@ -5640,14 +5673,39 @@ use_block_rsv(struct btrfs_trans_handle *trans,
if (block_rsv->size == 0) {
ret = reserve_metadata_bytes(trans, root, block_rsv,
blocksize, 0);
- if (ret)
+ /*
+ * If we couldn't reserve metadata bytes try and use some from
+ * the global reserve.
+ */
+ if (ret && block_rsv != global_rsv) {
+ ret = block_rsv_use_bytes(global_rsv, blocksize);
+ if (!ret)
+ return global_rsv;
+ return ERR_PTR(ret);
+ } else if (ret) {
return ERR_PTR(ret);
+ }
return block_rsv;
}
ret = block_rsv_use_bytes(block_rsv, blocksize);
if (!ret)
return block_rsv;
+ if (ret) {
+ WARN_ON(1);
+ ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
+ 0);
+ if (!ret) {
+ spin_lock(&block_rsv->lock);
+ block_rsv->size += blocksize;
+ spin_unlock(&block_rsv->lock);
+ return block_rsv;
+ } else if (ret && block_rsv != global_rsv) {
+ ret = block_rsv_use_bytes(global_rsv, blocksize);
+ if (!ret)
+ return global_rsv;
+ }
+ }
return ERR_PTR(-ENOSPC);
}
@@ -6221,6 +6279,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
BUG_ON(!wc);
trans = btrfs_start_transaction(tree_root, 0);
+ BUG_ON(IS_ERR(trans));
+
if (block_rsv)
trans->block_rsv = block_rsv;
@@ -6318,6 +6378,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
btrfs_end_transaction_throttle(trans, tree_root);
trans = btrfs_start_transaction(tree_root, 0);
+ BUG_ON(IS_ERR(trans));
if (block_rsv)
trans->block_rsv = block_rsv;
}
@@ -6446,6 +6507,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start,
int ret = 0;
ra = kzalloc(sizeof(*ra), GFP_NOFS);
+ if (!ra)
+ return -ENOMEM;
mutex_lock(&inode->i_mutex);
first_index = start >> PAGE_CACHE_SHIFT;
@@ -6531,7 +6594,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
u64 end = start + extent_key->offset - 1;
em = alloc_extent_map(GFP_NOFS);
- BUG_ON(!em || IS_ERR(em));
+ BUG_ON(!em);
em->start = start;
em->len = extent_key->offset;
@@ -7477,7 +7540,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
BUG_ON(reloc_root->commit_root != NULL);
while (1) {
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
mutex_lock(&root->fs_info->drop_mutex);
ret = btrfs_drop_snapshot(trans, reloc_root);
@@ -7535,7 +7598,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
if (found) {
trans = btrfs_start_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
}
@@ -7779,7 +7842,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
trans = btrfs_start_transaction(extent_root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
if (extent_key->objectid == 0) {
ret = del_extent_zero(trans, extent_root, path, extent_key);
@@ -8013,6 +8076,13 @@ out:
return ret;
}
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type)
+{
+ u64 alloc_flags = get_alloc_profile(root, type);
+ return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+}
+
/*
* helper to account the unused space of all the readonly block group in the
* list. takes mirrors into account.
@@ -8270,6 +8340,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
if (block_group->cached == BTRFS_CACHE_STARTED)
wait_block_group_cache_done(block_group);
+ /*
+ * We haven't cached this block group, which means we could
+ * possibly have excluded extents on this block group.
+ */
+ if (block_group->cached == BTRFS_CACHE_NO)
+ free_excluded_extents(info->extent_root, block_group);
+
btrfs_remove_free_space_cache(block_group);
btrfs_put_block_group(block_group);
@@ -8385,6 +8462,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
cache->sectorsize = root->sectorsize;
/*
+ * We need to exclude the super stripes now so that the space
+ * info has super bytes accounted for, otherwise we'll think
+ * we have more space than we actually do.
+ */
+ exclude_super_stripes(root, cache);
+
+ /*
* check for two cases, either we are full, and therefore
* don't need to bother with the caching work since we won't
* find any space, or we are empty, and we can just add all
@@ -8392,12 +8476,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* time, particularly in the full case.
*/
if (found_key.offset == btrfs_block_group_used(&cache->item)) {
- exclude_super_stripes(root, cache);
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
free_excluded_extents(root, cache);
} else if (btrfs_block_group_used(&cache->item) == 0) {
- exclude_super_stripes(root, cache);
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
add_new_free_space(cache, root->fs_info,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e993cf..714adc4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
*/
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
- unsigned long bits)
+ unsigned long bits, int contig)
{
struct rb_node *node;
struct extent_state *state;
u64 cur_start = *start;
u64 total_bytes = 0;
+ u64 last = 0;
int found = 0;
if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
state = rb_entry(node, struct extent_state, rb_node);
if (state->start > search_end)
break;
- if (state->end >= cur_start && (state->state & bits)) {
+ if (contig && found && state->start > last + 1)
+ break;
+ if (state->end >= cur_start && (state->state & bits) == bits) {
total_bytes += min(search_end, state->end) + 1 -
max(cur_start, state->start);
if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
*start = state->start;
found = 1;
}
+ last = state->end;
+ } else if (contig && found) {
+ break;
}
node = rb_next(node);
if (!node)
@@ -1865,7 +1871,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
bio_get(bio);
if (tree->ops && tree->ops->submit_bio_hook)
- tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+ ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
mirror_num, bio_flags, start);
else
submit_bio(rw, bio);
@@ -1920,6 +1926,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
nr = bio_get_nr_vecs(bdev);
bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+ if (!bio)
+ return -ENOMEM;
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
@@ -1944,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
static void set_page_extent_head(struct page *page, unsigned long len)
{
+ WARN_ON(!PagePrivate(page));
set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
}
@@ -2126,7 +2135,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
&bio_flags);
if (bio)
- submit_one_bio(READ, bio, 0, bio_flags);
+ ret = submit_one_bio(READ, bio, 0, bio_flags);
return ret;
}
@@ -2819,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map,
* at this point we can safely clear everything except the
* locked bit and the nodatasum bit
*/
- clear_extent_bit(tree, start, end,
+ ret = clear_extent_bit(tree, start, end,
~(EXTENT_LOCKED | EXTENT_NODATASUM),
0, 0, NULL, mask);
+
+ /* if clear_extent_bit failed for enomem reasons,
+ * we can't allow the release to continue.
+ */
+ if (ret < 0)
+ ret = 0;
+ else
+ ret = 1;
}
return ret;
}
@@ -2901,6 +2918,46 @@ out:
return sector;
}
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+ u64 offset,
+ u64 last,
+ get_extent_t *get_extent)
+{
+ u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+ struct extent_map *em;
+ u64 len;
+
+ if (offset >= last)
+ return NULL;
+
+ while(1) {
+ len = last - offset;
+ if (len == 0)
+ break;
+ len = (len + sectorsize - 1) & ~(sectorsize - 1);
+ em = get_extent(inode, NULL, 0, offset, len, 0);
+ if (!em || IS_ERR(em))
+ return em;
+
+ /* if this isn't a hole return it */
+ if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+ em->block_start != EXTENT_MAP_HOLE) {
+ return em;
+ }
+
+ /* this is a hole, advance to the next extent */
+ offset = extent_map_end(em);
+ free_extent_map(em);
+ if (offset >= last)
+ break;
+ }
+ return NULL;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -2910,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u32 flags = 0;
u32 found_type;
u64 last;
+ u64 last_for_get_extent = 0;
u64 disko = 0;
+ u64 isize = i_size_read(inode);
struct btrfs_key found_key;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_file_extent_item *item;
int end = 0;
- u64 em_start = 0, em_len = 0;
+ u64 em_start = 0;
+ u64 em_len = 0;
+ u64 em_end = 0;
unsigned long emflags;
- int hole = 0;
if (len == 0)
return -EINVAL;
@@ -2929,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -ENOMEM;
path->leave_spinning = 1;
+ /*
+ * lookup the last file extent. We're not using i_size here
+ * because there might be preallocation past i_size
+ */
ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
path, inode->i_ino, -1, 0);
if (ret < 0) {
@@ -2942,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
found_type = btrfs_key_type(&found_key);
- /* No extents, just return */
+ /* No extents, but there might be delalloc bits */
if (found_key.objectid != inode->i_ino ||
found_type != BTRFS_EXTENT_DATA_KEY) {
- btrfs_free_path(path);
- return 0;
+ /* have to trust i_size as the end */
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ } else {
+ /*
+ * remember the start of the last extent. There are a
+ * bunch of different factors that go into the length of the
+ * extent, so its much less complex to remember where it started
+ */
+ last = found_key.offset;
+ last_for_get_extent = last + 1;
}
- last = found_key.offset;
btrfs_free_path(path);
+ /*
+ * we might have some extents allocated but more delalloc past those
+ * extents. so, we trust isize unless the start of the last extent is
+ * beyond isize
+ */
+ if (last < isize) {
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ }
+
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
&cached_state, GFP_NOFS);
- em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
if (!em)
goto out;
if (IS_ERR(em)) {
@@ -2962,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
while (!end) {
- hole = 0;
- off = em->start + em->len;
- if (off >= max)
- end = 1;
+ u64 offset_in_extent;
- if (em->block_start == EXTENT_MAP_HOLE) {
- hole = 1;
- goto next;
- }
+ /* break if the extent we found is outside the range */
+ if (em->start >= max || extent_map_end(em) < off)
+ break;
- em_start = em->start;
- em_len = em->len;
+ /*
+ * get_extent may return an extent that starts before our
+ * requested range. We have to make sure the ranges
+ * we return to fiemap always move forward and don't
+ * overlap, so adjust the offsets here
+ */
+ em_start = max(em->start, off);
+ /*
+ * record the offset from the start of the extent
+ * for adjusting the disk offset below
+ */
+ offset_in_extent = em_start - em->start;
+ em_end = extent_map_end(em);
+ em_len = em_end - em_start;
+ emflags = em->flags;
disko = 0;
flags = 0;
+ /*
+ * bump off for our next call to get_extent
+ */
+ off = extent_map_end(em);
+ if (off >= max)
+ end = 1;
+
if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1;
flags |= FIEMAP_EXTENT_LAST;
@@ -2988,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
- disko = em->block_start;
+ disko = em->block_start + offset_in_extent;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
-next:
- emflags = em->flags;
free_extent_map(em);
em = NULL;
- if (!end) {
- em = get_extent(inode, NULL, 0, off, max - off, 0);
- if (!em)
- goto out;
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
- emflags = em->flags;
- }
-
- if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+ if ((em_start >= last) || em_len == (u64)-1 ||
+ (last == (u64)-1 && isize <= em_end)) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- if (em_start == last) {
+ /* now scan forward to see if this is really the last extent. */
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+ if (!em) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
-
- if (!hole) {
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
- if (ret)
- goto out_free;
- }
+ ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+ em_len, flags);
+ if (ret)
+ goto out_free;
}
out_free:
free_extent_map(em);
@@ -3192,7 +3284,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
}
if (!PageUptodate(p))
uptodate = 0;
- unlock_page(p);
+
+ /*
+ * see below about how we avoid a nasty race with release page
+ * and why we unlock later
+ */
+ if (i != 0)
+ unlock_page(p);
}
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3216,9 +3314,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
+
+ /*
+ * there is a race where release page may have
+ * tried to find this extent buffer in the radix
+ * but failed. It will tell the VM it is safe to
+ * reclaim the, and it will clear the page private bit.
+ * We must make sure to set the page private bit properly
+ * after the extent buffer is in the radix tree so
+ * it doesn't get lost
+ */
+ set_page_extent_mapped(eb->first_page);
+ set_page_extent_head(eb->first_page, eb->len);
+ if (!page0)
+ unlock_page(eb->first_page);
return eb;
free_eb:
+ if (eb->first_page && !page0)
+ unlock_page(eb->first_page);
+
if (!atomic_dec_and_test(&eb->refs))
return exists;
btrfs_release_extent_buffer(eb);
@@ -3269,10 +3384,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
continue;
lock_page(page);
+ WARN_ON(!PagePrivate(page));
+
+ set_page_extent_mapped(page);
if (i == 0)
set_page_extent_head(page, eb->len);
- else
- set_page_private(page, EXTENT_PAGE_PRIVATE);
clear_page_dirty_for_io(page);
spin_lock_irq(&page->mapping->tree_lock);
@@ -3462,6 +3578,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
+
+ WARN_ON(!PagePrivate(page));
+
+ set_page_extent_mapped(page);
+ if (i == 0)
+ set_page_extent_head(page, eb->len);
+
if (inc_all_pages)
page_cache_get(page);
if (!PageUptodate(page)) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfa..9318dfe 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
- u64 max_bytes, unsigned long bits);
+ u64 max_bytes, unsigned long bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce..2b6c12e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
{
struct extent_map *em;
em = kmem_cache_alloc(extent_map_cache, mask);
- if (!em || IS_ERR(em))
- return em;
+ if (!em)
+ return NULL;
em->in_tree = 0;
em->flags = 0;
em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a562a25..4f19a3e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -536,6 +536,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
root = root->fs_info->csum_root;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
while (1) {
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -548,7 +550,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
if (path->slots[0] == 0)
goto out;
path->slots[0]--;
+ } else if (ret < 0) {
+ goto out;
}
+
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c800d58..f447b78 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/* Flush processor's dcache for this page */
flush_dcache_page(page);
+
+ /*
+ * if we get a partial write, we can end up with
+ * partially up to date pages. These add
+ * a lot of complexity, so make sure they don't
+ * happen by forcing this copy to be retried.
+ *
+ * The rest of the btrfs_file_write code will fall
+ * back to page at a time copies after we return 0.
+ */
+ if (!PageUptodate(page) && copied < count)
+ copied = 0;
+
iov_iter_advance(i, copied);
write_bytes -= copied;
total_copied += copied;
@@ -186,6 +199,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split = alloc_extent_map(GFP_NOFS);
if (!split2)
split2 = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!split || !split2);
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
@@ -762,6 +776,27 @@ out:
}
/*
+ * on error we return an unlocked page and the error value
+ * on success we return a locked page and 0
+ */
+static int prepare_uptodate_page(struct page *page, u64 pos)
+{
+ int ret = 0;
+
+ if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+ ret = btrfs_readpage(NULL, page);
+ if (ret)
+ return ret;
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+/*
* this gets pages into the page cache and locks them down, it also properly
* waits for data=ordered extents to finish before allowing the pages to be
* modified.
@@ -776,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode;
int err = 0;
+ int faili = 0;
u64 start_pos;
u64 last_pos;
@@ -793,11 +829,24 @@ again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) {
+ faili = i - 1;
err = -ENOMEM;
- BUG_ON(1);
+ goto fail;
+ }
+
+ if (i == 0)
+ err = prepare_uptodate_page(pages[i], pos);
+ if (i == num_pages - 1)
+ err = prepare_uptodate_page(pages[i],
+ pos + write_bytes);
+ if (err) {
+ page_cache_release(pages[i]);
+ faili = i - 1;
+ goto fail;
}
wait_on_page_writeback(pages[i]);
}
+ err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -837,6 +886,14 @@ again:
WARN_ON(!PageLocked(pages[i]));
}
return 0;
+fail:
+ while (faili >= 0) {
+ unlock_page(pages[faili]);
+ page_cache_release(pages[faili]);
+ faili--;
+ }
+ return err;
+
}
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -846,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page *pinned[2];
struct page **pages = NULL;
struct iov_iter i;
loff_t *ppos = &iocb->ki_pos;
@@ -867,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
- pinned[0] = NULL;
- pinned[1] = NULL;
-
start_pos = pos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -946,6 +999,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
(sizeof(struct page *)));
pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
+ if (!pages) {
+ ret = -ENOMEM;
+ goto out;
+ }
/* generic_write_checks can change our pos */
start_pos = pos;
@@ -953,39 +1010,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
- /*
- * there are lots of better ways to do this, but this code
- * makes sure the first and last page in the file range are
- * up to date and ready for cow
- */
- if ((pos & (PAGE_CACHE_SIZE - 1))) {
- pinned[0] = grab_cache_page(inode->i_mapping, first_index);
- if (!PageUptodate(pinned[0])) {
- ret = btrfs_readpage(NULL, pinned[0]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[0]);
- } else {
- unlock_page(pinned[0]);
- }
- }
- if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
- pinned[1] = grab_cache_page(inode->i_mapping, last_index);
- if (!PageUptodate(pinned[1])) {
- ret = btrfs_readpage(NULL, pinned[1]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[1]);
- } else {
- unlock_page(pinned[1]);
- }
- }
-
while (iov_iter_count(&i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
size_t write_bytes = min(iov_iter_count(&i),
nrptrs * (size_t)PAGE_CACHE_SIZE -
offset);
- size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+ size_t num_pages = (write_bytes + offset +
+ PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);
@@ -1015,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
- dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+
+ /*
+ * if we have trouble faulting in the pages, fall
+ * back to one page at a time
+ */
+ if (copied < write_bytes)
+ nrptrs = 1;
+
+ if (copied == 0)
+ dirty_pages = 0;
+ else
+ dirty_pages = (copied + offset +
+ PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
if (num_pages > dirty_pages) {
if (copied > 0)
@@ -1060,10 +1103,6 @@ out:
err = ret;
kfree(pages);
- if (pinned[0])
- page_cache_release(pinned[0]);
- if (pinned[1])
- page_cache_release(pinned[1]);
*ppos = pos;
/*
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 60d6842..a039065 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -987,11 +987,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
return entry;
}
-static void unlink_free_space(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_space *info)
+static inline void
+__unlink_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info)
{
rb_erase(&info->offset_index, &block_group->free_space_offset);
block_group->free_extents--;
+}
+
+static void unlink_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info)
+{
+ __unlink_free_space(block_group, info);
block_group->free_space -= info->bytes;
}
@@ -1016,14 +1023,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
u64 max_bytes;
u64 bitmap_bytes;
u64 extent_bytes;
+ u64 size = block_group->key.offset;
/*
* The goal is to keep the total amount of memory used per 1gb of space
* at or below 32k, so we need to adjust how much memory we allow to be
* used by extent based free space tracking
*/
- max_bytes = MAX_CACHE_BYTES_PER_GIG *
- (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
+ if (size < 1024 * 1024 * 1024)
+ max_bytes = MAX_CACHE_BYTES_PER_GIG;
+ else
+ max_bytes = MAX_CACHE_BYTES_PER_GIG *
+ div64_u64(size, 1024 * 1024 * 1024);
/*
* we want to account for 1 more bitmap than what we have so we can make
@@ -1171,6 +1182,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
recalculate_thresholds(block_group);
}
+static void free_bitmap(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *bitmap_info)
+{
+ unlink_free_space(block_group, bitmap_info);
+ kfree(bitmap_info->bitmap);
+ kfree(bitmap_info);
+ block_group->total_bitmaps--;
+ recalculate_thresholds(block_group);
+}
+
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *bitmap_info,
u64 *offset, u64 *bytes)
@@ -1195,6 +1216,7 @@ again:
*/
search_start = *offset;
search_bytes = *bytes;
+ search_bytes = min(search_bytes, end - search_start + 1);
ret = search_bitmap(block_group, bitmap_info, &search_start,
&search_bytes);
BUG_ON(ret < 0 || search_start != *offset);
@@ -1211,13 +1233,8 @@ again:
if (*bytes) {
struct rb_node *next = rb_next(&bitmap_info->offset_index);
- if (!bitmap_info->bytes) {
- unlink_free_space(block_group, bitmap_info);
- kfree(bitmap_info->bitmap);
- kfree(bitmap_info);
- block_group->total_bitmaps--;
- recalculate_thresholds(block_group);
- }
+ if (!bitmap_info->bytes)
+ free_bitmap(block_group, bitmap_info);
/*
* no entry after this bitmap, but we still have bytes to
@@ -1250,13 +1267,8 @@ again:
return -EAGAIN;
goto again;
- } else if (!bitmap_info->bytes) {
- unlink_free_space(block_group, bitmap_info);
- kfree(bitmap_info->bitmap);
- kfree(bitmap_info);
- block_group->total_bitmaps--;
- recalculate_thresholds(block_group);
- }
+ } else if (!bitmap_info->bytes)
+ free_bitmap(block_group, bitmap_info);
return 0;
}
@@ -1359,22 +1371,14 @@ out:
return ret;
}
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes)
+bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info, bool update_stat)
{
- struct btrfs_free_space *right_info = NULL;
- struct btrfs_free_space *left_info = NULL;
- struct btrfs_free_space *info = NULL;
- int ret = 0;
-
- info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
- if (!info)
- return -ENOMEM;
-
- info->offset = offset;
- info->bytes = bytes;
-
- spin_lock(&block_group->tree_lock);
+ struct btrfs_free_space *left_info;
+ struct btrfs_free_space *right_info;
+ bool merged = false;
+ u64 offset = info->offset;
+ u64 bytes = info->bytes;
/*
* first we want to see if there is free space adjacent to the range we
@@ -1388,37 +1392,62 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
else
left_info = tree_search_offset(block_group, offset - 1, 0, 0);
- /*
- * If there was no extent directly to the left or right of this new
- * extent then we know we're going to have to allocate a new extent, so
- * before we do that see if we need to drop this into a bitmap
- */
- if ((!left_info || left_info->bitmap) &&
- (!right_info || right_info->bitmap)) {
- ret = insert_into_bitmap(block_group, info);
-
- if (ret < 0) {
- goto out;
- } else if (ret) {
- ret = 0;
- goto out;
- }
- }
-
if (right_info && !right_info->bitmap) {
- unlink_free_space(block_group, right_info);
+ if (update_stat)
+ unlink_free_space(block_group, right_info);
+ else
+ __unlink_free_space(block_group, right_info);
info->bytes += right_info->bytes;
kfree(right_info);
+ merged = true;
}
if (left_info && !left_info->bitmap &&
left_info->offset + left_info->bytes == offset) {
- unlink_free_space(block_group, left_info);
+ if (update_stat)
+ unlink_free_space(block_group, left_info);
+ else
+ __unlink_free_space(block_group, left_info);
info->offset = left_info->offset;
info->bytes += left_info->bytes;
kfree(left_info);
+ merged = true;
}
+ return merged;
+}
+
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+ u64 offset, u64 bytes)
+{
+ struct btrfs_free_space *info;
+ int ret = 0;
+
+ info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+ if (!info)
+ return -ENOMEM;
+
+ info->offset = offset;
+ info->bytes = bytes;
+
+ spin_lock(&block_group->tree_lock);
+
+ if (try_merge_free_space(block_group, info, true))
+ goto link;
+
+ /*
+ * There was no extent directly to the left or right of this new
+ * extent then we know we're going to have to allocate a new extent, so
+ * before we do that see if we need to drop this into a bitmap
+ */
+ ret = insert_into_bitmap(block_group, info);
+ if (ret < 0) {
+ goto out;
+ } else if (ret) {
+ ret = 0;
+ goto out;
+ }
+link:
ret = link_free_space(block_group, info);
if (ret)
kfree(info);
@@ -1621,6 +1650,7 @@ __btrfs_return_cluster_to_free_space(
node = rb_next(&entry->offset_index);
rb_erase(&entry->offset_index, &cluster->root);
BUG_ON(entry->bitmap);
+ try_merge_free_space(block_group, entry, false);
tree_insert_offset(&block_group->free_space_offset,
entry->offset, &entry->offset_index, 0);
}
@@ -1685,13 +1715,8 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
ret = offset;
if (entry->bitmap) {
bitmap_clear_bits(block_group, entry, offset, bytes);
- if (!entry->bytes) {
- unlink_free_space(block_group, entry);
- kfree(entry->bitmap);
- kfree(entry);
- block_group->total_bitmaps--;
- recalculate_thresholds(block_group);
- }
+ if (!entry->bytes)
+ free_bitmap(block_group, entry);
} else {
unlink_free_space(block_group, entry);
entry->offset += bytes;
@@ -1789,6 +1814,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
ret = search_start;
bitmap_clear_bits(block_group, entry, ret, bytes);
+ if (entry->bytes == 0)
+ free_bitmap(block_group, entry);
out:
spin_unlock(&cluster->lock);
spin_unlock(&block_group->tree_lock);
@@ -1842,15 +1869,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
entry->offset += bytes;
entry->bytes -= bytes;
- if (entry->bytes == 0) {
+ if (entry->bytes == 0)
rb_erase(&entry->offset_index, &cluster->root);
- kfree(entry);
- }
break;
}
out:
spin_unlock(&cluster->lock);
+ if (!ret)
+ return 0;
+
+ spin_lock(&block_group->tree_lock);
+
+ block_group->free_space -= bytes;
+ if (entry->bytes == 0) {
+ block_group->free_extents--;
+ kfree(entry);
+ }
+
+ spin_unlock(&block_group->tree_lock);
+
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 160b55b..4a0107e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -416,7 +416,7 @@ again:
}
if (start == 0) {
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -612,6 +612,7 @@ retry:
GFP_NOFS);
trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
ret = btrfs_reserve_extent(trans, root,
async_extent->compressed_size,
async_extent->compressed_size,
@@ -643,6 +644,7 @@ retry:
async_extent->ram_size - 1, 0);
em = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!em);
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
@@ -771,7 +773,7 @@ static noinline int cow_file_range(struct inode *inode,
BUG_ON(root == root->fs_info->tree_root);
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -819,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode,
BUG_ON(ret);
em = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!em);
em->start = start;
em->orig_start = em->start;
ram_size = ins.offset;
@@ -1049,7 +1052,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
} else {
trans = btrfs_join_transaction(root, 1);
}
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
cow_start = (u64)-1;
cur_offset = start;
@@ -1168,6 +1171,7 @@ out_check:
struct extent_map_tree *em_tree;
em_tree = &BTRFS_I(inode)->extent_tree;
em = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!em);
em->start = cur_offset;
em->orig_start = em->start;
em->len = num_bytes;
@@ -1557,6 +1561,7 @@ out:
out_page:
unlock_page(page);
page_cache_release(page);
+ kfree(fixup);
}
/*
@@ -1703,7 +1708,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
trans = btrfs_join_transaction_nolock(root, 1);
else
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
@@ -1720,6 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
trans = btrfs_join_transaction_nolock(root, 1);
else
trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1907,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
private = 0;
if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
- (u64)-1, 1, EXTENT_DIRTY)) {
+ (u64)-1, 1, EXTENT_DIRTY, 0)) {
ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
start, &private_failure);
if (ret == 0) {
@@ -2354,6 +2360,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
*/
if (is_bad_inode(inode)) {
trans = btrfs_start_transaction(root, 0);
+ BUG_ON(IS_ERR(trans));
btrfs_orphan_del(trans, inode);
btrfs_end_transaction(trans, root);
iput(inode);
@@ -2381,6 +2388,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
if (root->orphan_block_rsv || root->orphan_item_inserted) {
trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
btrfs_end_transaction(trans, root);
}
@@ -2641,7 +2649,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto err;
+ goto out;
}
path->leave_spinning = 1;
@@ -2714,9 +2722,10 @@ static int check_path_shared(struct btrfs_root *root,
struct extent_buffer *eb;
int level;
u64 refs = 1;
- int uninitialized_var(ret);
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+ int ret;
+
if (!path->nodes[level])
break;
eb = path->nodes[level];
@@ -2727,7 +2736,7 @@ static int check_path_shared(struct btrfs_root *root,
if (refs > 1)
return 1;
}
- return ret; /* XXX callers? */
+ return 0;
}
/*
@@ -4134,7 +4143,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
}
srcu_read_unlock(&root->fs_info->subvol_srcu, index);
- if (root != sub_root) {
+ if (!IS_ERR(inode) && root != sub_root) {
down_read(&root->fs_info->cleanup_work_sem);
if (!(inode->i_sb->s_flags & MS_RDONLY))
btrfs_orphan_cleanup(sub_root);
@@ -4347,6 +4356,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
trans = btrfs_join_transaction_nolock(root, 1);
else
trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
btrfs_set_trans_block_group(trans, inode);
if (nolock)
ret = btrfs_end_transaction_nolock(trans, root);
@@ -4372,6 +4383,7 @@ void btrfs_dirty_inode(struct inode *inode)
return;
trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
ret = btrfs_update_inode(trans, root, inode);
@@ -4794,9 +4806,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
int err;
int drop_inode = 0;
- if (inode->i_nlink == 0)
- return -ENOENT;
-
/* do not allow sys_link's with other subvols of the same device */
if (root->objectid != BTRFS_I(inode)->root->objectid)
return -EPERM;
@@ -4809,10 +4818,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
/*
- * 1 item for inode ref
+ * 2 items for inode and inode ref
* 2 items for dir items
+ * 1 item for parent inode
*/
- trans = btrfs_start_transaction(root, 3);
+ trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto fail;
@@ -5176,6 +5186,8 @@ again:
em = NULL;
btrfs_release_path(root, path);
trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return ERR_CAST(trans);
goto again;
}
map = kmap(page);
@@ -5266,6 +5278,128 @@ out:
return em;
}
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create)
+{
+ struct extent_map *em;
+ struct extent_map *hole_em = NULL;
+ u64 range_start = start;
+ u64 end;
+ u64 found;
+ u64 found_end;
+ int err = 0;
+
+ em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+ if (IS_ERR(em))
+ return em;
+ if (em) {
+ /*
+ * if our em maps to a hole, there might
+ * actually be delalloc bytes behind it
+ */
+ if (em->block_start != EXTENT_MAP_HOLE)
+ return em;
+ else
+ hole_em = em;
+ }
+
+ /* check to see if we've wrapped (len == -1 or similar) */
+ end = start + len;
+ if (end < start)
+ end = (u64)-1;
+ else
+ end -= 1;
+
+ em = NULL;
+
+ /* ok, we didn't find anything, lets look for delalloc */
+ found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+ end, len, EXTENT_DELALLOC, 1);
+ found_end = range_start + found;
+ if (found_end < range_start)
+ found_end = (u64)-1;
+
+ /*
+ * we didn't find anything useful, return
+ * the original results from get_extent()
+ */
+ if (range_start > end || found_end <= start) {
+ em = hole_em;
+ hole_em = NULL;
+ goto out;
+ }
+
+ /* adjust the range_start to make sure it doesn't
+ * go backwards from the start they passed in
+ */
+ range_start = max(start,range_start);
+ found = found_end - range_start;
+
+ if (found > 0) {
+ u64 hole_start = start;
+ u64 hole_len = len;
+
+ em = alloc_extent_map(GFP_NOFS);
+ if (!em) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /*
+ * when btrfs_get_extent can't find anything it
+ * returns one huge hole
+ *
+ * make sure what it found really fits our range, and
+ * adjust to make sure it is based on the start from
+ * the caller
+ */
+ if (hole_em) {
+ u64 calc_end = extent_map_end(hole_em);
+
+ if (calc_end <= start || (hole_em->start > end)) {
+ free_extent_map(hole_em);
+ hole_em = NULL;
+ } else {
+ hole_start = max(hole_em->start, start);
+ hole_len = calc_end - hole_start;
+ }
+ }
+ em->bdev = NULL;
+ if (hole_em && range_start > hole_start) {
+ /* our hole starts before our delalloc, so we
+ * have to return just the parts of the hole
+ * that go until the delalloc starts
+ */
+ em->len = min(hole_len,
+ range_start - hole_start);
+ em->start = hole_start;
+ em->orig_start = hole_start;
+ /*
+ * don't adjust block start at all,
+ * it is fixed at EXTENT_MAP_HOLE
+ */
+ em->block_start = hole_em->block_start;
+ em->block_len = hole_len;
+ } else {
+ em->start = range_start;
+ em->len = found;
+ em->orig_start = range_start;
+ em->block_start = EXTENT_MAP_DELALLOC;
+ em->block_len = found;
+ }
+ } else if (hole_em) {
+ return hole_em;
+ }
+out:
+
+ free_extent_map(hole_em);
+ if (err) {
+ free_extent_map(em);
+ return ERR_PTR(err);
+ }
+ return em;
+}
+
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
u64 start, u64 len)
{
@@ -5280,8 +5414,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
trans = btrfs_join_transaction(root, 0);
- if (!trans)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(trans))
+ return ERR_CAST(trans);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -5505,7 +5639,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
* while we look for nocow cross refs
*/
trans = btrfs_join_transaction(root, 0);
- if (!trans)
+ if (IS_ERR(trans))
goto must_cow;
if (can_nocow_odirect(trans, inode, start, len) == 1) {
@@ -5640,7 +5774,7 @@ again:
BUG_ON(!ordered);
trans = btrfs_join_transaction(root, 1);
- if (!trans) {
+ if (IS_ERR(trans)) {
err = -ENOMEM;
goto out;
}
@@ -5920,6 +6054,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
if (!skip_sum) {
dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
if (!dip->csums) {
+ kfree(dip);
ret = -ENOMEM;
goto free_ordered;
}
@@ -6088,7 +6223,7 @@ out:
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
- return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+ return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
}
int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a506a22..5fdb2ab 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -203,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
@@ -907,6 +907,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (new_size > old_size) {
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_unlock;
+ }
ret = btrfs_grow_device(trans, device, new_size);
btrfs_commit_transaction(trans, root);
} else {
@@ -1067,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
- if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+ if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
return -EINVAL;
if (flags & ~BTRFS_SUBVOL_RDONLY)
return -EOPNOTSUPP;
+ if (!is_owner_or_cap(inode))
+ return -EACCES;
+
down_write(&root->fs_info->subvol_sem);
/* nothing to do */
@@ -1093,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
goto out_reset;
}
- ret = btrfs_update_root(trans, root,
+ ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
btrfs_commit_transaction(trans, root);
@@ -1898,7 +1905,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
memcpy(&new_key, &key, sizeof(new_key));
new_key.objectid = inode->i_ino;
- new_key.offset = key.offset + destoff - off;
+ if (off <= key.offset)
+ new_key.offset = key.offset + destoff - off;
+ else
+ new_key.offset = destoff;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
@@ -2082,7 +2092,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
ret = -ENOMEM;
trans = btrfs_start_ioctl_transaction(root, 0);
- if (!trans)
+ if (IS_ERR(trans))
goto out_drop;
file->private_data = trans;
@@ -2138,9 +2148,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
path->leave_spinning = 1;
trans = btrfs_start_transaction(root, 1);
- if (!trans) {
+ if (IS_ERR(trans)) {
btrfs_free_path(path);
- return -ENOMEM;
+ return PTR_ERR(trans);
}
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
@@ -2201,7 +2211,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
int num_types = 4;
int alloc_size;
int ret = 0;
- int slot_count = 0;
+ u64 slot_count = 0;
int i, c;
if (copy_from_user(&space_args,
@@ -2240,7 +2250,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
goto out;
}
- slot_count = min_t(int, space_args.space_slots, slot_count);
+ slot_count = min_t(u64, space_args.space_slots, slot_count);
alloc_size = sizeof(*dest) * slot_count;
@@ -2260,6 +2270,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
for (i = 0; i < num_types; i++) {
struct btrfs_space_info *tmp;
+ if (!slot_count)
+ break;
+
info = NULL;
rcu_read_lock();
list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
@@ -2281,7 +2294,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
memcpy(dest, &space, sizeof(space));
dest++;
space_args.total_spaces++;
+ slot_count--;
}
+ if (!slot_count)
+ break;
}
up_read(&info->groups_sem);
}
@@ -2334,6 +2350,8 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
u64 transid;
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
transid = trans->transid;
btrfs_commit_transaction_async(trans, root, 0);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450..a178f5e 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
unsigned long tot_out;
unsigned long tot_len;
char *buf;
+ bool may_late_unmap, need_unmap;
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
tot_in += in_len;
working_bytes = in_len;
+ may_late_unmap = need_unmap = false;
/* fast path: avoid using the working buffer */
if (in_page_bytes_left >= in_len) {
buf = data_in + in_offset;
bytes = in_len;
+ may_late_unmap = true;
goto cont;
}
@@ -329,14 +332,17 @@ cont:
if (working_bytes == 0 && tot_in >= tot_len)
break;
- kunmap(pages_in[page_in_index]);
- page_in_index++;
- if (page_in_index >= total_pages_in) {
+ if (page_in_index + 1 >= total_pages_in) {
ret = -1;
- data_in = NULL;
goto done;
}
- data_in = kmap(pages_in[page_in_index]);
+
+ if (may_late_unmap)
+ need_unmap = true;
+ else
+ kunmap(pages_in[page_in_index]);
+
+ data_in = kmap(pages_in[++page_in_index]);
in_page_bytes_left = PAGE_CACHE_SIZE;
in_offset = 0;
@@ -346,6 +352,8 @@ cont:
out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
+ if (need_unmap)
+ kunmap(pages_in[page_in_index - 1]);
if (ret != LZO_E_OK) {
printk(KERN_WARNING "btrfs decompress failed\n");
ret = -1;
@@ -363,8 +371,7 @@ cont:
break;
}
done:
- if (data_in)
- kunmap(pages_in[page_in_index]);
+ kunmap(pages_in[page_in_index]);
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2b61e1d..083a554 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
u64 file_offset)
{
struct rb_root *root = &tree->tree;
- struct rb_node *prev;
+ struct rb_node *prev = NULL;
struct rb_node *ret;
struct btrfs_ordered_extent *entry;
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0d126be..fb2605d 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
#else
BUG();
#endif
+ break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 045c9c2..31ade58 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
new_node->bytenr = dest->node->start;
new_node->level = node->level;
new_node->lowest = node->lowest;
+ new_node->checked = 1;
new_node->root = dest;
if (!node->lowest) {
@@ -2028,6 +2029,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
while (1) {
trans = btrfs_start_transaction(root, 0);
+ BUG_ON(IS_ERR(trans));
trans->block_rsv = rc->block_rsv;
ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
@@ -2147,6 +2149,12 @@ again:
}
trans = btrfs_join_transaction(rc->extent_root, 1);
+ if (IS_ERR(trans)) {
+ if (!err)
+ btrfs_block_rsv_release(rc->extent_root,
+ rc->block_rsv, num_bytes);
+ return PTR_ERR(trans);
+ }
if (!err) {
if (num_bytes != rc->merging_rsv_size) {
@@ -3222,6 +3230,7 @@ truncate:
trans = btrfs_join_transaction(root, 0);
if (IS_ERR(trans)) {
btrfs_free_path(path);
+ ret = PTR_ERR(trans);
goto out;
}
@@ -3628,6 +3637,7 @@ int prepare_to_relocate(struct reloc_control *rc)
set_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root, 1);
+ BUG_ON(IS_ERR(trans));
btrfs_commit_transaction(trans, rc->extent_root);
return 0;
}
@@ -3644,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
u32 item_size;
int ret;
int err = 0;
+ int progress = 0;
path = btrfs_alloc_path();
if (!path)
@@ -3656,8 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
while (1) {
+ progress++;
trans = btrfs_start_transaction(rc->extent_root, 0);
-
+ BUG_ON(IS_ERR(trans));
+restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans, rc->extent_root);
continue;
@@ -3770,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
}
}
+ if (trans && progress && err == -ENOSPC) {
+ ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
+ rc->block_group->flags);
+ if (ret == 0) {
+ err = 0;
+ progress = 0;
+ goto restart;
+ }
+ }
btrfs_release_path(rc->extent_root, path);
clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
@@ -3804,7 +3826,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
/* get rid of pinned extents */
trans = btrfs_join_transaction(rc->extent_root, 1);
- btrfs_commit_transaction(trans, rc->extent_root);
+ if (IS_ERR(trans))
+ err = PTR_ERR(trans);
+ else
+ btrfs_commit_transaction(trans, rc->extent_root);
out_free:
btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
btrfs_free_path(path);
@@ -4022,6 +4047,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
int ret;
trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
+ BUG_ON(IS_ERR(trans));
memset(&root->root_item.drop_progress, 0,
sizeof(root->root_item.drop_progress));
@@ -4125,6 +4151,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)
set_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root, 1);
+ if (IS_ERR(trans)) {
+ unset_reloc_control(rc);
+ err = PTR_ERR(trans);
+ goto out_free;
+ }
rc->merge_reloc_tree = 1;
@@ -4154,9 +4185,13 @@ int btrfs_recover_relocation(struct btrfs_root *root)
unset_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root, 1);
- btrfs_commit_transaction(trans, rc->extent_root);
-out:
+ if (IS_ERR(trans))
+ err = PTR_ERR(trans);
+ else
+ btrfs_commit_transaction(trans, rc->extent_root);
+out_free:
kfree(rc);
+out:
while (!list_empty(&reloc_roots)) {
reloc_root = list_entry(reloc_roots.next,
struct btrfs_root, root_list);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b2130c4..d39a989 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
- Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+ Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+ Opt_enospc_debug, Opt_err,
};
static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
{Opt_space_cache, "space_cache"},
{Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+ {Opt_enospc_debug, "enospc_debug"},
{Opt_err, NULL},
};
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_user_subvol_rm_allowed:
btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
break;
+ case Opt_enospc_debug:
+ btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@@ -383,7 +388,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
struct btrfs_fs_devices **fs_devices)
{
substring_t args[MAX_OPT_ARGS];
- char *opts, *p;
+ char *opts, *orig, *p;
int error = 0;
int intarg;
@@ -397,6 +402,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
return -ENOMEM;
+ orig = opts;
while ((p = strsep(&opts, ",")) != NULL) {
int token;
@@ -432,7 +438,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
}
out_free_opts:
- kfree(opts);
+ kfree(orig);
out:
/*
* If no subvolume name is specified we use the default one. Allocate
@@ -623,6 +629,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
btrfs_wait_ordered_extents(root, 0, 0);
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
return ret;
}
@@ -761,6 +769,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
}
btrfs_close_devices(fs_devices);
+ kfree(fs_info);
+ kfree(tree_root);
} else {
char b[BDEVNAME_SIZE];
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bae5c7b..3d73c8d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1161,6 +1161,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
INIT_DELAYED_WORK(&ac->work, do_async_commit);
ac->root = root;
ac->newtrans = btrfs_join_transaction(root, 0);
+ if (IS_ERR(ac->newtrans)) {
+ int err = PTR_ERR(ac->newtrans);
+ kfree(ac);
+ return err;
+ }
/* take transaction reference */
mutex_lock(&root->fs_info->trans_mutex);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 054744a..a4bbb85 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
}
dst_copy = kmalloc(item_size, GFP_NOFS);
src_copy = kmalloc(item_size, GFP_NOFS);
+ if (!dst_copy || !src_copy) {
+ btrfs_release_path(root, path);
+ kfree(dst_copy);
+ kfree(src_copy);
+ return -ENOMEM;
+ }
read_extent_buffer(eb, src_copy, src_ptr, item_size);
@@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
btrfs_dir_item_key_to_cpu(leaf, di, &location);
name_len = btrfs_dir_name_len(leaf, di);
name = kmalloc(name_len, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
btrfs_release_path(root, path);
@@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
int match = 0;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
if (ret != 0)
goto out;
@@ -967,6 +979,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
key.offset = (u64)-1;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -1178,6 +1192,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
name_len = btrfs_dir_name_len(eb, di);
name = kmalloc(name_len, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
log_type = btrfs_dir_type(eb, di);
read_extent_buffer(eb, name, (unsigned long)(di + 1),
name_len);
@@ -1692,6 +1709,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
root_owner = btrfs_header_owner(parent);
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+ if (!next)
+ return -ENOMEM;
if (*level == 1) {
wc->process_func(root, next, wc, ptr_gen);
@@ -2032,6 +2051,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
mutex_unlock(&log_root_tree->log_mutex);
+ ret = 0;
goto out;
}
atomic_set(&log_root_tree->log_commit[index2], 1);
@@ -2096,7 +2116,7 @@ out:
smp_mb();
if (waitqueue_active(&root->log_commit_wait[index1]))
wake_up(&root->log_commit_wait[index1]);
- return 0;
+ return ret;
}
static void free_log_tree(struct btrfs_trans_handle *trans,
@@ -2194,6 +2214,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
log = root->log_root;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
name, name_len, -1);
if (IS_ERR(di)) {
@@ -2594,6 +2617,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
+ if (!ins_data)
+ return -ENOMEM;
+
ins_sizes = (u32 *)ins_data;
ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
@@ -2725,7 +2751,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
log = root->log_root;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
dst_path = btrfs_alloc_path();
+ if (!dst_path) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
min_key.objectid = inode->i_ino;
min_key.type = BTRFS_INODE_ITEM_KEY;
@@ -3080,6 +3112,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
BUG_ON(!path);
trans = btrfs_start_transaction(fs_info->tree_root, 0);
+ BUG_ON(IS_ERR(trans));
wc.trans = trans;
wc.pin = 1;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d158530..dd13eb8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1213,6 +1213,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
return -ENOMEM;
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
+ return PTR_ERR(trans);
+ }
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
@@ -1334,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = btrfs_shrink_device(device, 0);
if (ret)
- goto error_brelse;
+ goto error_undo;
ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
if (ret)
- goto error_brelse;
+ goto error_undo;
device->in_fs_metadata = 0;
@@ -1412,6 +1416,13 @@ out:
mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
return ret;
+error_undo:
+ if (device->writeable) {
+ list_add(&device->dev_alloc_list,
+ &root->fs_info->fs_devices->alloc_list);
+ root->fs_info->fs_devices->rw_devices++;
+ }
+ goto error_brelse;
}
/*
@@ -1601,11 +1612,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = find_next_devid(root, &device->devid);
if (ret) {
+ kfree(device->name);
kfree(device);
goto error;
}
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ kfree(device->name);
+ kfree(device);
+ ret = PTR_ERR(trans);
+ goto error;
+ }
+
lock_chunks(root);
device->writeable = 1;
@@ -1621,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->dev_root = root->fs_info->dev_root;
device->bdev = bdev;
device->in_fs_metadata = 1;
- device->mode = 0;
+ device->mode = FMODE_EXCL;
set_blocksize(device->bdev, 4096);
if (seeding_dev) {
@@ -1873,7 +1892,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
return ret;
trans = btrfs_start_transaction(root, 0);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
lock_chunks(root);
@@ -2047,7 +2066,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
BUG_ON(ret);
trans = btrfs_start_transaction(dev_root, 0);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
ret = btrfs_grow_device(trans, device, old_size);
BUG_ON(ret);
@@ -2213,6 +2232,11 @@ again:
/* Shrinking succeeded, else we would be at "done". */
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto done;
+ }
+
lock_chunks(root);
device->disk_total_bytes = new_size;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 60d27bc..6b61ded 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1560,9 +1560,10 @@ retry_locked:
/* NOTE: no side-effects allowed, until we take s_mutex */
revoking = cap->implemented & ~cap->issued;
- if (revoking)
- dout(" mds%d revoking %s\n", cap->mds,
- ceph_cap_string(revoking));
+ dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
+ cap->mds, cap, ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->implemented),
+ ceph_cap_string(revoking));
if (cap == ci->i_auth_cap &&
(cap->issued & CEPH_CAP_FILE_WR)) {
@@ -1658,6 +1659,8 @@ ack:
if (cap == ci->i_auth_cap && ci->i_dirty_caps)
flushing = __mark_caps_flushing(inode, session);
+ else
+ flushing = 0;
mds = cap->mds; /* remember mds, so we don't repeat */
sent++;
@@ -1940,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
}
}
+static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap *cap;
+ int delayed = 0;
+
+ spin_lock(&inode->i_lock);
+ cap = ci->i_auth_cap;
+ dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
+ ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
+ __ceph_flush_snaps(ci, &session, 1);
+ if (ci->i_flushing_caps) {
+ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+ __ceph_caps_used(ci),
+ __ceph_caps_wanted(ci),
+ cap->issued | cap->implemented,
+ ci->i_flushing_caps, NULL);
+ if (delayed) {
+ spin_lock(&inode->i_lock);
+ __cap_delay_requeue(mdsc, ci);
+ spin_unlock(&inode->i_lock);
+ }
+ } else {
+ spin_unlock(&inode->i_lock);
+ }
+}
+
/*
* Take references to capabilities we hold, so that we don't release
@@ -2687,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
ceph_add_cap(inode, session, cap_id, -1,
issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
NULL /* no caps context */);
- try_flush_caps(inode, session, NULL);
+ kick_flushing_inode_caps(mdsc, session, inode);
up_read(&mdsc->snap_rwsem);
/* make sure we re-request max_size, if necessary */
@@ -2785,8 +2817,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
case CEPH_CAP_OP_IMPORT:
handle_cap_import(mdsc, inode, h, session,
snaptrace, snaptrace_len);
- ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
- session);
+ ceph_check_caps(ceph_inode(inode), 0, session);
goto done_unlocked;
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0bc68de..ebafa65 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -409,7 +409,7 @@ more:
spin_lock(&inode->i_lock);
if (ci->i_release_count == fi->dir_release_count) {
dout(" marking %p complete\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = filp->f_pos;
}
spin_unlock(&inode->i_lock);
@@ -496,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
/* .snap dir? */
if (err == -ENOENT &&
+ ceph_snap(parent) == CEPH_NOSNAP &&
strcmp(dentry->d_name.name,
fsc->mount_options->snapdir_name) == 0) {
struct inode *inode = ceph_get_snapdir(parent);
@@ -992,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
dir = dentry->d_parent->d_inode;
@@ -1029,28 +1030,8 @@ out_touch:
static void ceph_dentry_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
- struct inode *parent_inode = NULL;
- u64 snapid = CEPH_NOSNAP;
- if (!IS_ROOT(dentry)) {
- parent_inode = dentry->d_parent->d_inode;
- if (parent_inode)
- snapid = ceph_snap(parent_inode);
- }
- dout("dentry_release %p parent %p\n", dentry, parent_inode);
- if (parent_inode && snapid != CEPH_SNAPDIR) {
- struct ceph_inode_info *ci = ceph_inode(parent_inode);
-
- spin_lock(&parent_inode->i_lock);
- if (ci->i_shared_gen == di->lease_shared_gen ||
- snapid <= CEPH_MAXSNAP) {
- dout(" clearing %p complete (d_release)\n",
- parent_inode);
- ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
- ci->i_release_count++;
- }
- spin_unlock(&parent_inode->i_lock);
- }
+ dout("dentry_release %p\n", dentry);
if (di) {
ceph_dentry_lru_del(dentry);
if (di->lease_session)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e835eff..193bfa5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,13 +707,9 @@ static int fill_inode(struct inode *inode,
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = 2;
}
-
- /* it may be better to set st_size in getattr instead? */
- if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
- inode->i_size = ci->i_rbytes;
break;
default:
pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
@@ -1819,7 +1815,11 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
else
stat->dev = 0;
if (S_ISDIR(inode->i_mode)) {
- stat->size = ci->i_rbytes;
+ if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
+ RBYTES))
+ stat->size = ci->i_rbytes;
+ else
+ stat->size = ci->i_files + ci->i_subdirs;
stat->blocks = 0;
stat->blksize = 65536;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1e30d19..a1ee8fa 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -693,9 +693,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
dout("choose_mds %p %llx.%llx "
"frag %u mds%d (%d/%d)\n",
inode, ceph_vinop(inode),
- frag.frag, frag.mds,
+ frag.frag, mds,
(int)r, frag.ndist);
- return mds;
+ if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
+ CEPH_MDS_STATE_ACTIVE)
+ return mds;
}
/* since this file/dir wasn't known to be
@@ -708,7 +710,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
dout("choose_mds %p %llx.%llx "
"frag %u mds%d (auth)\n",
inode, ceph_vinop(inode), frag.frag, mds);
- return mds;
+ if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
+ CEPH_MDS_STATE_ACTIVE)
+ return mds;
}
}
}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 39c243a..f40b913 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
if (lastinode)
iput(lastinode);
- dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino);
- list_for_each_entry(child, &realm->children, child_item)
- queue_realm_cap_snaps(child);
+ list_for_each_entry(child, &realm->children, child_item) {
+ dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
+ realm, realm->ino, child, child->ino);
+ list_del_init(&child->dirty_item);
+ list_add(&child->dirty_item, &realm->dirty_item);
+ }
+ list_del_init(&realm->dirty_item);
dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
}
@@ -683,7 +687,9 @@ more:
* queue cap snaps _after_ we've built the new snap contexts,
* so that i_head_snapc can be set appropriately.
*/
- list_for_each_entry(realm, &dirty_realms, dirty_item) {
+ while (!list_empty(&dirty_realms)) {
+ realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
+ dirty_item);
queue_realm_cap_snaps(realm);
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bf6f0f3..9c50854 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -290,6 +290,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+ fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
+ fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 6e12a6b..8c9eba6 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -219,6 +219,7 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
struct rb_node **p;
struct rb_node *parent = NULL;
struct ceph_inode_xattr *xattr = NULL;
+ int name_len = strlen(name);
int c;
p = &ci->i_xattrs.index.rb_node;
@@ -226,6 +227,8 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
parent = *p;
xattr = rb_entry(parent, struct ceph_inode_xattr, node);
c = strncmp(name, xattr->name, xattr->name_len);
+ if (c == 0 && name_len > xattr->name_len)
+ c = 1;
if (c < 0)
p = &(*p)->rb_left;
else if (c > 0)
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index ee45648..7cb0f7f 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -3,6 +3,7 @@ config CIFS
depends on INET
select NLS
select CRYPTO
+ select CRYPTO_MD4
select CRYPTO_MD5
select CRYPTO_HMAC
select CRYPTO_ARC4
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 43b19dd..d875584 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o
cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
- md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
+ cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
readdir.o ioctl.o sess.o export.o
cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
diff --git a/fs/cifs/README b/fs/cifs/README
index 46af99a..fe16835 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -452,6 +452,11 @@ A partial list of the supported mount options follows:
if oplock (caching token) is granted and held. Note that
direct allows write operations larger than page size
to be sent to the server.
+ strictcache Use for switching on strict cache mode. In this mode the
+ client read from the cache all the time it has Oplock Level II,
+ otherwise - read from the server. All written data are stored
+ in the cache, but if the client doesn't have Exclusive Oplock,
+ it writes the data to the server.
acl Allow setfacl and getfacl to manage posix ACLs if server
supports them. (default)
noacl Do not allow setfacl and getfacl calls on this mount
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 7ed3653..0a265ad 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -282,8 +282,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
cFYI(1, "in %s", __func__);
BUG_ON(IS_ROOT(mntpt));
- xid = GetXid();
-
/*
* The MSDFS spec states that paths in DFS referral requests and
* responses must be prefixed by a single '\' character instead of
@@ -293,20 +291,21 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
mnt = ERR_PTR(-ENOMEM);
full_path = build_path_from_dentry(mntpt);
if (full_path == NULL)
- goto free_xid;
+ goto cdda_exit;
cifs_sb = CIFS_SB(mntpt->d_inode->i_sb);
tlink = cifs_sb_tlink(cifs_sb);
- mnt = ERR_PTR(-EINVAL);
if (IS_ERR(tlink)) {
mnt = ERR_CAST(tlink);
goto free_full_path;
}
ses = tlink_tcon(tlink)->ses;
+ xid = GetXid();
rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls,
&num_referrals, &referrals,
cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ FreeXid(xid);
cifs_put_tlink(tlink);
@@ -339,8 +338,7 @@ success:
free_dfs_info_array(referrals, num_referrals);
free_full_path:
kfree(full_path);
-free_xid:
- FreeXid(xid);
+cdda_exit:
cFYI(1, "leaving %s" , __func__);
return mnt;
}
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 7852cd6..ac51cd2 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -40,6 +40,7 @@
#define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */
#define CIFS_MOUNT_MF_SYMLINKS 0x10000 /* Minshall+French Symlinks enabled */
#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */
+#define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */
struct cifs_sb_info {
struct rb_root tlink_tree;
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 430f510..fc0fd4f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -44,10 +44,14 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes,
int charlen, outlen = 0;
int maxwords = maxbytes / 2;
char tmp[NLS_MAX_CHARSET_SIZE];
+ __u16 ftmp;
- for (i = 0; i < maxwords && from[i]; i++) {
- charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp,
- NLS_MAX_CHARSET_SIZE);
+ for (i = 0; i < maxwords; i++) {
+ ftmp = get_unaligned_le16(&from[i]);
+ if (ftmp == 0)
+ break;
+
+ charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
if (charlen > 0)
outlen += charlen;
else
@@ -58,9 +62,9 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes,
}
/*
- * cifs_mapchar - convert a little-endian char to proper char in codepage
+ * cifs_mapchar - convert a host-endian char to proper char in codepage
* @target - where converted character should be copied
- * @src_char - 2 byte little-endian source character
+ * @src_char - 2 byte host-endian source character
* @cp - codepage to which character should be converted
* @mapchar - should character be mapped according to mapchars mount option?
*
@@ -69,7 +73,7 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes,
* enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
*/
static int
-cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp,
+cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
bool mapchar)
{
int len = 1;
@@ -82,7 +86,7 @@ cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp,
* build_path_from_dentry are modified, as they use slash as
* separator.
*/
- switch (le16_to_cpu(src_char)) {
+ switch (src_char) {
case UNI_COLON:
*target = ':';
break;
@@ -109,8 +113,7 @@ out:
return len;
cp_convert:
- len = cp->uni2char(le16_to_cpu(src_char), target,
- NLS_MAX_CHARSET_SIZE);
+ len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
if (len <= 0) {
*target = '?';
len = 1;
@@ -149,6 +152,7 @@ cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,
int nullsize = nls_nullsize(codepage);
int fromwords = fromlen / 2;
char tmp[NLS_MAX_CHARSET_SIZE];
+ __u16 ftmp;
/*
* because the chars can be of varying widths, we need to take care
@@ -158,19 +162,23 @@ cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,
*/
safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
- for (i = 0; i < fromwords && from[i]; i++) {
+ for (i = 0; i < fromwords; i++) {
+ ftmp = get_unaligned_le16(&from[i]);
+ if (ftmp == 0)
+ break;
+
/*
* check to see if converting this character might make the
* conversion bleed into the null terminator
*/
if (outlen >= safelen) {
- charlen = cifs_mapchar(tmp, from[i], codepage, mapchar);
+ charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
if ((outlen + charlen) > (tolen - nullsize))
break;
}
/* put converted char into 'to' buffer */
- charlen = cifs_mapchar(&to[outlen], from[i], codepage, mapchar);
+ charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
outlen += charlen;
}
@@ -193,24 +201,21 @@ cifs_strtoUCS(__le16 *to, const char *from, int len,
{
int charlen;
int i;
- wchar_t *wchar_to = (wchar_t *)to; /* needed to quiet sparse */
+ wchar_t wchar_to; /* needed to quiet sparse */
for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
-
- /* works for 2.4.0 kernel or later */
- charlen = codepage->char2uni(from, len, &wchar_to[i]);
+ charlen = codepage->char2uni(from, len, &wchar_to);
if (charlen < 1) {
- cERROR(1, "strtoUCS: char2uni of %d returned %d",
- (int)*from, charlen);
+ cERROR(1, "strtoUCS: char2uni of 0x%x returned %d",
+ *from, charlen);
/* A question mark */
- to[i] = cpu_to_le16(0x003f);
+ wchar_to = 0x003f;
charlen = 1;
- } else
- to[i] = cpu_to_le16(wchar_to[i]);
-
+ }
+ put_unaligned_le16(wchar_to, &to[i]);
}
- to[i] = 0;
+ put_unaligned_le16(0, &to[i]);
return i;
}
@@ -252,3 +257,79 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,
return dst;
}
+/*
+ * Convert 16 bit Unicode pathname to wire format from string in current code
+ * page. Conversion may involve remapping up the six characters that are
+ * only legal in POSIX-like OS (if they are present in the string). Path
+ * names are little endian 16 bit Unicode on the wire
+ */
+int
+cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
+ const struct nls_table *cp, int mapChars)
+{
+ int i, j, charlen;
+ int len_remaining = maxlen;
+ char src_char;
+ __u16 temp;
+
+ if (!mapChars)
+ return cifs_strtoUCS(target, source, PATH_MAX, cp);
+
+ for (i = 0, j = 0; i < maxlen; j++) {
+ src_char = source[i];
+ switch (src_char) {
+ case 0:
+ put_unaligned_le16(0, &target[j]);
+ goto ctoUCS_out;
+ case ':':
+ temp = UNI_COLON;
+ break;
+ case '*':
+ temp = UNI_ASTERIK;
+ break;
+ case '?':
+ temp = UNI_QUESTION;
+ break;
+ case '<':
+ temp = UNI_LESSTHAN;
+ break;
+ case '>':
+ temp = UNI_GRTRTHAN;
+ break;
+ case '|':
+ temp = UNI_PIPE;
+ break;
+ /*
+ * FIXME: We can not handle remapping backslash (UNI_SLASH)
+ * until all the calls to build_path_from_dentry are modified,
+ * as they use backslash as separator.
+ */
+ default:
+ charlen = cp->char2uni(source+i, len_remaining,
+ &temp);
+ /*
+ * if no match, use question mark, which at least in
+ * some cases serves as wild card
+ */
+ if (charlen < 1) {
+ temp = 0x003f;
+ charlen = 1;
+ }
+ len_remaining -= charlen;
+ /*
+ * character may take more than one byte in the source
+ * string, but will take exactly two bytes in the
+ * target string
+ */
+ i += charlen;
+ continue;
+ }
+ put_unaligned_le16(temp, &target[j]);
+ i++; /* move to next char in source string */
+ len_remaining--;
+ }
+
+ctoUCS_out:
+ return i;
+}
+
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1e7636b..beeebf1 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -372,6 +372,10 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
GFP_KERNEL);
+ if (!ppace) {
+ cERROR(1, "DACL memory allocation error");
+ return;
+ }
for (i = 0; i < num_aces; ++i) {
ppace[i] = (struct cifs_ace *) (acl_base + acl_size);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 66f3d50..a51585f 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -24,7 +24,6 @@
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifs_debug.h"
-#include "md5.h"
#include "cifs_unicode.h"
#include "cifsproto.h"
#include "ntlmssp.h"
@@ -37,11 +36,6 @@
/* Note that the smb header signature field on input contains the
sequence number before this function is called */
-extern void mdfour(unsigned char *out, unsigned char *in, int n);
-extern void E_md4hash(const unsigned char *passwd, unsigned char *p16);
-extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
- unsigned char *p24);
-
static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
struct TCP_Server_Info *server, char *signature)
{
@@ -234,6 +228,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
/* first calculate 24 bytes ntlm response and then 16 byte session key */
int setup_ntlm_response(struct cifsSesInfo *ses)
{
+ int rc = 0;
unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
char temp_key[CIFS_SESS_KEY_SIZE];
@@ -247,13 +242,26 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
}
ses->auth_key.len = temp_len;
- SMBNTencrypt(ses->password, ses->server->cryptkey,
+ rc = SMBNTencrypt(ses->password, ses->server->cryptkey,
ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+ if (rc) {
+ cFYI(1, "%s Can't generate NTLM response, error: %d",
+ __func__, rc);
+ return rc;
+ }
- E_md4hash(ses->password, temp_key);
- mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
+ rc = E_md4hash(ses->password, temp_key);
+ if (rc) {
+ cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
+ return rc;
+ }
- return 0;
+ rc = mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
+ if (rc)
+ cFYI(1, "%s Can't generate NTLM session key, error: %d",
+ __func__, rc);
+
+ return rc;
}
#ifdef CONFIG_CIFS_WEAK_PW_HASH
@@ -649,9 +657,10 @@ calc_seckey(struct cifsSesInfo *ses)
get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
- if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
+ if (IS_ERR(tfm_arc4)) {
+ rc = PTR_ERR(tfm_arc4);
cERROR(1, "could not allocate crypto API arc4\n");
- return PTR_ERR(tfm_arc4);
+ return rc;
}
desc.tfm = tfm_arc4;
@@ -700,14 +709,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
unsigned int size;
server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
- if (!server->secmech.hmacmd5 ||
- IS_ERR(server->secmech.hmacmd5)) {
+ if (IS_ERR(server->secmech.hmacmd5)) {
cERROR(1, "could not allocate crypto hmacmd5\n");
return PTR_ERR(server->secmech.hmacmd5);
}
server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
- if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) {
+ if (IS_ERR(server->secmech.md5)) {
cERROR(1, "could not allocate crypto md5\n");
rc = PTR_ERR(server->secmech.md5);
goto crypto_allocate_md5_fail;
diff --git a/fs/cifs/cifsencrypt.h b/fs/cifs/cifsencrypt.h
deleted file mode 100644
index 15d2ec0..0000000
--- a/fs/cifs/cifsencrypt.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * fs/cifs/cifsencrypt.h
- *
- * Copyright (c) International Business Machines Corp., 2005
- * Author(s): Steve French (sfrench@us.ibm.com)
- *
- * Externs for misc. small encryption routines
- * so we do not have to put them in cifsproto.h
- *
- * This library is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/* md4.c */
-extern void mdfour(unsigned char *out, unsigned char *in, int n);
-/* smbdes.c */
-extern void E_P16(unsigned char *p14, unsigned char *p16);
-extern void E_P24(unsigned char *p21, const unsigned char *c8,
- unsigned char *p24);
-
-
-
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 99d777a..f297013 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -600,10 +600,17 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
ssize_t written;
+ int rc;
written = generic_file_aio_write(iocb, iov, nr_segs, pos);
- if (!CIFS_I(inode)->clientCanCacheAll)
- filemap_fdatawrite(inode->i_mapping);
+
+ if (CIFS_I(inode)->clientCanCacheAll)
+ return written;
+
+ rc = filemap_fdatawrite(inode->i_mapping);
+ if (rc)
+ cFYI(1, "cifs_file_aio_write: %d rc on %p inode", rc, inode);
+
return written;
}
@@ -733,6 +740,25 @@ const struct file_operations cifs_file_ops = {
.setlease = cifs_setlease,
};
+const struct file_operations cifs_file_strict_ops = {
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = cifs_strict_readv,
+ .aio_write = cifs_strict_writev,
+ .open = cifs_open,
+ .release = cifs_close,
+ .lock = cifs_lock,
+ .fsync = cifs_strict_fsync,
+ .flush = cifs_flush,
+ .mmap = cifs_file_strict_mmap,
+ .splice_read = generic_file_splice_read,
+ .llseek = cifs_llseek,
+#ifdef CONFIG_CIFS_POSIX
+ .unlocked_ioctl = cifs_ioctl,
+#endif /* CONFIG_CIFS_POSIX */
+ .setlease = cifs_setlease,
+};
+
const struct file_operations cifs_file_direct_ops = {
/* no aio, no readv -
BB reevaluate whether they can be done with directio, no cache */
@@ -751,6 +777,7 @@ const struct file_operations cifs_file_direct_ops = {
.llseek = cifs_llseek,
.setlease = cifs_setlease,
};
+
const struct file_operations cifs_file_nobrl_ops = {
.read = do_sync_read,
.write = do_sync_write,
@@ -769,6 +796,24 @@ const struct file_operations cifs_file_nobrl_ops = {
.setlease = cifs_setlease,
};
+const struct file_operations cifs_file_strict_nobrl_ops = {
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = cifs_strict_readv,
+ .aio_write = cifs_strict_writev,
+ .open = cifs_open,
+ .release = cifs_close,
+ .fsync = cifs_strict_fsync,
+ .flush = cifs_flush,
+ .mmap = cifs_file_strict_mmap,
+ .splice_read = generic_file_splice_read,
+ .llseek = cifs_llseek,
+#ifdef CONFIG_CIFS_POSIX
+ .unlocked_ioctl = cifs_ioctl,
+#endif /* CONFIG_CIFS_POSIX */
+ .setlease = cifs_setlease,
+};
+
const struct file_operations cifs_file_direct_nobrl_ops = {
/* no mmap, no aio, no readv -
BB reevaluate whether they can be done with directio, no cache */
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 4739a53..a9371b6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -61,6 +61,7 @@ extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
struct dentry *);
extern int cifs_revalidate_file(struct file *filp);
extern int cifs_revalidate_dentry(struct dentry *);
+extern void cifs_invalidate_mapping(struct inode *inode);
extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int cifs_setattr(struct dentry *, struct iattr *);
@@ -72,19 +73,27 @@ extern const struct inode_operations cifs_dfs_referral_inode_operations;
/* Functions related to files and directories */
extern const struct file_operations cifs_file_ops;
extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */
-extern const struct file_operations cifs_file_nobrl_ops;
-extern const struct file_operations cifs_file_direct_nobrl_ops; /* no brlocks */
+extern const struct file_operations cifs_file_strict_ops; /* if strictio mnt */
+extern const struct file_operations cifs_file_nobrl_ops; /* no brlocks */
+extern const struct file_operations cifs_file_direct_nobrl_ops;
+extern const struct file_operations cifs_file_strict_nobrl_ops;
extern int cifs_open(struct inode *inode, struct file *file);
extern int cifs_close(struct inode *inode, struct file *file);
extern int cifs_closedir(struct inode *inode, struct file *file);
extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
- size_t read_size, loff_t *poffset);
+ size_t read_size, loff_t *poffset);
+extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
- size_t write_size, loff_t *poffset);
+ size_t write_size, loff_t *poffset);
+extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
extern int cifs_lock(struct file *, int, struct file_lock *);
extern int cifs_fsync(struct file *, int);
+extern int cifs_strict_fsync(struct file *, int);
extern int cifs_flush(struct file *, fl_owner_t id);
extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
+extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *);
extern const struct file_operations cifs_dir_ops;
extern int cifs_dir_open(struct inode *inode, struct file *file);
extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
@@ -118,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.69"
+#define CIFS_VERSION "1.71"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 571132c..17afb0f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -161,46 +161,41 @@ struct TCP_Server_Info {
int srv_count; /* reference counter */
/* 15 character server name + 0x20 16th byte indicating type = srv */
char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
+ enum statusEnum tcpStatus; /* what we think the status is */
char *hostname; /* hostname portion of UNC string */
struct socket *ssocket;
struct sockaddr_storage dstaddr;
struct sockaddr_storage srcaddr; /* locally bind to this IP */
+#ifdef CONFIG_NET_NS
+ struct net *net;
+#endif
wait_queue_head_t response_q;
wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/
struct list_head pending_mid_q;
- void *Server_NlsInfo; /* BB - placeholder for future NLS info */
- unsigned short server_codepage; /* codepage for the server */
- enum protocolEnum protocolType;
- char versionMajor;
- char versionMinor;
- bool svlocal:1; /* local server or remote */
bool noblocksnd; /* use blocking sendmsg */
bool noautotune; /* do not autotune send buf sizes */
bool tcp_nodelay;
atomic_t inFlight; /* number of requests on the wire to server */
-#ifdef CONFIG_CIFS_STATS2
- atomic_t inSend; /* requests trying to send */
- atomic_t num_waiters; /* blocked waiting to get in sendrecv */
-#endif
- enum statusEnum tcpStatus; /* what we think the status is */
struct mutex srv_mutex;
struct task_struct *tsk;
char server_GUID[16];
char secMode;
+ bool session_estab; /* mark when very first sess is established */
+ u16 dialect; /* dialect index that server chose */
enum securityEnum secType;
unsigned int maxReq; /* Clients should submit no more */
/* than maxReq distinct unanswered SMBs to the server when using */
/* multiplexed reads or writes */
unsigned int maxBuf; /* maxBuf specifies the maximum */
/* message size the server can send or receive for non-raw SMBs */
+ /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
+ /* when socket is setup (and during reconnect) before NegProt sent */
unsigned int max_rw; /* maxRw specifies the maximum */
/* message size the server can send or receive for */
/* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
unsigned int max_vcs; /* maximum number of smb sessions, at least
those that can be specified uniquely with
vcnumbers */
- char sessid[4]; /* unique token id for this session */
- /* (returned on Negotiate */
int capabilities; /* allow selective disabling of caps by smb sess */
int timeAdj; /* Adjust for difference in server time zone in sec */
__u16 CurrentMid; /* multiplex id - rotating counter */
@@ -210,21 +205,53 @@ struct TCP_Server_Info {
__u32 sequence_number; /* for signing, protected by srv_mutex */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
- u16 dialect; /* dialect index that server chose */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
/* extended security flavors that server supports */
+ bool sec_ntlmssp; /* supports NTLMSSP */
+ bool sec_kerberosu2u; /* supports U2U Kerberos */
bool sec_kerberos; /* supports plain Kerberos */
bool sec_mskerberos; /* supports legacy MS Kerberos */
- bool sec_kerberosu2u; /* supports U2U Kerberos */
- bool sec_ntlmssp; /* supports NTLMSSP */
- bool session_estab; /* mark when very first sess is established */
struct delayed_work echo; /* echo ping workqueue job */
#ifdef CONFIG_CIFS_FSCACHE
struct fscache_cookie *fscache; /* client index cache cookie */
#endif
+#ifdef CONFIG_CIFS_STATS2
+ atomic_t inSend; /* requests trying to send */
+ atomic_t num_waiters; /* blocked waiting to get in sendrecv */
+#endif
};
/*
+ * Macros to allow the TCP_Server_Info->net field and related code to drop out
+ * when CONFIG_NET_NS isn't set.
+ */
+
+#ifdef CONFIG_NET_NS
+
+static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv)
+{
+ return srv->net;
+}
+
+static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
+{
+ srv->net = net;
+}
+
+#else
+
+static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv)
+{
+ return &init_net;
+}
+
+static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
+{
+}
+
+#endif
+
+/*
* Session structure. One of these for each uid session with a particular host
*/
struct cifsSesInfo {
@@ -447,11 +474,11 @@ struct cifsInodeInfo {
/* BB add in lists for dirty pages i.e. write caching info for oplock */
struct list_head openFileList;
__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
- unsigned long time; /* jiffies of last update/check of inode */
- bool clientCanCacheRead:1; /* read oplock */
- bool clientCanCacheAll:1; /* read and writebehind oplock */
- bool delete_pending:1; /* DELETE_ON_CLOSE is set */
- bool invalid_mapping:1; /* pagecache is invalid */
+ bool clientCanCacheRead; /* read oplock */
+ bool clientCanCacheAll; /* read and writebehind oplock */
+ bool delete_pending; /* DELETE_ON_CLOSE is set */
+ bool invalid_mapping; /* pagecache is invalid */
+ unsigned long time; /* jiffies of last update of inode */
u64 server_eof; /* current file size on server */
u64 uniqueid; /* server inode number */
u64 createtime; /* creation time on server */
@@ -627,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
#define MID_REQUEST_SUBMITTED 2
#define MID_RESPONSE_RECEIVED 4
#define MID_RETRY_NEEDED 8 /* session closed while this request out */
-#define MID_NO_RESP_NEEDED 0x10
+#define MID_RESPONSE_MALFORMED 0x10
/* Types of response buffer returned from SendReceive2 */
#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index ea205b4..b5c8cc5 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -23,6 +23,7 @@
#define _CIFSPDU_H
#include <net/sock.h>
+#include <asm/unaligned.h>
#include "smbfsctl.h"
#ifdef CONFIG_CIFS_WEAK_PW_HASH
@@ -426,11 +427,49 @@ struct smb_hdr {
__u16 Mid;
__u8 WordCount;
} __attribute__((packed));
-/* given a pointer to an smb_hdr retrieve the value of byte count */
-#define BCC(smb_var) (*(__u16 *)((char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount)))
-#define BCC_LE(smb_var) (*(__le16 *)((char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount)))
+
+/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */
+#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \
+ (2 * (smb_var)->WordCount))
+
/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
-#define pByteArea(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount) + 2)
+#define pByteArea(smb_var) (BCC(smb_var) + 2)
+
+/* get the converted ByteCount for a SMB packet and return it */
+static inline __u16
+get_bcc(struct smb_hdr *hdr)
+{
+ __u16 *bc_ptr = (__u16 *)BCC(hdr);
+
+ return get_unaligned(bc_ptr);
+}
+
+/* get the unconverted ByteCount for a SMB packet and return it */
+static inline __u16
+get_bcc_le(struct smb_hdr *hdr)
+{
+ __le16 *bc_ptr = (__le16 *)BCC(hdr);
+
+ return get_unaligned_le16(bc_ptr);
+}
+
+/* set the ByteCount for a SMB packet in host-byte order */
+static inline void
+put_bcc(__u16 count, struct smb_hdr *hdr)
+{
+ __u16 *bc_ptr = (__u16 *)BCC(hdr);
+
+ put_unaligned(count, bc_ptr);
+}
+
+/* set the ByteCount for a SMB packet in little-endian */
+static inline void
+put_bcc_le(__u16 count, struct smb_hdr *hdr)
+{
+ __le16 *bc_ptr = (__le16 *)BCC(hdr);
+
+ put_unaligned_le16(count, bc_ptr);
+}
/*
* Computer Name Length (since Netbios name was length 16 with last byte 0x20)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 982895f..8096f27 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -85,6 +85,8 @@ extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
extern bool is_valid_oplock_break(struct smb_hdr *smb,
struct TCP_Server_Info *);
extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
+extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
+ unsigned int bytes_written);
extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
extern unsigned int smbCalcSize(struct smb_hdr *ptr);
@@ -373,7 +375,7 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
extern int cifs_verify_signature(struct smb_hdr *,
struct TCP_Server_Info *server,
__u32 expected_sequence_number);
-extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
+extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
extern int setup_ntlm_response(struct cifsSesInfo *);
extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
@@ -423,4 +425,11 @@ extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
const unsigned char *path,
struct cifs_sb_info *cifs_sb, int xid);
+extern int mdfour(unsigned char *, unsigned char *, int);
+extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
+extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
+ unsigned char *p24);
+extern void E_P16(unsigned char *p14, unsigned char *p16);
+extern void E_P24(unsigned char *p21, const unsigned char *c8,
+ unsigned char *p24);
#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3711345..904aa47 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -136,9 +136,6 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
}
}
- if (ses->status == CifsExiting)
- return -EIO;
-
/*
* Give demultiplex thread up to 10 seconds to reconnect, should be
* greater than cifs socket timeout which is 7 seconds
@@ -156,7 +153,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
* retrying until process is killed or server comes
* back on-line
*/
- if (!tcon->retry || ses->status == CifsExiting) {
+ if (!tcon->retry) {
cFYI(1, "gave up waiting on reconnect in smb_init");
return -EHOSTDOWN;
}
@@ -331,37 +328,35 @@ smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon,
static int validate_t2(struct smb_t2_rsp *pSMB)
{
- int rc = -EINVAL;
- int total_size;
- char *pBCC;
+ unsigned int total_size;
+
+ /* check for plausible wct */
+ if (pSMB->hdr.WordCount < 10)
+ goto vt2_err;
- /* check for plausible wct, bcc and t2 data and parm sizes */
/* check for parm and data offset going beyond end of smb */
- if (pSMB->hdr.WordCount >= 10) {
- if ((le16_to_cpu(pSMB->t2_rsp.ParameterOffset) <= 1024) &&
- (le16_to_cpu(pSMB->t2_rsp.DataOffset) <= 1024)) {
- /* check that bcc is at least as big as parms + data */
- /* check that bcc is less than negotiated smb buffer */
- total_size = le16_to_cpu(pSMB->t2_rsp.ParameterCount);
- if (total_size < 512) {
- total_size +=
- le16_to_cpu(pSMB->t2_rsp.DataCount);
- /* BCC le converted in SendReceive */
- pBCC = (pSMB->hdr.WordCount * 2) +
- sizeof(struct smb_hdr) +
- (char *)pSMB;
- if ((total_size <= (*(u16 *)pBCC)) &&
- (total_size <
- CIFSMaxBufSize+MAX_CIFS_HDR_SIZE)) {
- return 0;
- }
- }
- }
- }
+ if (get_unaligned_le16(&pSMB->t2_rsp.ParameterOffset) > 1024 ||
+ get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024)
+ goto vt2_err;
+
+ /* check that bcc is at least as big as parms + data */
+ /* check that bcc is less than negotiated smb buffer */
+ total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount);
+ if (total_size >= 512)
+ goto vt2_err;
+
+ total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount);
+ if (total_size > get_bcc(&pSMB->hdr) ||
+ total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)
+ goto vt2_err;
+
+ return 0;
+vt2_err:
cifs_dump_mem("Invalid transact2 SMB: ", (char *)pSMB,
sizeof(struct smb_t2_rsp) + 16);
- return rc;
+ return -EINVAL;
}
+
int
CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
{
@@ -452,7 +447,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize),
(__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
- GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey);
/* even though we do not use raw we might as well set this
accurately, in case we ever find a need for it */
if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
@@ -566,7 +560,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
(__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
cFYI(DBG2, "Max buf = %d", ses->server->maxBuf);
- GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
server->capabilities = le32_to_cpu(pSMBr->Capabilities);
server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
server->timeAdj *= 60;
@@ -737,9 +730,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
/* set up echo request */
smb->hdr.Tid = cpu_to_le16(0xffff);
- smb->hdr.WordCount = cpu_to_le16(1);
- smb->EchoCount = cpu_to_le16(1);
- smb->ByteCount = cpu_to_le16(1);
+ smb->hdr.WordCount = 1;
+ put_unaligned_le16(1, &smb->EchoCount);
+ put_bcc_le(1, &smb->hdr);
smb->Data[0] = 'a';
smb->hdr.smb_buf_length += 3;
@@ -4918,7 +4911,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
__u16 fid, __u32 pid_of_opener, bool SetAllocation)
{
struct smb_com_transaction2_sfi_req *pSMB = NULL;
- char *data_offset;
struct file_end_of_file_info *parm_data;
int rc = 0;
__u16 params, param_offset, offset, byte_count, count;
@@ -4942,8 +4934,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
offset = param_offset + params;
- data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
-
count = sizeof(struct file_end_of_file_info);
pSMB->MaxParameterCount = cpu_to_le16(2);
/* BB find exact max SMB PDU from sess structure BB */
@@ -5611,7 +5601,7 @@ QAllEAsRetry:
}
/* make sure list_len doesn't go past end of SMB */
- end_of_smb = (char *)pByteArea(&pSMBr->hdr) + BCC(&pSMBr->hdr);
+ end_of_smb = (char *)pByteArea(&pSMBr->hdr) + get_bcc(&pSMBr->hdr);
if ((char *)ea_response_data + list_len > end_of_smb) {
cFYI(1, "EA list appears to go beyond SMB");
rc = -EIO;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8d46575..8d6c17a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -55,9 +55,6 @@
/* SMB echo "timeout" -- FIXME: tunable? */
#define SMB_ECHO_INTERVAL (60 * HZ)
-extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
- unsigned char *p24);
-
extern mempool_t *cifs_req_poolp;
struct smb_vol {
@@ -87,6 +84,7 @@ struct smb_vol {
bool no_xattr:1; /* set if xattr (EA) support should be disabled*/
bool server_ino:1; /* use inode numbers from server ie UniqueId */
bool direct_io:1;
+ bool strict_io:1; /* strict cache behavior */
bool remap:1; /* set to remap seven reserved chars in filenames */
bool posix_paths:1; /* unset to not ask for posix pathnames. */
bool no_linux_ext:1;
@@ -232,9 +230,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
{
struct smb_t2_rsp *pSMBt;
- int total_data_size;
- int data_in_this_rsp;
int remaining;
+ __u16 total_data_size, data_in_this_rsp;
if (pSMB->Command != SMB_COM_TRANSACTION2)
return 0;
@@ -248,8 +245,8 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
pSMBt = (struct smb_t2_rsp *)pSMB;
- total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount);
- data_in_this_rsp = le16_to_cpu(pSMBt->t2_rsp.DataCount);
+ total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
+ data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount);
remaining = total_data_size - data_in_this_rsp;
@@ -275,21 +272,18 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
{
struct smb_t2_rsp *pSMB2 = (struct smb_t2_rsp *)psecond;
struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)pTargetSMB;
- int total_data_size;
- int total_in_buf;
- int remaining;
- int total_in_buf2;
char *data_area_of_target;
char *data_area_of_buf2;
- __u16 byte_count;
+ int remaining;
+ __u16 byte_count, total_data_size, total_in_buf, total_in_buf2;
- total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount);
+ total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
- if (total_data_size != le16_to_cpu(pSMB2->t2_rsp.TotalDataCount)) {
+ if (total_data_size !=
+ get_unaligned_le16(&pSMB2->t2_rsp.TotalDataCount))
cFYI(1, "total data size of primary and secondary t2 differ");
- }
- total_in_buf = le16_to_cpu(pSMBt->t2_rsp.DataCount);
+ total_in_buf = get_unaligned_le16(&pSMBt->t2_rsp.DataCount);
remaining = total_data_size - total_in_buf;
@@ -299,28 +293,28 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
if (remaining == 0) /* nothing to do, ignore */
return 0;
- total_in_buf2 = le16_to_cpu(pSMB2->t2_rsp.DataCount);
+ total_in_buf2 = get_unaligned_le16(&pSMB2->t2_rsp.DataCount);
if (remaining < total_in_buf2) {
cFYI(1, "transact2 2nd response contains too much data");
}
/* find end of first SMB data area */
data_area_of_target = (char *)&pSMBt->hdr.Protocol +
- le16_to_cpu(pSMBt->t2_rsp.DataOffset);
+ get_unaligned_le16(&pSMBt->t2_rsp.DataOffset);
/* validate target area */
- data_area_of_buf2 = (char *) &pSMB2->hdr.Protocol +
- le16_to_cpu(pSMB2->t2_rsp.DataOffset);
+ data_area_of_buf2 = (char *)&pSMB2->hdr.Protocol +
+ get_unaligned_le16(&pSMB2->t2_rsp.DataOffset);
data_area_of_target += total_in_buf;
/* copy second buffer into end of first buffer */
memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
total_in_buf += total_in_buf2;
- pSMBt->t2_rsp.DataCount = cpu_to_le16(total_in_buf);
- byte_count = le16_to_cpu(BCC_LE(pTargetSMB));
+ put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount);
+ byte_count = get_bcc_le(pTargetSMB);
byte_count += total_in_buf2;
- BCC_LE(pTargetSMB) = cpu_to_le16(byte_count);
+ put_bcc_le(byte_count, pTargetSMB);
byte_count = pTargetSMB->smb_buf_length;
byte_count += total_in_buf2;
@@ -334,7 +328,6 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
return 0; /* we are done */
} else /* more responses to go */
return 1;
-
}
static void
@@ -344,8 +337,13 @@ cifs_echo_request(struct work_struct *work)
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, echo.work);
- /* no need to ping if we got a response recently */
- if (time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
+ /*
+ * We cannot send an echo until the NEGOTIATE_PROTOCOL request is
+ * done, which is indicated by maxBuf != 0. Also, no need to ping if
+ * we got a response recently
+ */
+ if (server->maxBuf == 0 ||
+ time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
goto requeue_echo;
rc = CIFSSMBEcho(server);
@@ -585,14 +583,23 @@ incomplete_rcv:
else if (reconnect == 1)
continue;
- length += 4; /* account for rfc1002 hdr */
+ total_read += 4; /* account for rfc1002 hdr */
+ dump_smb(smb_buffer, total_read);
- dump_smb(smb_buffer, length);
- if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) {
- cifs_dump_mem("Bad SMB: ", smb_buffer, 48);
- continue;
- }
+ /*
+ * We know that we received enough to get to the MID as we
+ * checked the pdu_length earlier. Now check to see
+ * if the rest of the header is OK. We borrow the length
+ * var for the rest of the loop to avoid a new stack var.
+ *
+ * 48 bytes is enough to display the header and a little bit
+ * into the payload for debugging purposes.
+ */
+ length = checkSMB(smb_buffer, smb_buffer->Mid, total_read);
+ if (length != 0)
+ cifs_dump_mem("Bad SMB: ", smb_buffer,
+ min_t(unsigned int, total_read, 48));
mid_entry = NULL;
server->lstrp = jiffies;
@@ -604,7 +611,8 @@ incomplete_rcv:
if ((mid_entry->mid == smb_buffer->Mid) &&
(mid_entry->midState == MID_REQUEST_SUBMITTED) &&
(mid_entry->command == smb_buffer->Command)) {
- if (check2ndT2(smb_buffer,server->maxBuf) > 0) {
+ if (length == 0 &&
+ check2ndT2(smb_buffer, server->maxBuf) > 0) {
/* We have a multipart transact2 resp */
isMultiRsp = true;
if (mid_entry->resp_buf) {
@@ -639,12 +647,17 @@ incomplete_rcv:
mid_entry->resp_buf = smb_buffer;
mid_entry->largeBuf = isLargeBuf;
multi_t2_fnd:
- mid_entry->midState = MID_RESPONSE_RECEIVED;
- list_del_init(&mid_entry->qhead);
- mid_entry->callback(mid_entry);
+ if (length == 0)
+ mid_entry->midState =
+ MID_RESPONSE_RECEIVED;
+ else
+ mid_entry->midState =
+ MID_RESPONSE_MALFORMED;
#ifdef CONFIG_CIFS_STATS2
mid_entry->when_received = jiffies;
#endif
+ list_del_init(&mid_entry->qhead);
+ mid_entry->callback(mid_entry);
break;
}
mid_entry = NULL;
@@ -660,6 +673,9 @@ multi_t2_fnd:
else
smallbuf = NULL;
}
+ } else if (length != 0) {
+ /* response sanity checks failed */
+ continue;
} else if (!is_valid_oplock_break(smb_buffer, server) &&
!isMultiRsp) {
cERROR(1, "No task to wake, unknown frame received! "
@@ -1349,6 +1365,8 @@ cifs_parse_mount_options(char *options, const char *devname,
vol->direct_io = 1;
} else if (strnicmp(data, "forcedirectio", 13) == 0) {
vol->direct_io = 1;
+ } else if (strnicmp(data, "strictcache", 11) == 0) {
+ vol->strict_io = 1;
} else if (strnicmp(data, "noac", 4) == 0) {
printk(KERN_WARNING "CIFS: Mount option noac not "
"supported. Instead set "
@@ -1573,6 +1591,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
+ if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
+ continue;
+
if (!match_address(server, addr,
(struct sockaddr *)&vol->srcaddr))
continue;
@@ -1603,6 +1624,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
return;
}
+ put_net(cifs_net_ns(server));
+
list_del_init(&server->tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -1677,6 +1700,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
goto out_err;
}
+ cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
tcp_ses->hostname = extract_hostname(volume_info->UNC);
if (IS_ERR(tcp_ses->hostname)) {
rc = PTR_ERR(tcp_ses->hostname);
@@ -1757,6 +1781,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
out_err_crypto_release:
cifs_crypto_shash_release(tcp_ses);
+ put_net(cifs_net_ns(tcp_ses));
+
out_err:
if (tcp_ses) {
if (!IS_ERR(tcp_ses->hostname))
@@ -2268,8 +2294,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
}
if (socket == NULL) {
- rc = sock_create_kern(sfamily, SOCK_STREAM,
- IPPROTO_TCP, &socket);
+ rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM,
+ IPPROTO_TCP, &socket, 1);
if (rc < 0) {
cERROR(1, "Error %d creating socket", rc);
server->ssocket = NULL;
@@ -2581,6 +2607,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
if (pvolume_info->multiuser)
cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER |
CIFS_MOUNT_NO_PERM);
+ if (pvolume_info->strict_io)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
if (pvolume_info->direct_io) {
cFYI(1, "mounting share using direct i/o");
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
@@ -2937,8 +2965,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
TCONX_RSP *pSMBr;
unsigned char *bcc_ptr;
int rc = 0;
- int length, bytes_left;
- __u16 count;
+ int length;
+ __u16 bytes_left, count;
if (ses == NULL)
return -EIO;
@@ -2982,7 +3010,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
bcc_ptr);
else
#endif /* CIFS_WEAK_PW_HASH */
- SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr);
+ rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
+ bcc_ptr);
bcc_ptr += CIFS_AUTH_RESP_SIZE;
if (ses->capabilities & CAP_UNICODE) {
@@ -3032,7 +3061,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
tcon->need_reconnect = false;
tcon->tid = smb_buffer_response->Tid;
bcc_ptr = pByteArea(smb_buffer_response);
- bytes_left = BCC(smb_buffer_response);
+ bytes_left = get_bcc(smb_buffer_response);
length = strnlen(bcc_ptr, bytes_left - 2);
if (smb_buffer->Flags2 & SMBFLG2_UNICODE)
is_unicode = true;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index bd2a028..e964b1c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -287,6 +287,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
struct inode *inode = cifs_file->dentry->d_inode;
struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsLockInfo *li, *tmp;
spin_lock(&cifs_file_list_lock);
@@ -302,6 +303,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
if (list_empty(&cifsi->openFileList)) {
cFYI(1, "closing last open instance for inode %p",
cifs_file->dentry->d_inode);
+
+ /* in strict cache mode we need invalidate mapping on the last
+ close because it may cause a error when we open this file
+ again and get at least level II oplock */
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
+ CIFS_I(inode)->invalid_mapping = true;
+
cifs_set_oplock_level(cifsi, 0);
}
spin_unlock(&cifs_file_list_lock);
@@ -338,7 +346,6 @@ int cifs_open(struct inode *inode, struct file *file)
struct cifsTconInfo *tcon;
struct tcon_link *tlink;
struct cifsFileInfo *pCifsFile = NULL;
- struct cifsInodeInfo *pCifsInode;
char *full_path = NULL;
bool posix_open_ok = false;
__u16 netfid;
@@ -353,8 +360,6 @@ int cifs_open(struct inode *inode, struct file *file)
}
tcon = tlink_tcon(tlink);
- pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
-
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
rc = -ENOMEM;
@@ -840,7 +845,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
}
/* update the file size (if needed) after a write */
-static void
+void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
unsigned int bytes_written)
{
@@ -1138,7 +1143,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
char *write_data;
int rc = -EFAULT;
int bytes_written = 0;
- struct cifs_sb_info *cifs_sb;
struct inode *inode;
struct cifsFileInfo *open_file;
@@ -1146,7 +1150,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
return -EFAULT;
inode = page->mapping->host;
- cifs_sb = CIFS_SB(inode->i_sb);
offset += (loff_t)from;
write_data = kmap(page);
@@ -1520,27 +1523,47 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
return rc;
}
-int cifs_fsync(struct file *file, int datasync)
+int cifs_strict_fsync(struct file *file, int datasync)
{
int xid;
int rc = 0;
struct cifsTconInfo *tcon;
struct cifsFileInfo *smbfile = file->private_data;
struct inode *inode = file->f_path.dentry->d_inode;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
xid = GetXid();
cFYI(1, "Sync file - name: %s datasync: 0x%x",
file->f_path.dentry->d_name.name, datasync);
- rc = filemap_write_and_wait(inode->i_mapping);
- if (rc == 0) {
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ if (!CIFS_I(inode)->clientCanCacheRead)
+ cifs_invalidate_mapping(inode);
- tcon = tlink_tcon(smbfile->tlink);
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
- rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
- }
+ tcon = tlink_tcon(smbfile->tlink);
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
+ rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
+
+ FreeXid(xid);
+ return rc;
+}
+
+int cifs_fsync(struct file *file, int datasync)
+{
+ int xid;
+ int rc = 0;
+ struct cifsTconInfo *tcon;
+ struct cifsFileInfo *smbfile = file->private_data;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+
+ xid = GetXid();
+
+ cFYI(1, "Sync file - name: %s datasync: 0x%x",
+ file->f_path.dentry->d_name.name, datasync);
+
+ tcon = tlink_tcon(smbfile->tlink);
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
+ rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
FreeXid(xid);
return rc;
@@ -1591,42 +1614,244 @@ int cifs_flush(struct file *file, fl_owner_t id)
return rc;
}
-ssize_t cifs_user_read(struct file *file, char __user *read_data,
- size_t read_size, loff_t *poffset)
+static int
+cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
- int rc = -EACCES;
+ int rc = 0;
+ unsigned long i;
+
+ for (i = 0; i < num_pages; i++) {
+ pages[i] = alloc_page(__GFP_HIGHMEM);
+ if (!pages[i]) {
+ /*
+ * save number of pages we have already allocated and
+ * return with ENOMEM error
+ */
+ num_pages = i;
+ rc = -ENOMEM;
+ goto error;
+ }
+ }
+
+ return rc;
+
+error:
+ for (i = 0; i < num_pages; i++)
+ put_page(pages[i]);
+ return rc;
+}
+
+static inline
+size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
+{
+ size_t num_pages;
+ size_t clen;
+
+ clen = min_t(const size_t, len, wsize);
+ num_pages = clen / PAGE_CACHE_SIZE;
+ if (clen % PAGE_CACHE_SIZE)
+ num_pages++;
+
+ if (cur_len)
+ *cur_len = clen;
+
+ return num_pages;
+}
+
+static ssize_t
+cifs_iovec_write(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *poffset)
+{
+ unsigned int written;
+ unsigned long num_pages, npages, i;
+ size_t copied, len, cur_len;
+ ssize_t total_written = 0;
+ struct kvec *to_send;
+ struct page **pages;
+ struct iov_iter it;
+ struct inode *inode;
+ struct cifsFileInfo *open_file;
+ struct cifsTconInfo *pTcon;
+ struct cifs_sb_info *cifs_sb;
+ int xid, rc;
+
+ len = iov_length(iov, nr_segs);
+ if (!len)
+ return 0;
+
+ rc = generic_write_checks(file, poffset, &len, 0);
+ if (rc)
+ return rc;
+
+ cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
+
+ pages = kmalloc(sizeof(struct pages *)*num_pages, GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
+ if (!to_send) {
+ kfree(pages);
+ return -ENOMEM;
+ }
+
+ rc = cifs_write_allocate_pages(pages, num_pages);
+ if (rc) {
+ kfree(pages);
+ kfree(to_send);
+ return rc;
+ }
+
+ xid = GetXid();
+ open_file = file->private_data;
+ pTcon = tlink_tcon(open_file->tlink);
+ inode = file->f_path.dentry->d_inode;
+
+ iov_iter_init(&it, iov, nr_segs, len, 0);
+ npages = num_pages;
+
+ do {
+ size_t save_len = cur_len;
+ for (i = 0; i < npages; i++) {
+ copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
+ copied = iov_iter_copy_from_user(pages[i], &it, 0,
+ copied);
+ cur_len -= copied;
+ iov_iter_advance(&it, copied);
+ to_send[i+1].iov_base = kmap(pages[i]);
+ to_send[i+1].iov_len = copied;
+ }
+
+ cur_len = save_len - cur_len;
+
+ do {
+ if (open_file->invalidHandle) {
+ rc = cifs_reopen_file(open_file, false);
+ if (rc != 0)
+ break;
+ }
+ rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid,
+ cur_len, *poffset, &written,
+ to_send, npages, 0);
+ } while (rc == -EAGAIN);
+
+ for (i = 0; i < npages; i++)
+ kunmap(pages[i]);
+
+ if (written) {
+ len -= written;
+ total_written += written;
+ cifs_update_eof(CIFS_I(inode), *poffset, written);
+ *poffset += written;
+ } else if (rc < 0) {
+ if (!total_written)
+ total_written = rc;
+ break;
+ }
+
+ /* get length and number of kvecs of the next write */
+ npages = get_numpages(cifs_sb->wsize, len, &cur_len);
+ } while (len > 0);
+
+ if (total_written > 0) {
+ spin_lock(&inode->i_lock);
+ if (*poffset > inode->i_size)
+ i_size_write(inode, *poffset);
+ spin_unlock(&inode->i_lock);
+ }
+
+ cifs_stats_bytes_written(pTcon, total_written);
+ mark_inode_dirty_sync(inode);
+
+ for (i = 0; i < num_pages; i++)
+ put_page(pages[i]);
+ kfree(to_send);
+ kfree(pages);
+ FreeXid(xid);
+ return total_written;
+}
+
+static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ ssize_t written;
+ struct inode *inode;
+
+ inode = iocb->ki_filp->f_path.dentry->d_inode;
+
+ /*
+ * BB - optimize the way when signing is disabled. We can drop this
+ * extra memory-to-memory copying and use iovec buffers for constructing
+ * write request.
+ */
+
+ written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
+ if (written > 0) {
+ CIFS_I(inode)->invalid_mapping = true;
+ iocb->ki_pos = pos;
+ }
+
+ return written;
+}
+
+ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct inode *inode;
+
+ inode = iocb->ki_filp->f_path.dentry->d_inode;
+
+ if (CIFS_I(inode)->clientCanCacheAll)
+ return generic_file_aio_write(iocb, iov, nr_segs, pos);
+
+ /*
+ * In strict cache mode we need to write the data to the server exactly
+ * from the pos to pos+len-1 rather than flush all affected pages
+ * because it may cause a error with mandatory locks on these pages but
+ * not on the region from pos to ppos+len-1.
+ */
+
+ return cifs_user_writev(iocb, iov, nr_segs, pos);
+}
+
+static ssize_t
+cifs_iovec_read(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *poffset)
+{
+ int rc;
+ int xid;
+ ssize_t total_read;
unsigned int bytes_read = 0;
- unsigned int total_read = 0;
- unsigned int current_read_size;
+ size_t len, cur_len;
+ int iov_offset = 0;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
- int xid;
struct cifsFileInfo *open_file;
- char *smb_read_data;
- char __user *current_offset;
struct smb_com_read_rsp *pSMBr;
+ char *read_data;
+
+ if (!nr_segs)
+ return 0;
+
+ len = iov_length(iov, nr_segs);
+ if (!len)
+ return 0;
xid = GetXid();
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
- if (file->private_data == NULL) {
- rc = -EBADF;
- FreeXid(xid);
- return rc;
- }
open_file = file->private_data;
pTcon = tlink_tcon(open_file->tlink);
if ((file->f_flags & O_ACCMODE) == O_WRONLY)
cFYI(1, "attempting read on write only file instance");
- for (total_read = 0, current_offset = read_data;
- read_size > total_read;
- total_read += bytes_read, current_offset += bytes_read) {
- current_read_size = min_t(const int, read_size - total_read,
- cifs_sb->rsize);
+ for (total_read = 0; total_read < len; total_read += bytes_read) {
+ cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
rc = -EAGAIN;
- smb_read_data = NULL;
+ read_data = NULL;
+
while (rc == -EAGAIN) {
int buf_type = CIFS_NO_BUFFER;
if (open_file->invalidHandle) {
@@ -1634,27 +1859,25 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
if (rc != 0)
break;
}
- rc = CIFSSMBRead(xid, pTcon,
- open_file->netfid,
- current_read_size, *poffset,
- &bytes_read, &smb_read_data,
- &buf_type);
- pSMBr = (struct smb_com_read_rsp *)smb_read_data;
- if (smb_read_data) {
- if (copy_to_user(current_offset,
- smb_read_data +
- 4 /* RFC1001 length field */ +
- le16_to_cpu(pSMBr->DataOffset),
- bytes_read))
+ rc = CIFSSMBRead(xid, pTcon, open_file->netfid,
+ cur_len, *poffset, &bytes_read,
+ &read_data, &buf_type);
+ pSMBr = (struct smb_com_read_rsp *)read_data;
+ if (read_data) {
+ char *data_offset = read_data + 4 +
+ le16_to_cpu(pSMBr->DataOffset);
+ if (memcpy_toiovecend(iov, data_offset,
+ iov_offset, bytes_read))
rc = -EFAULT;
-
if (buf_type == CIFS_SMALL_BUFFER)
- cifs_small_buf_release(smb_read_data);
+ cifs_small_buf_release(read_data);
else if (buf_type == CIFS_LARGE_BUFFER)
- cifs_buf_release(smb_read_data);
- smb_read_data = NULL;
+ cifs_buf_release(read_data);
+ read_data = NULL;
+ iov_offset += bytes_read;
}
}
+
if (rc || (bytes_read == 0)) {
if (total_read) {
break;
@@ -1667,13 +1890,57 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
*poffset += bytes_read;
}
}
+
FreeXid(xid);
return total_read;
}
+ssize_t cifs_user_read(struct file *file, char __user *read_data,
+ size_t read_size, loff_t *poffset)
+{
+ struct iovec iov;
+ iov.iov_base = read_data;
+ iov.iov_len = read_size;
+
+ return cifs_iovec_read(file, &iov, 1, poffset);
+}
+
+static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ ssize_t read;
+
+ read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
+ if (read > 0)
+ iocb->ki_pos = pos;
+
+ return read;
+}
+
+ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct inode *inode;
+
+ inode = iocb->ki_filp->f_path.dentry->d_inode;
+
+ if (CIFS_I(inode)->clientCanCacheRead)
+ return generic_file_aio_read(iocb, iov, nr_segs, pos);
+
+ /*
+ * In strict cache mode we need to read from the server all the time
+ * if we don't have level II oplock because the server can delay mtime
+ * change - so we can't make a decision about inode invalidating.
+ * And we can also fail with pagereading if there are mandatory locks
+ * on pages affected by this read but not on the region from pos to
+ * pos+len-1.
+ */
+
+ return cifs_user_readv(iocb, iov, nr_segs, pos);
+}
static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
- loff_t *poffset)
+ loff_t *poffset)
{
int rc = -EACCES;
unsigned int bytes_read = 0;
@@ -1741,6 +2008,21 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
return total_read;
}
+int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int rc, xid;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ xid = GetXid();
+
+ if (!CIFS_I(inode)->clientCanCacheRead)
+ cifs_invalidate_mapping(inode);
+
+ rc = generic_file_mmap(file, vma);
+ FreeXid(xid);
+ return rc;
+}
+
int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
int rc, xid;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 6c9ee80..8852470 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -44,13 +44,17 @@ static void cifs_set_ops(struct inode *inode)
inode->i_fop = &cifs_file_direct_nobrl_ops;
else
inode->i_fop = &cifs_file_direct_ops;
+ } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+ inode->i_fop = &cifs_file_strict_nobrl_ops;
+ else
+ inode->i_fop = &cifs_file_strict_ops;
} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
inode->i_fop = &cifs_file_nobrl_ops;
else { /* not direct, send byte range locks */
inode->i_fop = &cifs_file_ops;
}
-
/* check if server can support readpages */
if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf <
PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
@@ -1679,7 +1683,7 @@ cifs_inode_needs_reval(struct inode *inode)
/*
* Zap the cache. Called when invalid_mapping flag is set.
*/
-static void
+void
cifs_invalidate_mapping(struct inode *inode)
{
int rc;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 306769d..e8804d3 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -28,7 +28,6 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
-#include "md5.h"
#define CIFS_MF_SYMLINK_LEN_OFFSET (4+1)
#define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1))
@@ -47,6 +46,45 @@
md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15]
static int
+symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
+{
+ int rc;
+ unsigned int size;
+ struct crypto_shash *md5;
+ struct sdesc *sdescmd5;
+
+ md5 = crypto_alloc_shash("md5", 0, 0);
+ if (IS_ERR(md5)) {
+ rc = PTR_ERR(md5);
+ cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc);
+ return rc;
+ }
+ size = sizeof(struct shash_desc) + crypto_shash_descsize(md5);
+ sdescmd5 = kmalloc(size, GFP_KERNEL);
+ if (!sdescmd5) {
+ rc = -ENOMEM;
+ cERROR(1, "%s: Memory allocation failure\n", __func__);
+ goto symlink_hash_err;
+ }
+ sdescmd5->shash.tfm = md5;
+ sdescmd5->shash.flags = 0x0;
+
+ rc = crypto_shash_init(&sdescmd5->shash);
+ if (rc) {
+ cERROR(1, "%s: Could not init md5 shash\n", __func__);
+ goto symlink_hash_err;
+ }
+ crypto_shash_update(&sdescmd5->shash, link_str, link_len);
+ rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
+
+symlink_hash_err:
+ crypto_free_shash(md5);
+ kfree(sdescmd5);
+
+ return rc;
+}
+
+static int
CIFSParseMFSymlink(const u8 *buf,
unsigned int buf_len,
unsigned int *_link_len,
@@ -56,7 +94,6 @@ CIFSParseMFSymlink(const u8 *buf,
unsigned int link_len;
const char *md5_str1;
const char *link_str;
- struct MD5Context md5_ctx;
u8 md5_hash[16];
char md5_str2[34];
@@ -70,9 +107,11 @@ CIFSParseMFSymlink(const u8 *buf,
if (rc != 1)
return -EINVAL;
- cifs_MD5_init(&md5_ctx);
- cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len);
- cifs_MD5_final(md5_hash, &md5_ctx);
+ rc = symlink_hash(link_len, link_str, md5_hash);
+ if (rc) {
+ cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc);
+ return rc;
+ }
snprintf(md5_str2, sizeof(md5_str2),
CIFS_MF_SYMLINK_MD5_FORMAT,
@@ -94,9 +133,9 @@ CIFSParseMFSymlink(const u8 *buf,
static int
CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
{
+ int rc;
unsigned int link_len;
unsigned int ofs;
- struct MD5Context md5_ctx;
u8 md5_hash[16];
if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE)
@@ -107,9 +146,11 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
return -ENAMETOOLONG;
- cifs_MD5_init(&md5_ctx);
- cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len);
- cifs_MD5_final(md5_hash, &md5_ctx);
+ rc = symlink_hash(link_len, link_str, md5_hash);
+ if (rc) {
+ cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc);
+ return rc;
+ }
snprintf(buf, buf_len,
CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT,
diff --git a/fs/cifs/md4.c b/fs/cifs/md4.c
deleted file mode 100644
index a725c26..0000000
--- a/fs/cifs/md4.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- Unix SMB/Netbios implementation.
- Version 1.9.
- a implementation of MD4 designed for use in the SMB authentication protocol
- Copyright (C) Andrew Tridgell 1997-1998.
- Modified by Steve French (sfrench@us.ibm.com) 2002-2003
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-#include <linux/module.h>
-#include <linux/fs.h>
-#include "cifsencrypt.h"
-
-/* NOTE: This code makes no attempt to be fast! */
-
-static __u32
-F(__u32 X, __u32 Y, __u32 Z)
-{
- return (X & Y) | ((~X) & Z);
-}
-
-static __u32
-G(__u32 X, __u32 Y, __u32 Z)
-{
- return (X & Y) | (X & Z) | (Y & Z);
-}
-
-static __u32
-H(__u32 X, __u32 Y, __u32 Z)
-{
- return X ^ Y ^ Z;
-}
-
-static __u32
-lshift(__u32 x, int s)
-{
- x &= 0xFFFFFFFF;
- return ((x << s) & 0xFFFFFFFF) | (x >> (32 - s));
-}
-
-#define ROUND1(a,b,c,d,k,s) (*a) = lshift((*a) + F(*b,*c,*d) + X[k], s)
-#define ROUND2(a,b,c,d,k,s) (*a) = lshift((*a) + G(*b,*c,*d) + X[k] + (__u32)0x5A827999,s)
-#define ROUND3(a,b,c,d,k,s) (*a) = lshift((*a) + H(*b,*c,*d) + X[k] + (__u32)0x6ED9EBA1,s)
-
-/* this applies md4 to 64 byte chunks */
-static void
-mdfour64(__u32 *M, __u32 *A, __u32 *B, __u32 *C, __u32 *D)
-{
- int j;
- __u32 AA, BB, CC, DD;
- __u32 X[16];
-
-
- for (j = 0; j < 16; j++)
- X[j] = M[j];
-
- AA = *A;
- BB = *B;
- CC = *C;
- DD = *D;
-
- ROUND1(A, B, C, D, 0, 3);
- ROUND1(D, A, B, C, 1, 7);
- ROUND1(C, D, A, B, 2, 11);
- ROUND1(B, C, D, A, 3, 19);
- ROUND1(A, B, C, D, 4, 3);
- ROUND1(D, A, B, C, 5, 7);
- ROUND1(C, D, A, B, 6, 11);
- ROUND1(B, C, D, A, 7, 19);
- ROUND1(A, B, C, D, 8, 3);
- ROUND1(D, A, B, C, 9, 7);
- ROUND1(C, D, A, B, 10, 11);
- ROUND1(B, C, D, A, 11, 19);
- ROUND1(A, B, C, D, 12, 3);
- ROUND1(D, A, B, C, 13, 7);
- ROUND1(C, D, A, B, 14, 11);
- ROUND1(B, C, D, A, 15, 19);
-
- ROUND2(A, B, C, D, 0, 3);
- ROUND2(D, A, B, C, 4, 5);
- ROUND2(C, D, A, B, 8, 9);
- ROUND2(B, C, D, A, 12, 13);
- ROUND2(A, B, C, D, 1, 3);
- ROUND2(D, A, B, C, 5, 5);
- ROUND2(C, D, A, B, 9, 9);
- ROUND2(B, C, D, A, 13, 13);
- ROUND2(A, B, C, D, 2, 3);
- ROUND2(D, A, B, C, 6, 5);
- ROUND2(C, D, A, B, 10, 9);
- ROUND2(B, C, D, A, 14, 13);
- ROUND2(A, B, C, D, 3, 3);
- ROUND2(D, A, B, C, 7, 5);
- ROUND2(C, D, A, B, 11, 9);
- ROUND2(B, C, D, A, 15, 13);
-
- ROUND3(A, B, C, D, 0, 3);
- ROUND3(D, A, B, C, 8, 9);
- ROUND3(C, D, A, B, 4, 11);
- ROUND3(B, C, D, A, 12, 15);
- ROUND3(A, B, C, D, 2, 3);
- ROUND3(D, A, B, C, 10, 9);
- ROUND3(C, D, A, B, 6, 11);
- ROUND3(B, C, D, A, 14, 15);
- ROUND3(A, B, C, D, 1, 3);
- ROUND3(D, A, B, C, 9, 9);
- ROUND3(C, D, A, B, 5, 11);
- ROUND3(B, C, D, A, 13, 15);
- ROUND3(A, B, C, D, 3, 3);
- ROUND3(D, A, B, C, 11, 9);
- ROUND3(C, D, A, B, 7, 11);
- ROUND3(B, C, D, A, 15, 15);
-
- *A += AA;
- *B += BB;
- *C += CC;
- *D += DD;
-
- *A &= 0xFFFFFFFF;
- *B &= 0xFFFFFFFF;
- *C &= 0xFFFFFFFF;
- *D &= 0xFFFFFFFF;
-
- for (j = 0; j < 16; j++)
- X[j] = 0;
-}
-
-static void
-copy64(__u32 *M, unsigned char *in)
-{
- int i;
-
- for (i = 0; i < 16; i++)
- M[i] = (in[i * 4 + 3] << 24) | (in[i * 4 + 2] << 16) |
- (in[i * 4 + 1] << 8) | (in[i * 4 + 0] << 0);
-}
-
-static void
-copy4(unsigned char *out, __u32 x)
-{
- out[0] = x & 0xFF;
- out[1] = (x >> 8) & 0xFF;
- out[2] = (x >> 16) & 0xFF;
- out[3] = (x >> 24) & 0xFF;
-}
-
-/* produce a md4 message digest from data of length n bytes */
-void
-mdfour(unsigned char *out, unsigned char *in, int n)
-{
- unsigned char buf[128];
- __u32 M[16];
- __u32 b = n * 8;
- int i;
- __u32 A = 0x67452301;
- __u32 B = 0xefcdab89;
- __u32 C = 0x98badcfe;
- __u32 D = 0x10325476;
-
- while (n > 64) {
- copy64(M, in);
- mdfour64(M, &A, &B, &C, &D);
- in += 64;
- n -= 64;
- }
-
- for (i = 0; i < 128; i++)
- buf[i] = 0;
- memcpy(buf, in, n);
- buf[n] = 0x80;
-
- if (n <= 55) {
- copy4(buf + 56, b);
- copy64(M, buf);
- mdfour64(M, &A, &B, &C, &D);
- } else {
- copy4(buf + 120, b);
- copy64(M, buf);
- mdfour64(M, &A, &B, &C, &D);
- copy64(M, buf + 64);
- mdfour64(M, &A, &B, &C, &D);
- }
-
- for (i = 0; i < 128; i++)
- buf[i] = 0;
- copy64(M, buf);
-
- copy4(out, A);
- copy4(out + 4, B);
- copy4(out + 8, C);
- copy4(out + 12, D);
-
- A = B = C = D = 0;
-}
diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
deleted file mode 100644
index 98b66a5..0000000
--- a/fs/cifs/md5.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * This code implements the MD5 message-digest algorithm.
- * The algorithm is due to Ron Rivest. This code was
- * written by Colin Plumb in 1993, no copyright is claimed.
- * This code is in the public domain; do with it what you wish.
- *
- * Equivalent code is available from RSA Data Security, Inc.
- * This code has been tested against that, and is equivalent,
- * except that you don't need to include two pages of legalese
- * with every copy.
- *
- * To compute the message digest of a chunk of bytes, declare an
- * MD5Context structure, pass it to cifs_MD5_init, call cifs_MD5_update as
- * needed on buffers full of bytes, and then call cifs_MD5_final, which
- * will fill a supplied 16-byte array with the digest.
- */
-
-/* This code slightly modified to fit into Samba by
- abartlet@samba.org Jun 2001
- and to fit the cifs vfs by
- Steve French sfrench@us.ibm.com */
-
-#include <linux/string.h>
-#include "md5.h"
-
-static void MD5Transform(__u32 buf[4], __u32 const in[16]);
-
-/*
- * Note: this code is harmless on little-endian machines.
- */
-static void
-byteReverse(unsigned char *buf, unsigned longs)
-{
- __u32 t;
- do {
- t = (__u32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
- ((unsigned) buf[1] << 8 | buf[0]);
- *(__u32 *) buf = t;
- buf += 4;
- } while (--longs);
-}
-
-/*
- * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
- * initialization constants.
- */
-void
-cifs_MD5_init(struct MD5Context *ctx)
-{
- ctx->buf[0] = 0x67452301;
- ctx->buf[1] = 0xefcdab89;
- ctx->buf[2] = 0x98badcfe;
- ctx->buf[3] = 0x10325476;
-
- ctx->bits[0] = 0;
- ctx->bits[1] = 0;
-}
-
-/*
- * Update context to reflect the concatenation of another buffer full
- * of bytes.
- */
-void
-cifs_MD5_update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
-{
- register __u32 t;
-
- /* Update bitcount */
-
- t = ctx->bits[0];
- if ((ctx->bits[0] = t + ((__u32) len << 3)) < t)
- ctx->bits[1]++; /* Carry from low to high */
- ctx->bits[1] += len >> 29;
-
- t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
-
- /* Handle any leading odd-sized chunks */
-
- if (t) {
- unsigned char *p = (unsigned char *) ctx->in + t;
-
- t = 64 - t;
- if (len < t) {
- memmove(p, buf, len);
- return;
- }
- memmove(p, buf, t);
- byteReverse(ctx->in, 16);
- MD5Transform(ctx->buf, (__u32 *) ctx->in);
- buf += t;
- len -= t;
- }
- /* Process data in 64-byte chunks */
-
- while (len >= 64) {
- memmove(ctx->in, buf, 64);
- byteReverse(ctx->in, 16);
- MD5Transform(ctx->buf, (__u32 *) ctx->in);
- buf += 64;
- len -= 64;
- }
-
- /* Handle any remaining bytes of data. */
-
- memmove(ctx->in, buf, len);
-}
-
-/*
- * Final wrapup - pad to 64-byte boundary with the bit pattern
- * 1 0* (64-bit count of bits processed, MSB-first)
- */
-void
-cifs_MD5_final(unsigned char digest[16], struct MD5Context *ctx)
-{
- unsigned int count;
- unsigned char *p;
-
- /* Compute number of bytes mod 64 */
- count = (ctx->bits[0] >> 3) & 0x3F;
-
- /* Set the first char of padding to 0x80. This is safe since there is
- always at least one byte free */
- p = ctx->in + count;
- *p++ = 0x80;
-
- /* Bytes of padding needed to make 64 bytes */
- count = 64 - 1 - count;
-
- /* Pad out to 56 mod 64 */
- if (count < 8) {
- /* Two lots of padding: Pad the first block to 64 bytes */
- memset(p, 0, count);
- byteReverse(ctx->in, 16);
- MD5Transform(ctx->buf, (__u32 *) ctx->in);
-
- /* Now fill the next block with 56 bytes */
- memset(ctx->in, 0, 56);
- } else {
- /* Pad block to 56 bytes */
- memset(p, 0, count - 8);
- }
- byteReverse(ctx->in, 14);
-
- /* Append length in bits and transform */
- ((__u32 *) ctx->in)[14] = ctx->bits[0];
- ((__u32 *) ctx->in)[15] = ctx->bits[1];
-
- MD5Transform(ctx->buf, (__u32 *) ctx->in);
- byteReverse((unsigned char *) ctx->buf, 4);
- memmove(digest, ctx->buf, 16);
- memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
-}
-
-/* The four core functions - F1 is optimized somewhat */
-
-/* #define F1(x, y, z) (x & y | ~x & z) */
-#define F1(x, y, z) (z ^ (x & (y ^ z)))
-#define F2(x, y, z) F1(z, x, y)
-#define F3(x, y, z) (x ^ y ^ z)
-#define F4(x, y, z) (y ^ (x | ~z))
-
-/* This is the central step in the MD5 algorithm. */
-#define MD5STEP(f, w, x, y, z, data, s) \
- (w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x)
-
-/*
- * The core of the MD5 algorithm, this alters an existing MD5 hash to
- * reflect the addition of 16 longwords of new data. cifs_MD5_update blocks
- * the data and converts bytes into longwords for this routine.
- */
-static void
-MD5Transform(__u32 buf[4], __u32 const in[16])
-{
- register __u32 a, b, c, d;
-
- a = buf[0];
- b = buf[1];
- c = buf[2];
- d = buf[3];
-
- MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
- MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
- MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
- MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
- MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
- MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
- MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
- MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
- MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
- MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
- MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
- MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
- MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
- MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
- MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
- MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
-
- MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
- MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
- MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
- MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
- MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
- MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
- MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
- MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
- MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
- MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
- MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
- MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
- MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
- MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
- MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
- MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
-
- MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
- MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
- MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
- MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
- MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
- MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
- MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
- MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
- MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
- MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
- MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
- MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
- MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
- MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
- MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
- MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
-
- MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
- MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
- MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
- MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
- MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
- MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
- MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
- MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
- MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
- MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
- MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
- MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
- MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
- MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
- MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
- MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
-
- buf[0] += a;
- buf[1] += b;
- buf[2] += c;
- buf[3] += d;
-}
-
-#if 0 /* currently unused */
-/***********************************************************************
- the rfc 2104 version of hmac_md5 initialisation.
-***********************************************************************/
-static void
-hmac_md5_init_rfc2104(unsigned char *key, int key_len,
- struct HMACMD5Context *ctx)
-{
- int i;
-
- /* if key is longer than 64 bytes reset it to key=MD5(key) */
- if (key_len > 64) {
- unsigned char tk[16];
- struct MD5Context tctx;
-
- cifs_MD5_init(&tctx);
- cifs_MD5_update(&tctx, key, key_len);
- cifs_MD5_final(tk, &tctx);
-
- key = tk;
- key_len = 16;
- }
-
- /* start out by storing key in pads */
- memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
- memset(ctx->k_opad, 0, sizeof(ctx->k_opad));
- memcpy(ctx->k_ipad, key, key_len);
- memcpy(ctx->k_opad, key, key_len);
-
- /* XOR key with ipad and opad values */
- for (i = 0; i < 64; i++) {
- ctx->k_ipad[i] ^= 0x36;
- ctx->k_opad[i] ^= 0x5c;
- }
-
- cifs_MD5_init(&ctx->ctx);
- cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
-}
-#endif
-
-/***********************************************************************
- the microsoft version of hmac_md5 initialisation.
-***********************************************************************/
-void
-hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
- struct HMACMD5Context *ctx)
-{
- int i;
-
- /* if key is longer than 64 bytes truncate it */
- if (key_len > 64)
- key_len = 64;
-
- /* start out by storing key in pads */
- memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
- memset(ctx->k_opad, 0, sizeof(ctx->k_opad));
- memcpy(ctx->k_ipad, key, key_len);
- memcpy(ctx->k_opad, key, key_len);
-
- /* XOR key with ipad and opad values */
- for (i = 0; i < 64; i++) {
- ctx->k_ipad[i] ^= 0x36;
- ctx->k_opad[i] ^= 0x5c;
- }
-
- cifs_MD5_init(&ctx->ctx);
- cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
-}
-
-/***********************************************************************
- update hmac_md5 "inner" buffer
-***********************************************************************/
-void
-hmac_md5_update(const unsigned char *text, int text_len,
- struct HMACMD5Context *ctx)
-{
- cifs_MD5_update(&ctx->ctx, text, text_len); /* then text of datagram */
-}
-
-/***********************************************************************
- finish off hmac_md5 "inner" buffer and generate outer one.
-***********************************************************************/
-void
-hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx)
-{
- struct MD5Context ctx_o;
-
- cifs_MD5_final(digest, &ctx->ctx);
-
- cifs_MD5_init(&ctx_o);
- cifs_MD5_update(&ctx_o, ctx->k_opad, 64);
- cifs_MD5_update(&ctx_o, digest, 16);
- cifs_MD5_final(digest, &ctx_o);
-}
-
-/***********************************************************
- single function to calculate an HMAC MD5 digest from data.
- use the microsoft hmacmd5 init method because the key is 16 bytes.
-************************************************************/
-#if 0 /* currently unused */
-static void
-hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
- unsigned char *digest)
-{
- struct HMACMD5Context ctx;
- hmac_md5_init_limK_to_64(key, 16, &ctx);
- if (data_len != 0)
- hmac_md5_update(data, data_len, &ctx);
-
- hmac_md5_final(digest, &ctx);
-}
-#endif
diff --git a/fs/cifs/md5.h b/fs/cifs/md5.h
deleted file mode 100644
index 6fba8cb..0000000
--- a/fs/cifs/md5.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef MD5_H
-#define MD5_H
-#ifndef HEADER_MD5_H
-/* Try to avoid clashes with OpenSSL */
-#define HEADER_MD5_H
-#endif
-
-struct MD5Context {
- __u32 buf[4];
- __u32 bits[2];
- unsigned char in[64];
-};
-#endif /* !MD5_H */
-
-#ifndef _HMAC_MD5_H
-struct HMACMD5Context {
- struct MD5Context ctx;
- unsigned char k_ipad[65];
- unsigned char k_opad[65];
-};
-#endif /* _HMAC_MD5_H */
-
-void cifs_MD5_init(struct MD5Context *context);
-void cifs_MD5_update(struct MD5Context *context, unsigned char const *buf,
- unsigned len);
-void cifs_MD5_final(unsigned char digest[16], struct MD5Context *context);
-
-/* The following definitions come from lib/hmacmd5.c */
-
-/* void hmac_md5_init_rfc2104(unsigned char *key, int key_len,
- struct HMACMD5Context *ctx);*/
-void hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
- struct HMACMD5Context *ctx);
-void hmac_md5_update(const unsigned char *text, int text_len,
- struct HMACMD5Context *ctx);
-void hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx);
-/* void hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
- unsigned char *digest);*/
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 09bfcf0..2a930a7 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -236,10 +236,7 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
{
__u16 mid = 0;
__u16 last_mid;
- int collision;
-
- if (server == NULL)
- return mid;
+ bool collision;
spin_lock(&GlobalMid_Lock);
last_mid = server->CurrentMid; /* we do not want to loop forever */
@@ -252,24 +249,38 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
(and it would also have to have been a request that
did not time out) */
while (server->CurrentMid != last_mid) {
- struct list_head *tmp;
struct mid_q_entry *mid_entry;
+ unsigned int num_mids;
- collision = 0;
+ collision = false;
if (server->CurrentMid == 0)
server->CurrentMid++;
- list_for_each(tmp, &server->pending_mid_q) {
- mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-
- if ((mid_entry->mid == server->CurrentMid) &&
- (mid_entry->midState == MID_REQUEST_SUBMITTED)) {
+ num_mids = 0;
+ list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
+ ++num_mids;
+ if (mid_entry->mid == server->CurrentMid &&
+ mid_entry->midState == MID_REQUEST_SUBMITTED) {
/* This mid is in use, try a different one */
- collision = 1;
+ collision = true;
break;
}
}
- if (collision == 0) {
+
+ /*
+ * if we have more than 32k mids in the list, then something
+ * is very wrong. Possibly a local user is trying to DoS the
+ * box by issuing long-running calls and SIGKILL'ing them. If
+ * we get to 2^16 mids then we're in big trouble as this
+ * function could loop forever.
+ *
+ * Go ahead and assign out the mid in this situation, but force
+ * an eventual reconnect to clean out the pending_mid_q.
+ */
+ if (num_mids > 32768)
+ server->tcpStatus = CifsNeedReconnect;
+
+ if (!collision) {
mid = server->CurrentMid;
break;
}
@@ -381,29 +392,31 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
}
static int
-checkSMBhdr(struct smb_hdr *smb, __u16 mid)
+check_smb_hdr(struct smb_hdr *smb, __u16 mid)
{
- /* Make sure that this really is an SMB, that it is a response,
- and that the message ids match */
- if ((*(__le32 *) smb->Protocol == cpu_to_le32(0x424d53ff)) &&
- (mid == smb->Mid)) {
- if (smb->Flags & SMBFLG_RESPONSE)
- return 0;
- else {
- /* only one valid case where server sends us request */
- if (smb->Command == SMB_COM_LOCKING_ANDX)
- return 0;
- else
- cERROR(1, "Received Request not response");
- }
- } else { /* bad signature or mid */
- if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff))
- cERROR(1, "Bad protocol string signature header %x",
- *(unsigned int *) smb->Protocol);
- if (mid != smb->Mid)
- cERROR(1, "Mids do not match");
+ /* does it have the right SMB "signature" ? */
+ if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) {
+ cERROR(1, "Bad protocol string signature header 0x%x",
+ *(unsigned int *)smb->Protocol);
+ return 1;
}
- cERROR(1, "bad smb detected. The Mid=%d", smb->Mid);
+
+ /* Make sure that message ids match */
+ if (mid != smb->Mid) {
+ cERROR(1, "Mids do not match. received=%u expected=%u",
+ smb->Mid, mid);
+ return 1;
+ }
+
+ /* if it's a response then accept */
+ if (smb->Flags & SMBFLG_RESPONSE)
+ return 0;
+
+ /* only one valid case where server sends us request */
+ if (smb->Command == SMB_COM_LOCKING_ANDX)
+ return 0;
+
+ cERROR(1, "Server sent request, not response. mid=%u", smb->Mid);
return 1;
}
@@ -448,7 +461,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
return 1;
}
- if (checkSMBhdr(smb, mid))
+ if (check_smb_hdr(smb, mid))
return 1;
clc_len = smbCalcSize_LE(smb);
@@ -465,25 +478,26 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF))
return 0; /* bcc wrapped */
}
- cFYI(1, "Calculated size %d vs length %d mismatch for mid %d",
+ cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u",
clc_len, 4 + len, smb->Mid);
- /* Windows XP can return a few bytes too much, presumably
- an illegal pad, at the end of byte range lock responses
- so we allow for that three byte pad, as long as actual
- received length is as long or longer than calculated length */
- /* We have now had to extend this more, since there is a
- case in which it needs to be bigger still to handle a
- malformed response to transact2 findfirst from WinXP when
- access denied is returned and thus bcc and wct are zero
- but server says length is 0x21 bytes too long as if the server
- forget to reset the smb rfc1001 length when it reset the
- wct and bcc to minimum size and drop the t2 parms and data */
- if ((4+len > clc_len) && (len <= clc_len + 512))
- return 0;
- else {
- cERROR(1, "RFC1001 size %d bigger than SMB for Mid=%d",
+
+ if (4 + len < clc_len) {
+ cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u",
len, smb->Mid);
return 1;
+ } else if (len > clc_len + 512) {
+ /*
+ * Some servers (Windows XP in particular) send more
+ * data than the lengths in the SMB packet would
+ * indicate on certain calls (byte range locks and
+ * trans2 find first calls in particular). While the
+ * client can handle such a frame by ignoring the
+ * trailing data, we choose limit the amount of extra
+ * data to 512 bytes.
+ */
+ cERROR(1, "RFC1001 size %u more than 512 bytes larger "
+ "than SMB for mid=%u", len, smb->Mid);
+ return 1;
}
}
return 0;
@@ -637,77 +651,6 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
return;
}
-/* Convert 16 bit Unicode pathname to wire format from string in current code
- page. Conversion may involve remapping up the seven characters that are
- only legal in POSIX-like OS (if they are present in the string). Path
- names are little endian 16 bit Unicode on the wire */
-int
-cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
- const struct nls_table *cp, int mapChars)
-{
- int i, j, charlen;
- int len_remaining = maxlen;
- char src_char;
- __u16 temp;
-
- if (!mapChars)
- return cifs_strtoUCS(target, source, PATH_MAX, cp);
-
- for (i = 0, j = 0; i < maxlen; j++) {
- src_char = source[i];
- switch (src_char) {
- case 0:
- target[j] = 0;
- goto ctoUCS_out;
- case ':':
- target[j] = cpu_to_le16(UNI_COLON);
- break;
- case '*':
- target[j] = cpu_to_le16(UNI_ASTERIK);
- break;
- case '?':
- target[j] = cpu_to_le16(UNI_QUESTION);
- break;
- case '<':
- target[j] = cpu_to_le16(UNI_LESSTHAN);
- break;
- case '>':
- target[j] = cpu_to_le16(UNI_GRTRTHAN);
- break;
- case '|':
- target[j] = cpu_to_le16(UNI_PIPE);
- break;
- /* BB We can not handle remapping slash until
- all the calls to build_path_from_dentry
- are modified, as they use slash as separator BB */
- /* case '\\':
- target[j] = cpu_to_le16(UNI_SLASH);
- break;*/
- default:
- charlen = cp->char2uni(source+i,
- len_remaining, &temp);
- /* if no match, use question mark, which
- at least in some cases servers as wild card */
- if (charlen < 1) {
- target[j] = cpu_to_le16(0x003f);
- charlen = 1;
- } else
- target[j] = cpu_to_le16(temp);
- len_remaining -= charlen;
- /* character may take more than one byte in the
- the source string, but will take exactly two
- bytes in the target string */
- i += charlen;
- continue;
- }
- i++; /* move to next char in source string */
- len_remaining--;
- }
-
-ctoUCS_out:
- return i;
-}
-
void
cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
{
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 6783ce6..79f641e 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -170,7 +170,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
{
int rc, alen, slen;
const char *pct;
- char *endp, scope_id[13];
+ char scope_id[13];
struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
@@ -197,9 +197,9 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
memcpy(scope_id, pct + 1, slen);
scope_id[slen] = '\0';
- s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
- if (endp != scope_id + slen)
- return 0;
+ rc = strict_strtoul(scope_id, 0,
+ (unsigned long *)&s6->sin6_scope_id);
+ rc = (rc == 0) ? 1 : 0;
}
return rc;
@@ -916,14 +916,14 @@ unsigned int
smbCalcSize(struct smb_hdr *ptr)
{
return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
- 2 /* size of the bcc field */ + BCC(ptr));
+ 2 /* size of the bcc field */ + get_bcc(ptr));
}
unsigned int
smbCalcSize_LE(struct smb_hdr *ptr)
{
return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
- 2 /* size of the bcc field */ + le16_to_cpu(BCC_LE(ptr)));
+ 2 /* size of the bcc field */ + get_bcc_le(ptr));
}
/* The following are taken from fs/ntfs/util.c */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 7f25cc3..f8e4cd2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -764,7 +764,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
{
int rc = 0;
int xid, i;
- struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
struct cifsFileInfo *cifsFile = NULL;
char *current_entry;
@@ -775,8 +774,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
xid = GetXid();
- cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-
/*
* Ensure FindFirst doesn't fail before doing filldir() for '.' and
* '..'. Otherwise we won't be able to notify VFS in case of failure.
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 1cffd82..1676570 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -277,7 +277,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
}
static void
-decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
+decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses,
const struct nls_table *nls_cp)
{
int len;
@@ -323,7 +323,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
return;
}
-static int decode_ascii_ssetup(char **pbcc_area, int bleft,
+static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
struct cifsSesInfo *ses,
const struct nls_table *nls_cp)
{
@@ -575,12 +575,11 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
char *str_area;
SESSION_SETUP_ANDX *pSMB;
__u32 capabilities;
- int count;
+ __u16 count;
int resp_buf_type;
struct kvec iov[3];
enum securityEnum type;
- __u16 action;
- int bytes_remaining;
+ __u16 action, bytes_remaining;
struct key *spnego_key = NULL;
__le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
u16 blob_len;
@@ -657,13 +656,13 @@ ssetup_ntlmssp_authenticate:
if (type == LANMAN) {
#ifdef CONFIG_CIFS_WEAK_PW_HASH
- char lnm_session_key[CIFS_SESS_KEY_SIZE];
+ char lnm_session_key[CIFS_AUTH_RESP_SIZE];
pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
/* no capabilities flags in old lanman negotiation */
- pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
+ pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
/* Calculate hash with password and copy into bcc_ptr.
* Encryption Key (stored as in cryptkey) gets used if the
@@ -676,8 +675,8 @@ ssetup_ntlmssp_authenticate:
true : false, lnm_session_key);
ses->flags |= CIFS_SES_LANMAN;
- memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE);
- bcc_ptr += CIFS_SESS_KEY_SIZE;
+ memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
/* can not sign if LANMAN negotiated so no need
to calculate signing key? but what if server
@@ -876,7 +875,7 @@ ssetup_ntlmssp_authenticate:
count = iov[1].iov_len + iov[2].iov_len;
smb_buf->smb_buf_length += count;
- BCC_LE(smb_buf) = cpu_to_le16(count);
+ put_bcc_le(count, smb_buf);
rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
CIFS_LOG_ERROR);
@@ -910,7 +909,7 @@ ssetup_ntlmssp_authenticate:
cFYI(1, "UID = %d ", ses->Suid);
/* response can have either 3 or 4 word count - Samba sends 3 */
/* and lanman response is 3 */
- bytes_remaining = BCC(smb_buf);
+ bytes_remaining = get_bcc(smb_buf);
bcc_ptr = pByteArea(smb_buf);
if (smb_buf->WordCount == 4) {
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index b6b6dcb..0472148 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -45,7 +45,6 @@
up with a different answer to the one above)
*/
#include <linux/slab.h>
-#include "cifsencrypt.h"
#define uchar unsigned char
static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 192ea51..b5041c8 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -32,9 +32,8 @@
#include "cifs_unicode.h"
#include "cifspdu.h"
#include "cifsglob.h"
-#include "md5.h"
#include "cifs_debug.h"
-#include "cifsencrypt.h"
+#include "cifsproto.h"
#ifndef false
#define false 0
@@ -48,14 +47,58 @@
#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
-/*The following definitions come from libsmb/smbencrypt.c */
+/* produce a md4 message digest from data of length n bytes */
+int
+mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
+{
+ int rc;
+ unsigned int size;
+ struct crypto_shash *md4;
+ struct sdesc *sdescmd4;
+
+ md4 = crypto_alloc_shash("md4", 0, 0);
+ if (IS_ERR(md4)) {
+ rc = PTR_ERR(md4);
+ cERROR(1, "%s: Crypto md4 allocation error %d\n", __func__, rc);
+ return rc;
+ }
+ size = sizeof(struct shash_desc) + crypto_shash_descsize(md4);
+ sdescmd4 = kmalloc(size, GFP_KERNEL);
+ if (!sdescmd4) {
+ rc = -ENOMEM;
+ cERROR(1, "%s: Memory allocation failure\n", __func__);
+ goto mdfour_err;
+ }
+ sdescmd4->shash.tfm = md4;
+ sdescmd4->shash.flags = 0x0;
+
+ rc = crypto_shash_init(&sdescmd4->shash);
+ if (rc) {
+ cERROR(1, "%s: Could not init md4 shash\n", __func__);
+ goto mdfour_err;
+ }
+ crypto_shash_update(&sdescmd4->shash, link_str, link_len);
+ rc = crypto_shash_final(&sdescmd4->shash, md4_hash);
-void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
- unsigned char *p24);
-void E_md4hash(const unsigned char *passwd, unsigned char *p16);
-static void SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
- unsigned char p24[24]);
-void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24);
+mdfour_err:
+ crypto_free_shash(md4);
+ kfree(sdescmd4);
+
+ return rc;
+}
+
+/* Does the des encryption from the NT or LM MD4 hash. */
+static void
+SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
+ unsigned char p24[24])
+{
+ unsigned char p21[21];
+
+ memset(p21, '\0', 21);
+
+ memcpy(p21, passwd, 16);
+ E_P24(p21, c8, p24);
+}
/*
This implements the X/Open SMB password encryption
@@ -118,9 +161,10 @@ _my_mbstowcs(__u16 *dst, const unsigned char *src, int len)
* Creates the MD4 Hash of the users password in NT UNICODE.
*/
-void
+int
E_md4hash(const unsigned char *passwd, unsigned char *p16)
{
+ int rc;
int len;
__u16 wpwd[129];
@@ -139,8 +183,10 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16)
/* Calculate length in bytes */
len = _my_wcslen(wpwd) * sizeof(__u16);
- mdfour(p16, (unsigned char *) wpwd, len);
+ rc = mdfour(p16, (unsigned char *) wpwd, len);
memset(wpwd, 0, 129 * 2);
+
+ return rc;
}
#if 0 /* currently unused */
@@ -212,19 +258,6 @@ ntv2_owf_gen(const unsigned char owf[16], const char *user_n,
}
#endif
-/* Does the des encryption from the NT or LM MD4 hash. */
-static void
-SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
- unsigned char p24[24])
-{
- unsigned char p21[21];
-
- memset(p21, '\0', 21);
-
- memcpy(p21, passwd, 16);
- E_P24(p21, c8, p24);
-}
-
/* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */
#if 0 /* currently unused */
static void
@@ -242,16 +275,21 @@ NTLMSSPOWFencrypt(unsigned char passwd[8],
#endif
/* Does the NT MD4 hash then des encryption. */
-
-void
+int
SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
{
+ int rc;
unsigned char p21[21];
memset(p21, '\0', 21);
- E_md4hash(passwd, p21);
+ rc = E_md4hash(passwd, p21);
+ if (rc) {
+ cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
+ return rc;
+ }
SMBOWFencrypt(p21, c8, p24);
+ return rc;
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c8e2808..46d8756f 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -236,9 +236,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
server->tcpStatus = CifsNeedReconnect;
}
- if (rc < 0) {
+ if (rc < 0 && rc != -EINTR)
cERROR(1, "Error %d sending data on socket to server", rc);
- } else
+ else
rc = 0;
/* Don't want to modify the buffer as a
@@ -359,6 +359,10 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
if (rc)
return rc;
+ /* enable signing if server requires it */
+ if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
+
mutex_lock(&server->srv_mutex);
mid = AllocMidQEntry(in_buf, server);
if (mid == NULL) {
@@ -453,6 +457,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
case MID_RETRY_NEEDED:
rc = -EAGAIN;
break;
+ case MID_RESPONSE_MALFORMED:
+ rc = -EIO;
+ break;
default:
cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__,
mid->mid, mid->midState);
@@ -484,7 +491,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2;
in_buf->Command = SMB_COM_NT_CANCEL;
in_buf->WordCount = 0;
- BCC_LE(in_buf) = 0;
+ put_bcc_le(0, in_buf);
mutex_lock(&server->srv_mutex);
rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
@@ -570,17 +577,33 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
#endif
mutex_unlock(&ses->server->srv_mutex);
- cifs_small_buf_release(in_buf);
- if (rc < 0)
+ if (rc < 0) {
+ cifs_small_buf_release(in_buf);
goto out;
+ }
- if (long_op == CIFS_ASYNC_OP)
+ if (long_op == CIFS_ASYNC_OP) {
+ cifs_small_buf_release(in_buf);
goto out;
+ }
rc = wait_for_response(ses->server, midQ);
- if (rc != 0)
- goto out;
+ if (rc != 0) {
+ send_nt_cancel(ses->server, in_buf, midQ);
+ spin_lock(&GlobalMid_Lock);
+ if (midQ->midState == MID_REQUEST_SUBMITTED) {
+ midQ->callback = DeleteMidQEntry;
+ spin_unlock(&GlobalMid_Lock);
+ cifs_small_buf_release(in_buf);
+ atomic_dec(&ses->server->inFlight);
+ wake_up(&ses->server->request_q);
+ return rc;
+ }
+ spin_unlock(&GlobalMid_Lock);
+ }
+
+ cifs_small_buf_release(in_buf);
rc = sync_mid_result(midQ, ses->server);
if (rc != 0) {
@@ -632,8 +655,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
if (receive_len >= sizeof(struct smb_hdr) - 4
/* do not count RFC1001 header */ +
(2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
- BCC(midQ->resp_buf) =
- le16_to_cpu(BCC_LE(midQ->resp_buf));
+ put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
if ((flags & CIFS_NO_RESP) == 0)
midQ->resp_buf = NULL; /* mark it so buf will
not be freed by
@@ -725,8 +747,19 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
goto out;
rc = wait_for_response(ses->server, midQ);
- if (rc != 0)
- goto out;
+ if (rc != 0) {
+ send_nt_cancel(ses->server, in_buf, midQ);
+ spin_lock(&GlobalMid_Lock);
+ if (midQ->midState == MID_REQUEST_SUBMITTED) {
+ /* no longer considered to be "in-flight" */
+ midQ->callback = DeleteMidQEntry;
+ spin_unlock(&GlobalMid_Lock);
+ atomic_dec(&ses->server->inFlight);
+ wake_up(&ses->server->request_q);
+ return rc;
+ }
+ spin_unlock(&GlobalMid_Lock);
+ }
rc = sync_mid_result(midQ, ses->server);
if (rc != 0) {
@@ -776,7 +809,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
if (receive_len >= sizeof(struct smb_hdr) - 4
/* do not count RFC1001 header */ +
(2 * out_buf->WordCount) + 2 /* bcc */ )
- BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
+ put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
} else {
rc = -EIO;
cERROR(1, "Bad MID state?");
@@ -923,10 +956,21 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
}
}
- if (wait_for_response(ses->server, midQ) == 0) {
- /* We got the response - restart system call. */
- rstart = 1;
+ rc = wait_for_response(ses->server, midQ);
+ if (rc) {
+ send_nt_cancel(ses->server, in_buf, midQ);
+ spin_lock(&GlobalMid_Lock);
+ if (midQ->midState == MID_REQUEST_SUBMITTED) {
+ /* no longer considered to be "in-flight" */
+ midQ->callback = DeleteMidQEntry;
+ spin_unlock(&GlobalMid_Lock);
+ return rc;
+ }
+ spin_unlock(&GlobalMid_Lock);
}
+
+ /* We got the response - restart system call. */
+ rstart = 1;
}
rc = sync_mid_result(midQ, ses->server);
@@ -977,7 +1021,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
if (receive_len >= sizeof(struct smb_hdr) - 4
/* do not count RFC1001 header */ +
(2 * out_buf->WordCount) + 2 /* bcc */ )
- BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
+ put_bcc(get_bcc_le(out_buf), out_buf);
out:
delete_mid(midQ);
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a0..c6d31a3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
*/
asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct kstatfs tmp;
- error = vfs_statfs(&path, &tmp);
- if (!error)
- error = put_compat_statfs(buf, &tmp);
- path_put(&path);
- }
+ struct kstatfs tmp;
+ int error = user_statfs(pathname, &tmp);
+ if (!error)
+ error = put_compat_statfs(buf, &tmp);
return error;
}
asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
{
- struct file * file;
struct kstatfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = vfs_statfs(&file->f_path, &tmp);
+ int error = fd_statfs(fd, &tmp);
if (!error)
error = put_compat_statfs(buf, &tmp);
- fput(file);
-out:
return error;
}
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
{
- struct path path;
+ struct kstatfs tmp;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path(pathname, &path);
- if (!error) {
- struct kstatfs tmp;
- error = vfs_statfs(&path, &tmp);
- if (!error)
- error = put_compat_statfs64(buf, &tmp);
- path_put(&path);
- }
+ error = user_statfs(pathname, &tmp);
+ if (!error)
+ error = put_compat_statfs64(buf, &tmp);
return error;
}
asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
{
- struct file * file;
struct kstatfs tmp;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = vfs_statfs(&file->f_path, &tmp);
+ error = fd_statfs(fd, &tmp);
if (!error)
error = put_compat_statfs64(buf, &tmp);
- fput(file);
-out:
return error;
}
@@ -1228,7 +1200,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_readv(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PREAD)
+ ret = compat_readv(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
@@ -1285,7 +1259,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_writev(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PWRITE)
+ ret = compat_writev(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
@@ -2308,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
}
#endif /* CONFIG_TIMERFD */
+
+#ifdef CONFIG_FHANDLE
+/*
+ * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
+ * doesn't set the O_LARGEFILE flag.
+ */
+asmlinkage long
+compat_sys_open_by_handle_at(int mountdirfd,
+ struct file_handle __user *handle, int flags)
+{
+ return do_handle_open(mountdirfd, handle, flags);
+}
+#endif
diff --git a/fs/dcache.c b/fs/dcache.c
index 9f493ee..a39fe47 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -176,6 +176,7 @@ static void d_free(struct dentry *dentry)
/**
* dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
+ * @dentry: the target dentry
* After this call, in-progress rcu-walk path lookup will fail. This
* should be called after unhashing, and after changing d_inode (if
* the dentry has not already been unhashed).
@@ -281,6 +282,7 @@ static void dentry_lru_move_tail(struct dentry *dentry)
/**
* d_kill - kill dentry and return parent
* @dentry: dentry to kill
+ * @parent: parent dentry
*
* The dentry must already be unhashed and removed from the LRU.
*
@@ -294,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(parent->d_lock)
__releases(dentry->d_inode->i_lock)
{
- dentry->d_parent = NULL;
list_del(&dentry->d_u.d_child);
+ /*
+ * Inform try_to_ascend() that we are no longer attached to the
+ * dentry tree
+ */
+ dentry->d_flags |= DCACHE_DISCONNECTED;
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1010,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
}
/*
+ * This tries to ascend one level of parenthood, but
+ * we can race with renaming, so we need to re-check
+ * the parenthood after dropping the lock and check
+ * that the sequence number still matches.
+ */
+static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
+{
+ struct dentry *new = old->d_parent;
+
+ rcu_read_lock();
+ spin_unlock(&old->d_lock);
+ spin_lock(&new->d_lock);
+
+ /*
+ * might go back up the wrong parent if we have had a rename
+ * or deletion
+ */
+ if (new != old->d_parent ||
+ (old->d_flags & DCACHE_DISCONNECTED) ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&new->d_lock);
+ new = NULL;
+ }
+ rcu_read_unlock();
+ return new;
+}
+
+
+/*
* Search for at least 1 mount point in the dentry's subdirs.
* We descend to the next level whenever the d_subdirs
* list is non-empty and continue searching.
@@ -1064,24 +1099,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1179,24 +1200,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1521,6 +1528,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
}
EXPORT_SYMBOL(d_alloc_root);
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (list_empty(&inode->i_dentry))
+ return NULL;
+ alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+ __dget(alias);
+ return alias;
+}
+
+static struct dentry * d_find_any_alias(struct inode *inode)
+{
+ struct dentry *de;
+
+ spin_lock(&inode->i_lock);
+ de = __d_find_any_alias(inode);
+ spin_unlock(&inode->i_lock);
+ return de;
+}
+
+
/**
* d_obtain_alias - find or allocate a dentry for a given inode
* @inode: inode to allocate the dentry for
@@ -1550,7 +1579,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
if (IS_ERR(inode))
return ERR_CAST(inode);
- res = d_find_alias(inode);
+ res = d_find_any_alias(inode);
if (res)
goto out_iput;
@@ -1563,7 +1592,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
spin_lock(&inode->i_lock);
- res = __d_find_alias(inode, 0);
+ res = __d_find_any_alias(inode);
if (res) {
spin_unlock(&inode->i_lock);
dput(tmp);
@@ -1973,7 +2002,7 @@ out:
/**
* d_validate - verify dentry provided from insecure source (deprecated)
* @dentry: The dentry alleged to be valid child of @dparent
- * @parent: The parent dentry (known to be valid)
+ * @dparent: The parent dentry (known to be valid)
*
* An insecure source has sent us a dentry, here we verify it and dget() it.
* This is used by ncpfs in its readdir implementation.
@@ -2918,28 +2947,14 @@ resume:
spin_unlock(&dentry->d_lock);
}
if (this_parent != root) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
+ struct dentry *child = this_parent;
if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
this_parent->d_flags |= DCACHE_GENOCIDE;
this_parent->d_count--;
}
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9c64ae9..2d8c87b 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1468,15 +1468,13 @@ static void work_stop(void)
static int work_start(void)
{
- recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
- WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ recv_workqueue = create_singlethread_workqueue("dlm_recv");
if (!recv_workqueue) {
log_print("can't start dlm_recv");
return -ENOMEM;
}
- send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
- WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ send_workqueue = create_singlethread_workqueue("dlm_send");
if (!send_workqueue) {
log_print("can't start dlm_send");
destroy_workqueue(recv_workqueue);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 6fc4f31..534c1d4 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -46,24 +46,28 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct dentry *lower_dentry;
struct vfsmount *lower_mnt;
- struct dentry *dentry_save;
- struct vfsmount *vfsmount_save;
+ struct dentry *dentry_save = NULL;
+ struct vfsmount *vfsmount_save = NULL;
int rc = 1;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
lower_dentry = ecryptfs_dentry_to_lower(dentry);
lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
goto out;
- dentry_save = nd->path.dentry;
- vfsmount_save = nd->path.mnt;
- nd->path.dentry = lower_dentry;
- nd->path.mnt = lower_mnt;
+ if (nd) {
+ dentry_save = nd->path.dentry;
+ vfsmount_save = nd->path.mnt;
+ nd->path.dentry = lower_dentry;
+ nd->path.mnt = lower_mnt;
+ }
rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
- nd->path.dentry = dentry_save;
- nd->path.mnt = vfsmount_save;
+ if (nd) {
+ nd->path.dentry = dentry_save;
+ nd->path.mnt = vfsmount_save;
+ }
if (dentry->d_inode) {
struct inode *lower_inode =
ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index dbc84ed..e007534 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -632,8 +632,7 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
u32 flags);
int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct dentry *lower_dentry,
- struct inode *ecryptfs_dir_inode,
- struct nameidata *ecryptfs_nd);
+ struct inode *ecryptfs_dir_inode);
int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
size_t *decrypted_name_size,
struct dentry *ecryptfs_dentry,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 81e10e6..7d1050e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -317,6 +317,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
const struct file_operations ecryptfs_dir_fops = {
.readdir = ecryptfs_readdir,
+ .read = generic_read_dir,
.unlocked_ioctl = ecryptfs_unlocked_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index bd33f87..b592938 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -74,16 +74,20 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
unsigned int flags_save;
int rc;
- dentry_save = nd->path.dentry;
- vfsmount_save = nd->path.mnt;
- flags_save = nd->flags;
- nd->path.dentry = lower_dentry;
- nd->path.mnt = lower_mnt;
- nd->flags &= ~LOOKUP_OPEN;
+ if (nd) {
+ dentry_save = nd->path.dentry;
+ vfsmount_save = nd->path.mnt;
+ flags_save = nd->flags;
+ nd->path.dentry = lower_dentry;
+ nd->path.mnt = lower_mnt;
+ nd->flags &= ~LOOKUP_OPEN;
+ }
rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
- nd->path.dentry = dentry_save;
- nd->path.mnt = vfsmount_save;
- nd->flags = flags_save;
+ if (nd) {
+ nd->path.dentry = dentry_save;
+ nd->path.mnt = vfsmount_save;
+ nd->flags = flags_save;
+ }
return rc;
}
@@ -241,8 +245,7 @@ out:
*/
int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct dentry *lower_dentry,
- struct inode *ecryptfs_dir_inode,
- struct nameidata *ecryptfs_nd)
+ struct inode *ecryptfs_dir_inode)
{
struct dentry *lower_dir_dentry;
struct vfsmount *lower_mnt;
@@ -290,8 +293,6 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
goto out;
if (special_file(lower_inode->i_mode))
goto out;
- if (!ecryptfs_nd)
- goto out;
/* Released in this function */
page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
if (!page_virt) {
@@ -349,75 +350,6 @@ out:
}
/**
- * ecryptfs_new_lower_dentry
- * @name: The name of the new dentry.
- * @lower_dir_dentry: Parent directory of the new dentry.
- * @nd: nameidata from last lookup.
- *
- * Create a new dentry or get it from lower parent dir.
- */
-static struct dentry *
-ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
- struct nameidata *nd)
-{
- struct dentry *new_dentry;
- struct dentry *tmp;
- struct inode *lower_dir_inode;
-
- lower_dir_inode = lower_dir_dentry->d_inode;
-
- tmp = d_alloc(lower_dir_dentry, name);
- if (!tmp)
- return ERR_PTR(-ENOMEM);
-
- mutex_lock(&lower_dir_inode->i_mutex);
- new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
- mutex_unlock(&lower_dir_inode->i_mutex);
-
- if (!new_dentry)
- new_dentry = tmp;
- else
- dput(tmp);
-
- return new_dentry;
-}
-
-
-/**
- * ecryptfs_lookup_one_lower
- * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
- * @lower_dir_dentry: lower parent directory
- * @name: lower file name
- *
- * Get the lower dentry from vfs. If lower dentry does not exist yet,
- * create it.
- */
-static struct dentry *
-ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
- struct dentry *lower_dir_dentry, struct qstr *name)
-{
- struct nameidata nd;
- struct vfsmount *lower_mnt;
- int err;
-
- lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
- ecryptfs_dentry->d_parent));
- err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
- mntput(lower_mnt);
-
- if (!err) {
- /* we dont need the mount */
- mntput(nd.path.mnt);
- return nd.path.dentry;
- }
- if (err != -ENOENT)
- return ERR_PTR(err);
-
- /* create a new lower dentry */
- return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
-}
-
-/**
* ecryptfs_lookup
* @ecryptfs_dir_inode: The eCryptfs directory inode
* @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -434,7 +366,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
size_t encrypted_and_encoded_name_size;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
struct dentry *lower_dir_dentry, *lower_dentry;
- struct qstr lower_name;
int rc = 0;
if ((ecryptfs_dentry->d_name.len == 1
@@ -444,20 +375,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
goto out_d_drop;
}
lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
- lower_name.name = ecryptfs_dentry->d_name.name;
- lower_name.len = ecryptfs_dentry->d_name.len;
- lower_name.hash = ecryptfs_dentry->d_name.hash;
- if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
- rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
- lower_dir_dentry->d_inode, &lower_name);
- if (rc < 0)
- goto out_d_drop;
- }
- lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
- lower_dir_dentry, &lower_name);
+ mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
+ lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
+ lower_dir_dentry,
+ ecryptfs_dentry->d_name.len);
+ mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
- ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
+ ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
"[%d] on lower_dentry = [%s]\n", __func__, rc,
encrypted_and_encoded_name);
goto out_d_drop;
@@ -479,28 +404,21 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
"filename; rc = [%d]\n", __func__, rc);
goto out_d_drop;
}
- lower_name.name = encrypted_and_encoded_name;
- lower_name.len = encrypted_and_encoded_name_size;
- lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
- if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
- rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
- lower_dir_dentry->d_inode, &lower_name);
- if (rc < 0)
- goto out_d_drop;
- }
- lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
- lower_dir_dentry, &lower_name);
+ mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
+ lower_dentry = lookup_one_len(encrypted_and_encoded_name,
+ lower_dir_dentry,
+ encrypted_and_encoded_name_size);
+ mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
- ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
+ ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
"[%d] on lower_dentry = [%s]\n", __func__, rc,
encrypted_and_encoded_name);
goto out_d_drop;
}
lookup_and_interpose:
rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
- ecryptfs_dir_inode,
- ecryptfs_nd);
+ ecryptfs_dir_inode);
goto out;
out_d_drop:
d_drop(ecryptfs_dentry);
@@ -1092,6 +1010,8 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
ecryptfs_dentry_to_lower(dentry), &lower_stat);
if (!rc) {
+ fsstack_copy_attr_all(dentry->d_inode,
+ ecryptfs_inode_to_lower(dentry->d_inode));
generic_fillattr(dentry->d_inode, stat);
stat->blocks = lower_stat.blocks;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e0194b3..d9a5917 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_get);
* @ctx: [in] Pointer to eventfd context.
*
* The eventfd context reference must have been previously acquired either
- * with eventfd_ctx_get() or eventfd_ctx_fdget()).
+ * with eventfd_ctx_get() or eventfd_ctx_fdget().
*/
void eventfd_ctx_put(struct eventfd_ctx *ctx)
{
@@ -146,9 +146,9 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
* eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
* @ctx: [in] Pointer to eventfd context.
* @wait: [in] Wait queue to be removed.
- * @cnt: [out] Pointer to the 64bit conter value.
+ * @cnt: [out] Pointer to the 64-bit counter value.
*
- * Returns zero if successful, or the following error codes:
+ * Returns %0 if successful, or the following error codes:
*
* -EAGAIN : The operation would have blocked.
*
@@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
* eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
* @ctx: [in] Pointer to eventfd context.
* @no_wait: [in] Different from zero if the operation should not block.
- * @cnt: [out] Pointer to the 64bit conter value.
+ * @cnt: [out] Pointer to the 64-bit counter value.
*
- * Returns zero if successful, or the following error codes:
+ * Returns %0 if successful, or the following error codes:
*
- * -EAGAIN : The operation would have blocked but @no_wait was nonzero.
+ * -EAGAIN : The operation would have blocked but @no_wait was non-zero.
* -ERESTARTSYS : A signal interrupted the wait operation.
*
* If @no_wait is zero, the function might sleep until the eventfd internal
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cc8a9b7..4a09af9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
* cleanup path and it is also acquired by eventpoll_release_file()
* if a file has been pushed inside an epoll set and it is then
* close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
+ * It is also acquired when inserting an epoll fd onto another epoll
+ * fd. We do this so that we walk the epoll tree and ensure that this
+ * insertion does not create a cycle of epoll file descriptors, which
+ * could lead to deadlock. We need a global mutex to prevent two
+ * simultaneous inserts (A into B and B into A) from racing and
+ * constructing a cycle without either insert observing that it is
+ * going to.
* It is possible to drop the "ep->mtx" and to use the global
* mutex "epmutex" (together with "ep->lock") to have it working,
* but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);
+/* Used to check for epoll file descriptor inclusion loops */
+static struct nested_calls poll_loop_ncalls;
+
/* Used for safe wake up implementation */
static struct nested_calls poll_safewake_ncalls;
@@ -1114,6 +1124,17 @@ static int ep_send_events(struct eventpoll *ep,
return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
}
+static inline struct timespec ep_set_mstimeout(long ms)
+{
+ struct timespec now, ts = {
+ .tv_sec = ms / MSEC_PER_SEC,
+ .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
+ };
+
+ ktime_get_ts(&now);
+ return timespec_add_safe(now, ts);
+}
+
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int maxevents, long timeout)
{
@@ -1121,12 +1142,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
unsigned long flags;
long slack;
wait_queue_t wait;
- struct timespec end_time;
ktime_t expires, *to = NULL;
if (timeout > 0) {
- ktime_get_ts(&end_time);
- timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
+ struct timespec end_time = ep_set_mstimeout(timeout);
+
slack = select_estimate_accuracy(&end_time);
to = &expires;
*to = timespec_to_ktime(end_time);
@@ -1188,6 +1208,62 @@ retry:
return res;
}
+/**
+ * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
+ * API, to verify that adding an epoll file inside another
+ * epoll structure, does not violate the constraints, in
+ * terms of closed loops, or too deep chains (which can
+ * result in excessive stack usage).
+ *
+ * @priv: Pointer to the epoll file to be currently checked.
+ * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
+ * data structure pointer.
+ * @call_nests: Current dept of the @ep_call_nested() call stack.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ * structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+{
+ int error = 0;
+ struct file *file = priv;
+ struct eventpoll *ep = file->private_data;
+ struct rb_node *rbp;
+ struct epitem *epi;
+
+ mutex_lock(&ep->mtx);
+ for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ epi = rb_entry(rbp, struct epitem, rbn);
+ if (unlikely(is_file_epoll(epi->ffd.file))) {
+ error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ep_loop_check_proc, epi->ffd.file,
+ epi->ffd.file->private_data, current);
+ if (error != 0)
+ break;
+ }
+ }
+ mutex_unlock(&ep->mtx);
+
+ return error;
+}
+
+/**
+ * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
+ * another epoll file (represented by @ep) does not create
+ * closed loops or too deep chains.
+ *
+ * @ep: Pointer to the epoll private data structure.
+ * @file: Pointer to the epoll file to be checked.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ * structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check(struct eventpoll *ep, struct file *file)
+{
+ return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ep_loop_check_proc, file, ep, current);
+}
+
/*
* Open an eventpoll file descriptor.
*/
@@ -1236,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
struct epoll_event __user *, event)
{
int error;
+ int did_lock_epmutex = 0;
struct file *file, *tfile;
struct eventpoll *ep;
struct epitem *epi;
@@ -1277,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
*/
ep = file->private_data;
+ /*
+ * When we insert an epoll file descriptor, inside another epoll file
+ * descriptor, there is the change of creating closed loops, which are
+ * better be handled here, than in more critical paths.
+ *
+ * We hold epmutex across the loop check and the insert in this case, in
+ * order to prevent two separate inserts from racing and each doing the
+ * insert "at the same time" such that ep_loop_check passes on both
+ * before either one does the insert, thereby creating a cycle.
+ */
+ if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+ mutex_lock(&epmutex);
+ did_lock_epmutex = 1;
+ error = -ELOOP;
+ if (ep_loop_check(ep, tfile) != 0)
+ goto error_tgt_fput;
+ }
+
+
mutex_lock(&ep->mtx);
/*
@@ -1312,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);
error_tgt_fput:
+ if (unlikely(did_lock_epmutex))
+ mutex_unlock(&epmutex);
+
fput(tfile);
error_fput:
fput(file);
@@ -1431,6 +1530,12 @@ static int __init eventpoll_init(void)
EP_ITEM_COST;
BUG_ON(max_user_watches < 0);
+ /*
+ * Initialize the structure used to perform epoll file descriptor
+ * inclusion loops checks.
+ */
+ ep_nested_calls_init(&poll_loop_ncalls);
+
/* Initialize the structure used to perform safe poll wait head wake ups */
ep_nested_calls_init(&poll_safewake_ncalls);
diff --git a/fs/exec.c b/fs/exec.c
index c62efcb..ba99e1a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
struct file *file;
char *tmp = getname(library);
int error = PTR_ERR(tmp);
+ static const struct open_flags uselib_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN
+ };
if (IS_ERR(tmp))
goto out;
- file = do_filp_open(AT_FDCWD, tmp,
- O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
- MAY_READ | MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
putname(tmp);
error = PTR_ERR(file);
if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
{
struct file *file;
int err;
+ static const struct open_flags open_exec_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN
+ };
- file = do_filp_open(AT_FDCWD, name,
- O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
- MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
if (IS_ERR(file))
goto out;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 4268542..a755523 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1030,7 +1030,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
}
- inode->i_mapping->backing_dev_info = sb->s_bdi;
if (S_ISREG(inode->i_mode)) {
inode->i_op = &exofs_file_inode_operations;
inode->i_fop = &exofs_file_operations;
@@ -1131,7 +1130,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
sbi = sb->s_fs_info;
- inode->i_mapping->backing_dev_info = sb->s_bdi;
sb->s_dirt = 1;
inode_init_owner(inode, dir, mode);
inode->i_ino = sbi->s_nextid++;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d..4d70db1 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
err = exofs_set_link(new_dir, new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME;
if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= EXOFS_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = exofs_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
exofs_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b68257..b05acb7 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
struct inode * inode = dentry->d_inode;
int len = *max_len;
int type = FILEID_INO32_GEN;
-
- if (len < 2 || (connectable && len < 4))
+
+ if (connectable && (len < 4)) {
+ *max_len = 4;
+ return 255;
+ } else if (len < 2) {
+ *max_len = 2;
return 255;
+ }
len = 2;
fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
/*
* Try to get any dentry for the given file handle from the filesystem.
*/
+ if (!nop || !nop->fh_to_dentry)
+ return ERR_PTR(-ESTALE);
result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
if (!result)
result = ERR_PTR(-ESTALE);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d834..adb9185 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
if (new_dir->i_nlink >= EXT2_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = ext2_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
- * inode_dec_link_count() will mark the inode dirty.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
+ mark_inode_dirty(old_inode);
ext2_delete_entry (old_de, old_page);
- inode_dec_link_count(old_inode);
if (dir_de) {
if (old_dir != new_dir)
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71..561f692 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
dquot_initialize(dir);
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7aa767d..9cc19a1d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -754,7 +754,7 @@ static int ext3_release_dquot(struct dquot *dquot);
static int ext3_mark_dquot_dirty(struct dquot *dquot);
static int ext3_write_info(struct super_block *sb, int type);
static int ext3_quota_on(struct super_block *sb, int type, int format_id,
- char *path);
+ struct path *path);
static int ext3_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext3_qctl_operations;
sb->dq_op = &ext3_quota_operations;
#endif
+ memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
mutex_init(&sbi->s_resize_lock);
@@ -2877,27 +2878,20 @@ static int ext3_quota_on_mount(struct super_block *sb, int type)
* Standard function to be called on quota_on
*/
static int ext3_quota_on(struct super_block *sb, int type, int format_id,
- char *name)
+ struct path *path)
{
int err;
- struct path path;
if (!test_opt(sb, QUOTA))
return -EINVAL;
- err = kern_path(name, LOOKUP_FOLLOW, &path);
- if (err)
- return err;
-
/* Quotafile not on the same filesystem? */
- if (path.mnt->mnt_sb != sb) {
- path_put(&path);
+ if (path->mnt->mnt_sb != sb)
return -EXDEV;
- }
/* Journaling quota? */
if (EXT3_SB(sb)->s_qf_names[type]) {
/* Quotafile not of fs root? */
- if (path.dentry->d_parent != sb->s_root)
+ if (path->dentry->d_parent != sb->s_root)
ext3_msg(sb, KERN_WARNING,
"warning: Quota file not on filesystem root. "
"Journaled quota will not work.");
@@ -2907,7 +2901,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
* When we journal data on quota file, we have to flush journal to see
* all updates to the file when we bypass pagecache...
*/
- if (ext3_should_journal_data(path.dentry->d_inode)) {
+ if (ext3_should_journal_data(path->dentry->d_inode)) {
/*
* We don't need to lock updates but journal_flush() could
* otherwise be livelocked...
@@ -2915,15 +2909,11 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
journal_lock_updates(EXT3_SB(sb)->s_journal);
err = journal_flush(EXT3_SB(sb)->s_journal);
journal_unlock_updates(EXT3_SB(sb)->s_journal);
- if (err) {
- path_put(&path);
+ if (err)
return err;
- }
}
- err = dquot_quota_on_path(sb, type, format_id, &path);
- path_put(&path);
- return err;
+ return dquot_quota_on(sb, type, format_id, path);
}
/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c8d97b..3aa0b72 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -848,6 +848,7 @@ struct ext4_inode_info {
atomic_t i_ioend_count; /* Number of outstanding io_end structs */
/* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio;
+ atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
spinlock_t i_block_reservation_lock;
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+/* For ioend & aio unwritten conversion wait queues */
+#define EXT4_WQ_HASH_SZ 37
+#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
+ EXT4_WQ_HASH_SZ])
+#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
+ EXT4_WQ_HASH_SZ])
+extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+
#endif /* __KERNEL__ */
#endif /* _EXT4_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 63a7581..ccce8a7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
* that this IO needs to convertion to written when IO is
* completed
*/
- if (io)
+ if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
io->flag = EXT4_IO_END_UNWRITTEN;
- else
+ atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+ } else
ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
if (ext4_should_dioread_nolock(inode))
map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* that we need to perform convertion when IO is done.
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
- if (io)
+ if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
io->flag = EXT4_IO_END_UNWRITTEN;
- else
+ atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+ } else
ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN);
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2e8322c..7b80d54 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
return 0;
}
+static void ext4_aiodio_wait(struct inode *inode)
+{
+ wait_queue_head_t *wq = ext4_ioend_wq(inode);
+
+ wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
+}
+
+/*
+ * This tests whether the IO in question is block-aligned or not.
+ * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
+ * are converted to written only after the IO is complete. Until they are
+ * mapped, these blocks appear as holes, so dio_zero_block() will assume that
+ * it needs to zero out portions of the start and/or end block. If 2 AIO
+ * threads are at work on the same unwritten block, they must be synchronized
+ * or one thread will zero the other's data, causing corruption.
+ */
+static int
+ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct super_block *sb = inode->i_sb;
+ int blockmask = sb->s_blocksize - 1;
+ size_t count = iov_length(iov, nr_segs);
+ loff_t final_size = pos + count;
+
+ if (pos >= inode->i_size)
+ return 0;
+
+ if ((pos & blockmask) || (final_size & blockmask))
+ return 1;
+
+ return 0;
+}
+
static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+ int unaligned_aio = 0;
+ int ret;
/*
* If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
sbi->s_bitmap_maxbytes - pos);
}
+ } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
+ !is_sync_kiocb(iocb))) {
+ unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
}
- return generic_file_aio_write(iocb, iov, nr_segs, pos);
+ /* Unaligned direct AIO must be serialized; see comment above */
+ if (unaligned_aio) {
+ static unsigned long unaligned_warn_time;
+
+ /* Warn about this once per day */
+ if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
+ ext4_msg(inode->i_sb, KERN_WARNING,
+ "Unaligned AIO/DIO on inode %ld by %s; "
+ "performance will be poor.",
+ inode->i_ino, current->comm);
+ mutex_lock(ext4_aio_mutex(inode));
+ ext4_aiodio_wait(inode);
+ }
+
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+
+ if (unaligned_aio)
+ mutex_unlock(ext4_aio_mutex(inode));
+
+ return ret;
}
static const struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 851f49b..d1fe09a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
/* We create slab caches for groupinfo data structures based on the
* superblock block size. There will be one per mounted filesystem for
* each unique s_blocksize_bits */
-#define NR_GRPINFO_CACHES \
- (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
+#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
+static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
+ "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
+ "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
+ "ext4_groupinfo_64k", "ext4_groupinfo_128k"
+};
+
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,6 +2419,55 @@ err_freesgi:
return -ENOMEM;
}
+static void ext4_groupinfo_destroy_slabs(void)
+{
+ int i;
+
+ for (i = 0; i < NR_GRPINFO_CACHES; i++) {
+ if (ext4_groupinfo_caches[i])
+ kmem_cache_destroy(ext4_groupinfo_caches[i]);
+ ext4_groupinfo_caches[i] = NULL;
+ }
+}
+
+static int ext4_groupinfo_create_slab(size_t size)
+{
+ static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
+ int slab_size;
+ int blocksize_bits = order_base_2(size);
+ int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+ struct kmem_cache *cachep;
+
+ if (cache_index >= NR_GRPINFO_CACHES)
+ return -EINVAL;
+
+ if (unlikely(cache_index < 0))
+ cache_index = 0;
+
+ mutex_lock(&ext4_grpinfo_slab_create_mutex);
+ if (ext4_groupinfo_caches[cache_index]) {
+ mutex_unlock(&ext4_grpinfo_slab_create_mutex);
+ return 0; /* Already created */
+ }
+
+ slab_size = offsetof(struct ext4_group_info,
+ bb_counters[blocksize_bits + 2]);
+
+ cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
+ slab_size, 0, SLAB_RECLAIM_ACCOUNT,
+ NULL);
+
+ mutex_unlock(&ext4_grpinfo_slab_create_mutex);
+ if (!cachep) {
+ printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
+ return -ENOMEM;
+ }
+
+ ext4_groupinfo_caches[cache_index] = cachep;
+
+ return 0;
+}
+
int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
unsigned offset;
unsigned max;
int ret;
- int cache_index;
- struct kmem_cache *cachep;
- char *namep = NULL;
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
goto out;
}
- cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
- cachep = ext4_groupinfo_caches[cache_index];
- if (!cachep) {
- char name[32];
- int len = offsetof(struct ext4_group_info,
- bb_counters[sb->s_blocksize_bits + 2]);
-
- sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
- namep = kstrdup(name, GFP_KERNEL);
- if (!namep) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* Need to free the kmem_cache_name() when we
- * destroy the slab */
- cachep = kmem_cache_create(namep, len, 0,
- SLAB_RECLAIM_ACCOUNT, NULL);
- if (!cachep) {
- ret = -ENOMEM;
- goto out;
- }
- ext4_groupinfo_caches[cache_index] = cachep;
- }
+ ret = ext4_groupinfo_create_slab(sb->s_blocksize);
+ if (ret < 0)
+ goto out;
/* order 0 is regular bitmap */
sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ out:
if (ret) {
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
- kfree(namep);
}
return ret;
}
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
void ext4_exit_mballoc(void)
{
- int i;
/*
* Wait for completion of call_rcu()'s on ext4_pspace_cachep
* before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep);
-
- for (i = 0; i < NR_GRPINFO_CACHES; i++) {
- struct kmem_cache *cachep = ext4_groupinfo_caches[i];
- if (cachep) {
- char *name = (char *)kmem_cache_name(cachep);
- kmem_cache_destroy(cachep);
- kfree(name);
- }
- }
+ ext4_groupinfo_destroy_slabs();
ext4_remove_debugfs_entry();
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390..e781b7e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
dquot_initialize(dir);
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7270dcf..955cc30 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,14 +32,8 @@
static struct kmem_cache *io_page_cachep, *io_end_cachep;
-#define WQ_HASH_SZ 37
-#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
-static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
-
int __init ext4_init_pageio(void)
{
- int i;
-
io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
if (io_page_cachep == NULL)
return -ENOMEM;
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
kmem_cache_destroy(io_page_cachep);
return -ENOMEM;
}
- for (i = 0; i < WQ_HASH_SZ; i++)
- init_waitqueue_head(&ioend_wq[i]);
-
return 0;
}
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)
void ext4_ioend_wait(struct inode *inode)
{
- wait_queue_head_t *wq = to_ioend_wq(inode);
+ wait_queue_head_t *wq = ext4_ioend_wq(inode);
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
}
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
for (i = 0; i < io->num_io_pages; i++)
put_io_page(io->pages[i]);
io->num_io_pages = 0;
- wq = to_ioend_wq(io->inode);
+ wq = ext4_ioend_wq(io->inode);
if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
waitqueue_active(wq))
wake_up_all(wq);
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
struct inode *inode = io->inode;
loff_t offset = io->offset;
ssize_t size = io->size;
+ wait_queue_head_t *wq;
int ret = 0;
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */
- io->flag &= ~EXT4_IO_END_UNWRITTEN;
+ if (io->flag & EXT4_IO_END_UNWRITTEN) {
+ io->flag &= ~EXT4_IO_END_UNWRITTEN;
+ /* Wake up anyone waiting on unwritten extent conversion */
+ wq = ext4_ioend_wq(io->inode);
+ if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
+ waitqueue_active(wq)) {
+ wake_up_all(wq);
+ }
+ }
+
return ret;
}
@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
struct inode *inode;
unsigned long flags;
int i;
+ sector_t bi_sector = bio->bi_sector;
BUG_ON(!io_end);
bio->bi_private = NULL;
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
if (error)
SetPageError(page);
BUG_ON(!head);
- if (head->b_size == PAGE_CACHE_SIZE)
- clear_buffer_dirty(head);
- else {
+ if (head->b_size != PAGE_CACHE_SIZE) {
loff_t offset;
loff_t io_end_offset = io_end->offset + io_end->size;
@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
if (error)
buffer_io_error(bh);
- clear_buffer_dirty(bh);
}
if (buffer_delay(bh))
partial_write = 1;
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long)
- bio->bi_sector >> (inode->i_blkbits - 9));
+ bi_sector >> (inode->i_blkbits - 9));
}
/* Add the io_end to per-inode completed io list*/
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
blocksize = 1 << inode->i_blkbits;
+ BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
set_page_writeback(page);
ClearPageError(page);
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
for (bh = head = page_buffers(page), block_start = 0;
bh != head || !block_start;
block_start = block_end, bh = bh->b_this_page) {
+
block_end = block_start + blocksize;
if (block_start >= len) {
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
continue;
}
+ clear_buffer_dirty(bh);
ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
if (ret) {
/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cb10a06..203f9e4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
+static void ext4_clear_request_list(void);
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
atomic_set(&ei->i_ioend_count, 0);
+ atomic_set(&ei->i_aiodio_unwritten, 0);
return &ei->vfs_inode;
}
@@ -1161,7 +1163,7 @@ static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- char *path);
+ struct path *path);
static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb)
mutex_unlock(&ext4_li_info->li_list_mtx);
}
+static struct task_struct *ext4_lazyinit_task;
+
/*
* This is the function where ext4lazyinit thread lives. It walks
* through the request list searching for next scheduled filesystem.
@@ -2784,6 +2788,10 @@ cont_thread:
if (time_before(jiffies, next_wakeup))
schedule();
finish_wait(&eli->li_wait_daemon, &wait);
+ if (kthread_should_stop()) {
+ ext4_clear_request_list();
+ goto exit_thread;
+ }
}
exit_thread:
@@ -2808,6 +2816,7 @@ exit_thread:
wake_up(&eli->li_wait_task);
kfree(ext4_li_info);
+ ext4_lazyinit_task = NULL;
ext4_li_info = NULL;
mutex_unlock(&ext4_li_mtx);
@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void)
static int ext4_run_lazyinit_thread(void)
{
- struct task_struct *t;
-
- t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
- if (IS_ERR(t)) {
- int err = PTR_ERR(t);
+ ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
+ ext4_li_info, "ext4lazyinit");
+ if (IS_ERR(ext4_lazyinit_task)) {
+ int err = PTR_ERR(ext4_lazyinit_task);
ext4_clear_request_list();
del_timer_sync(&ext4_li_info->li_timer);
kfree(ext4_li_info);
@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void)
* If thread exited earlier
* there's nothing to be done.
*/
- if (!ext4_li_info)
+ if (!ext4_li_info || !ext4_lazyinit_task)
return;
- ext4_clear_request_list();
-
- while (ext4_li_info->li_task) {
- wake_up(&ext4_li_info->li_wait_daemon);
- wait_event(ext4_li_info->li_wait_task,
- ext4_li_info->li_task == NULL);
- }
+ kthread_stop(ext4_lazyinit_task);
}
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
@@ -3413,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext4_qctl_operations;
sb->dq_op = &ext4_quota_operations;
#endif
+ memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
mutex_init(&sbi->s_resize_lock);
@@ -3507,7 +3511,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
no_journal:
- EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
+ /*
+ * The maximum number of concurrent works can be high and
+ * concurrency isn't really necessary. Limit it to 1.
+ */
+ EXT4_SB(sb)->dio_unwritten_wq =
+ alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
goto failed_mount_wq;
@@ -4558,27 +4567,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
* Standard function to be called on quota_on
*/
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- char *name)
+ struct path *path)
{
int err;
- struct path path;
if (!test_opt(sb, QUOTA))
return -EINVAL;
- err = kern_path(name, LOOKUP_FOLLOW, &path);
- if (err)
- return err;
-
/* Quotafile not on the same filesystem? */
- if (path.mnt->mnt_sb != sb) {
- path_put(&path);
+ if (path->mnt->mnt_sb != sb)
return -EXDEV;
- }
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
/* Quotafile not in fs root? */
- if (path.dentry->d_parent != sb->s_root)
+ if (path->dentry->d_parent != sb->s_root)
ext4_msg(sb, KERN_WARNING,
"Quota file not on filesystem root. "
"Journaled quota will not work");
@@ -4589,7 +4591,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* all updates to the file when we bypass pagecache...
*/
if (EXT4_SB(sb)->s_journal &&
- ext4_should_journal_data(path.dentry->d_inode)) {
+ ext4_should_journal_data(path->dentry->d_inode)) {
/*
* We don't need to lock updates but journal_flush() could
* otherwise be livelocked...
@@ -4597,15 +4599,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- if (err) {
- path_put(&path);
+ if (err)
return err;
- }
}
- err = dquot_quota_on_path(sb, type, format_id, &path);
- path_put(&path);
- return err;
+ return dquot_quota_on(sb, type, format_id, path);
}
static int ext4_quota_off(struct super_block *sb, int type)
@@ -4779,7 +4777,7 @@ static struct file_system_type ext4_fs_type = {
.fs_flags = FS_REQUIRES_DEV,
};
-int __init ext4_init_feat_adverts(void)
+static int __init ext4_init_feat_adverts(void)
{
struct ext4_features *ef;
int ret = -ENOMEM;
@@ -4803,23 +4801,44 @@ out:
return ret;
}
+static void ext4_exit_feat_adverts(void)
+{
+ kobject_put(&ext4_feat->f_kobj);
+ wait_for_completion(&ext4_feat->f_kobj_unregister);
+ kfree(ext4_feat);
+}
+
+/* Shared across all ext4 file systems */
+wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+
static int __init ext4_init_fs(void)
{
- int err;
+ int i, err;
ext4_check_flag_values();
+
+ for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
+ mutex_init(&ext4__aio_mutex[i]);
+ init_waitqueue_head(&ext4__ioend_wq[i]);
+ }
+
err = ext4_init_pageio();
if (err)
return err;
err = ext4_init_system_zone();
if (err)
- goto out5;
+ goto out7;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
if (!ext4_kset)
- goto out4;
+ goto out6;
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
+ if (!ext4_proc_root)
+ goto out5;
err = ext4_init_feat_adverts();
+ if (err)
+ goto out4;
err = ext4_init_mballoc();
if (err)
@@ -4849,12 +4868,14 @@ out1:
out2:
ext4_exit_mballoc();
out3:
- kfree(ext4_feat);
+ ext4_exit_feat_adverts();
+out4:
remove_proc_entry("fs/ext4", NULL);
+out5:
kset_unregister(ext4_kset);
-out4:
+out6:
ext4_exit_system_zone();
-out5:
+out7:
ext4_exit_pageio();
return err;
}
@@ -4868,6 +4889,7 @@ static void __exit ext4_exit_fs(void)
destroy_inodecache();
ext4_exit_xattr();
ext4_exit_mballoc();
+ ext4_exit_feat_adverts();
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
ext4_exit_system_zone();
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe..0e277ec 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
struct inode *inode = de->d_inode;
u32 ipos_h, ipos_m, ipos_l;
- if (len < 5)
+ if (len < 5) {
+ *lenp = 5;
return 255; /* no room */
+ }
ipos_h = MSDOS_I(inode)->i_pos >> 8;
ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752..adae3fb 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ecc8b39..6c82e5b 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
SYSCALL_DEFINE1(dup, unsigned int, fildes)
{
int ret = -EBADF;
- struct file *file = fget(fildes);
+ struct file *file = fget_raw(fildes);
if (file) {
ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
return err;
}
+static int check_fcntl_cmd(unsigned cmd)
+{
+ switch (cmd) {
+ case F_DUPFD:
+ case F_DUPFD_CLOEXEC:
+ case F_GETFD:
+ case F_SETFD:
+ case F_GETFL:
+ return 1;
+ }
+ return 0;
+}
+
SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
struct file *filp;
long err = -EBADF;
- filp = fget(fd);
+ filp = fget_raw(fd);
if (!filp)
goto out;
+ if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (!check_fcntl_cmd(cmd)) {
+ fput(filp);
+ goto out;
+ }
+ }
+
err = security_file_fcntl(filp, cmd, arg);
if (err) {
fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
long err;
err = -EBADF;
- filp = fget(fd);
+ filp = fget_raw(fd);
if (!filp)
goto out;
+ if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (!check_fcntl_cmd(cmd)) {
+ fput(filp);
+ goto out;
+ }
+ }
+
err = security_file_fcntl(filp, cmd, arg);
if (err) {
fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+ BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
O_RDONLY | O_WRONLY | O_RDWR |
O_CREAT | O_EXCL | O_NOCTTY |
O_TRUNC | O_APPEND | /* O_NONBLOCK | */
__O_SYNC | O_DSYNC | FASYNC |
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
- FMODE_EXEC
+ __FMODE_EXEC | O_PATH
));
fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 0000000..bf93ad2
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/exportfs.h>
+#include <linux/fs_struct.h>
+#include <linux/fsnotify.h>
+#include <asm/uaccess.h>
+#include "internal.h"
+
+static long do_sys_name_to_handle(struct path *path,
+ struct file_handle __user *ufh,
+ int __user *mnt_id)
+{
+ long retval;
+ struct file_handle f_handle;
+ int handle_dwords, handle_bytes;
+ struct file_handle *handle = NULL;
+
+ /*
+ * We need t make sure wether the file system
+ * support decoding of the file handle
+ */
+ if (!path->mnt->mnt_sb->s_export_op ||
+ !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
+ return -EFAULT;
+
+ if (f_handle.handle_bytes > MAX_HANDLE_SZ)
+ return -EINVAL;
+
+ handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ /* convert handle size to multiple of sizeof(u32) */
+ handle_dwords = f_handle.handle_bytes >> 2;
+
+ /* we ask for a non connected handle */
+ retval = exportfs_encode_fh(path->dentry,
+ (struct fid *)handle->f_handle,
+ &handle_dwords, 0);
+ handle->handle_type = retval;
+ /* convert handle size to bytes */
+ handle_bytes = handle_dwords * sizeof(u32);
+ handle->handle_bytes = handle_bytes;
+ if ((handle->handle_bytes > f_handle.handle_bytes) ||
+ (retval == 255) || (retval == -ENOSPC)) {
+ /* As per old exportfs_encode_fh documentation
+ * we could return ENOSPC to indicate overflow
+ * But file system returned 255 always. So handle
+ * both the values
+ */
+ /*
+ * set the handle size to zero so we copy only
+ * non variable part of the file_handle
+ */
+ handle_bytes = 0;
+ retval = -EOVERFLOW;
+ } else
+ retval = 0;
+ /* copy the mount id */
+ if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
+ copy_to_user(ufh, handle,
+ sizeof(struct file_handle) + handle_bytes))
+ retval = -EFAULT;
+ kfree(handle);
+ return retval;
+}
+
+/**
+ * sys_name_to_handle_at: convert name to handle
+ * @dfd: directory relative to which name is interpreted if not absolute
+ * @name: name that should be converted to handle.
+ * @handle: resulting file handle
+ * @mnt_id: mount id of the file system containing the file
+ * @flag: flag value to indicate whether to follow symlink or not
+ *
+ * @handle->handle_size indicate the space available to store the
+ * variable part of the file handle in bytes. If there is not
+ * enough space, the field is updated to return the minimum
+ * value required.
+ */
+SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
+ struct file_handle __user *, handle, int __user *, mnt_id,
+ int, flag)
+{
+ struct path path;
+ int lookup_flags;
+ int err;
+
+ if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
+ return -EINVAL;
+
+ lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+ err = user_path_at(dfd, name, lookup_flags, &path);
+ if (!err) {
+ err = do_sys_name_to_handle(&path, handle, mnt_id);
+ path_put(&path);
+ }
+ return err;
+}
+
+static struct vfsmount *get_vfsmount_from_fd(int fd)
+{
+ struct path path;
+
+ if (fd == AT_FDCWD) {
+ struct fs_struct *fs = current->fs;
+ spin_lock(&fs->lock);
+ path = fs->pwd;
+ mntget(path.mnt);
+ spin_unlock(&fs->lock);
+ } else {
+ int fput_needed;
+ struct file *file = fget_light(fd, &fput_needed);
+ if (!file)
+ return ERR_PTR(-EBADF);
+ path = file->f_path;
+ mntget(path.mnt);
+ fput_light(file, fput_needed);
+ }
+ return path.mnt;
+}
+
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+ return 1;
+}
+
+static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
+ struct path *path)
+{
+ int retval = 0;
+ int handle_dwords;
+
+ path->mnt = get_vfsmount_from_fd(mountdirfd);
+ if (IS_ERR(path->mnt)) {
+ retval = PTR_ERR(path->mnt);
+ goto out_err;
+ }
+ /* change the handle size to multiple of sizeof(u32) */
+ handle_dwords = handle->handle_bytes >> 2;
+ path->dentry = exportfs_decode_fh(path->mnt,
+ (struct fid *)handle->f_handle,
+ handle_dwords, handle->handle_type,
+ vfs_dentry_acceptable, NULL);
+ if (IS_ERR(path->dentry)) {
+ retval = PTR_ERR(path->dentry);
+ goto out_mnt;
+ }
+ return 0;
+out_mnt:
+ mntput(path->mnt);
+out_err:
+ return retval;
+}
+
+static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
+ struct path *path)
+{
+ int retval = 0;
+ struct file_handle f_handle;
+ struct file_handle *handle = NULL;
+
+ /*
+ * With handle we don't look at the execute bit on the
+ * the directory. Ideally we would like CAP_DAC_SEARCH.
+ * But we don't have that
+ */
+ if (!capable(CAP_DAC_READ_SEARCH)) {
+ retval = -EPERM;
+ goto out_err;
+ }
+ if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
+ retval = -EFAULT;
+ goto out_err;
+ }
+ if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
+ (f_handle.handle_bytes == 0)) {
+ retval = -EINVAL;
+ goto out_err;
+ }
+ handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ GFP_KERNEL);
+ if (!handle) {
+ retval = -ENOMEM;
+ goto out_err;
+ }
+ /* copy the full handle */
+ if (copy_from_user(handle, ufh,
+ sizeof(struct file_handle) +
+ f_handle.handle_bytes)) {
+ retval = -EFAULT;
+ goto out_handle;
+ }
+
+ retval = do_handle_to_path(mountdirfd, handle, path);
+
+out_handle:
+ kfree(handle);
+out_err:
+ return retval;
+}
+
+long do_handle_open(int mountdirfd,
+ struct file_handle __user *ufh, int open_flag)
+{
+ long retval = 0;
+ struct path path;
+ struct file *file;
+ int fd;
+
+ retval = handle_to_path(mountdirfd, ufh, &path);
+ if (retval)
+ return retval;
+
+ fd = get_unused_fd_flags(open_flag);
+ if (fd < 0) {
+ path_put(&path);
+ return fd;
+ }
+ file = file_open_root(path.dentry, path.mnt, "", open_flag);
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ retval = PTR_ERR(file);
+ } else {
+ retval = fd;
+ fsnotify_open(file);
+ fd_install(fd, file);
+ }
+ path_put(&path);
+ return retval;
+}
+
+/**
+ * sys_open_by_handle_at: Open the file handle
+ * @mountdirfd: directory file descriptor
+ * @handle: file handle to be opened
+ * @flag: open flags.
+ *
+ * @mountdirfd indicate the directory file descriptor
+ * of the mount point. file handle is decoded relative
+ * to the vfsmount pointed by the @mountdirfd. @flags
+ * value is same as the open(2) flags.
+ */
+SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
+ struct file_handle __user *, handle,
+ int, flags)
+{
+ long ret;
+
+ if (force_o_largefile())
+ flags |= O_LARGEFILE;
+
+ ret = do_handle_open(mountdirfd, handle, flags);
+ return ret;
+}
diff --git a/fs/file_table.c b/fs/file_table.c
index c3e89ad..74a9544 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -125,13 +125,13 @@ struct file *get_empty_filp(void)
goto fail;
percpu_counter_inc(&nr_files);
+ f->f_cred = get_cred(cred);
if (security_file_alloc(f))
goto fail_sec;
INIT_LIST_HEAD(&f->f_u.fu_list);
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
- f->f_cred = get_cred(cred);
spin_lock_init(&f->f_lock);
eventpoll_init_file(f);
/* f->f_version: 0 */
@@ -276,11 +276,10 @@ struct file *fget(unsigned int fd)
rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- if (!atomic_long_inc_not_zero(&file->f_count)) {
- /* File object ref couldn't be taken */
- rcu_read_unlock();
- return NULL;
- }
+ /* File object ref couldn't be taken */
+ if (file->f_mode & FMODE_PATH ||
+ !atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
}
rcu_read_unlock();
@@ -289,6 +288,25 @@ struct file *fget(unsigned int fd)
EXPORT_SYMBOL(fget);
+struct file *fget_raw(unsigned int fd)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ /* File object ref couldn't be taken */
+ if (!atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
+ }
+ rcu_read_unlock();
+
+ return file;
+}
+
+EXPORT_SYMBOL(fget_raw);
+
/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
*
@@ -313,6 +331,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
*fput_needed = 0;
if (atomic_read(&files->count) == 1) {
file = fcheck_files(files, fd);
+ if (file && (file->f_mode & FMODE_PATH))
+ file = NULL;
+ } else {
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ if (!(file->f_mode & FMODE_PATH) &&
+ atomic_long_inc_not_zero(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
+ }
+ rcu_read_unlock();
+ }
+
+ return file;
+}
+
+struct file *fget_raw_light(unsigned int fd, int *fput_needed)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ *fput_needed = 0;
+ if (atomic_read(&files->count) == 1) {
+ file = fcheck_files(files, fd);
} else {
rcu_read_lock();
file = fcheck_files(files, fd);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed844..8bd0ef9 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
{
struct inode *inode;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = entry->d_inode;
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
if (err)
return err;
- if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
- return 0;
+ if (attr->ia_valid & ATTR_OPEN) {
+ if (fc->atomic_o_trunc)
+ return 0;
+ file = NULL;
+ }
if (attr->ia_valid & ATTR_SIZE)
is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc..9e0832d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff;
}
+static void fuse_release_async(struct work_struct *work)
+{
+ struct fuse_req *req;
+ struct fuse_conn *fc;
+ struct path path;
+
+ req = container_of(work, struct fuse_req, misc.release.work);
+ path = req->misc.release.path;
+ fc = get_fuse_conn(path.dentry->d_inode);
+
+ fuse_put_request(fc, req);
+ path_put(&path);
+}
+
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
- path_put(&req->misc.release.path);
+ if (fc->destroy_req) {
+ /*
+ * If this is a fuseblk mount, then it's possible that
+ * releasing the path will result in releasing the
+ * super block and sending the DESTROY request. If
+ * the server is single threaded, this would hang.
+ * For this reason do the path_put() in a separate
+ * thread.
+ */
+ atomic_inc(&req->count);
+ INIT_WORK(&req->misc.release.work, fuse_release_async);
+ schedule_work(&req->misc.release.work);
+ } else {
+ path_put(&req->misc.release.path);
+ }
}
-static void fuse_file_put(struct fuse_file *ff)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
{
if (atomic_dec_and_test(&ff->count)) {
struct fuse_req *req = ff->reserved_req;
- req->end = fuse_release_end;
- fuse_request_send_background(ff->fc, req);
+ if (sync) {
+ fuse_request_send(ff->fc, req);
+ path_put(&req->misc.release.path);
+ fuse_put_request(ff->fc, req);
+ } else {
+ req->end = fuse_release_end;
+ fuse_request_send_background(ff->fc, req);
+ }
kfree(ff);
}
}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
* Normally this will send the RELEASE request, however if
* some asynchronous READ or WRITE requests are outstanding,
* the sending will be delayed.
+ *
+ * Make the release synchronous if this is a fuseblk mount,
+ * synchronous RELEASE is allowed (and desirable) in this case
+ * because the server can be trusted not to screw up.
*/
- fuse_file_put(ff);
+ fuse_file_put(ff, ff->fc->destroy_req != NULL);
}
static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
page_cache_release(page);
}
if (req->ff)
- fuse_file_put(req->ff);
+ fuse_file_put(req->ff, false);
}
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
__free_page(req->pages[0]);
- fuse_file_put(req->ff);
+ fuse_file_put(req->ff, false);
}
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a..d428694 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/poll.h>
+#include <linux/workqueue.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
/** Data for asynchronous requests */
union {
struct {
- struct fuse_release_in in;
+ union {
+ struct fuse_release_in in;
+ struct work_struct work;
+ };
struct path path;
} release;
struct fuse_init_in init_in;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68c..051b1a0 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
u64 nodeid;
u32 generation;
- if (*max_len < len)
+ if (*max_len < len) {
+ *max_len = len;
return 255;
+ }
nodeid = get_fuse_inode(inode)->nodeid;
generation = inode->i_generation;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a45633..0da8da2 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
int error;
int had_lock = 0;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
parent = dget_parent(dentry);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8..b5a5e60 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
struct super_block *sb = inode->i_sb;
struct gfs2_inode *ip = GFS2_I(inode);
- if (*len < GFS2_SMALL_FH_SIZE ||
- (connectable && *len < GFS2_LARGE_FH_SIZE))
+ if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
+ *len = GFS2_LARGE_FH_SIZE;
return 255;
+ } else if (*len < GFS2_SMALL_FH_SIZE) {
+ *len = GFS2_SMALL_FH_SIZE;
+ return 255;
+ }
fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 85044b4..e243131 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1692,11 +1692,11 @@ int __init gfs2_glock_init(void)
}
glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
- WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ WQ_HIGHPRI | WQ_FREEZABLE, 0);
if (IS_ERR(glock_workqueue))
return PTR_ERR(glock_workqueue);
gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
- WQ_MEM_RECLAIM | WQ_FREEZEABLE,
+ WQ_MEM_RECLAIM | WQ_FREEZABLE,
0);
if (IS_ERR(gfs2_delete_workqueue)) {
destroy_workqueue(glock_workqueue);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index d850004..888a5f5 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -61,14 +61,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
struct address_space *mapping = (struct address_space *)(gl + 1);
gfs2_init_glock_once(gl);
- memset(mapping, 0, sizeof(*mapping));
- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
- spin_lock_init(&mapping->tree_lock);
- spin_lock_init(&mapping->i_mmap_lock);
- INIT_LIST_HEAD(&mapping->private_list);
- spin_lock_init(&mapping->private_lock);
- INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+ address_space_init_once(mapping);
}
/**
@@ -146,7 +139,7 @@ static int __init init_gfs2_fs(void)
error = -ENOMEM;
gfs_recovery_wq = alloc_workqueue("gfs_recovery",
- WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0);
+ WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
if (!gfs_recovery_wq)
goto fail_wq;
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aa..b4d70b1 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
/*
- * hfs_unlink()
+ * hfs_remove()
*
- * This is the unlink() entry in the inode_operations structure for
- * regular HFS directories. The purpose is to delete an existing
- * file, given the inode for the parent directory and the name
- * (and its length) of the existing file.
- */
-static int hfs_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode;
- int res;
-
- inode = dentry->d_inode;
- res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
- if (res)
- return res;
-
- drop_nlink(inode);
- hfs_delete_inode(inode);
- inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
-
- return res;
-}
-
-/*
- * hfs_rmdir()
+ * This serves as both unlink() and rmdir() in the inode_operations
+ * structure for regular HFS directories. The purpose is to delete
+ * an existing child, given the inode for the parent directory and
+ * the name (and its length) of the existing directory.
*
- * This is the rmdir() entry in the inode_operations structure for
- * regular HFS directories. The purpose is to delete an existing
- * directory, given the inode for the parent directory and the name
- * (and its length) of the existing directory.
+ * HFS does not have hardlinks, so both rmdir and unlink set the
+ * link count to 0. The only difference is the emptiness check.
*/
-static int hfs_rmdir(struct inode *dir, struct dentry *dentry)
+static int hfs_remove(struct inode *dir, struct dentry *dentry)
{
- struct inode *inode;
+ struct inode *inode = dentry->d_inode;
int res;
- inode = dentry->d_inode;
- if (inode->i_size != 2)
+ if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
return -ENOTEMPTY;
res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* Unlink destination if it already exists */
if (new_dentry->d_inode) {
- res = hfs_unlink(new_dir, new_dentry);
+ res = hfs_remove(new_dir, new_dentry);
if (res)
return res;
}
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
const struct inode_operations hfs_dir_inode_operations = {
.create = hfs_create,
.lookup = hfs_lookup,
- .unlink = hfs_unlink,
+ .unlink = hfs_remove,
.mkdir = hfs_mkdir,
- .rmdir = hfs_rmdir,
+ .rmdir = hfs_remove,
.rename = hfs_rename,
.setattr = hfs_inode_setattr,
};
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 52a0bca..b1991a2 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -397,8 +397,8 @@ int hfsplus_file_extend(struct inode *inode)
u32 start, len, goal;
int res;
- if (sbi->total_blocks - sbi->free_blocks + 8 >
- sbi->alloc_file->i_size * 8) {
+ if (sbi->alloc_file->i_size * 8 <
+ sbi->total_blocks - sbi->free_blocks + 8) {
/* extend alloc file */
printk(KERN_ERR "hfs: extend alloc file! "
"(%llu,%u,%u)\n",
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index d66ad11..40ad88c 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -134,7 +134,7 @@ int hfs_part_find(struct super_block *sb,
res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
data, READ);
if (res)
- return res;
+ goto out;
switch (be16_to_cpu(*((__be16 *)data))) {
case HFS_OLD_PMAP_MAGIC:
@@ -147,7 +147,7 @@ int hfs_part_find(struct super_block *sb,
res = -ENOENT;
break;
}
-
+out:
kfree(data);
return res;
}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a3b479..b49b555 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -338,20 +338,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root, *inode;
struct qstr str;
struct nls_table *nls = NULL;
- int err = -EINVAL;
+ int err;
+ err = -EINVAL;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
- return -ENOMEM;
+ goto out;
sb->s_fs_info = sbi;
mutex_init(&sbi->alloc_mutex);
mutex_init(&sbi->vh_mutex);
hfsplus_fill_defaults(sbi);
+
+ err = -EINVAL;
if (!hfsplus_parse_options(data, sbi)) {
printk(KERN_ERR "hfs: unable to parse mount options\n");
- err = -EINVAL;
- goto cleanup;
+ goto out_unload_nls;
}
/* temporarily use utf8 to correctly find the hidden dir below */
@@ -359,16 +361,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
sbi->nls = load_nls("utf8");
if (!sbi->nls) {
printk(KERN_ERR "hfs: unable to load nls for utf8\n");
- err = -EINVAL;
- goto cleanup;
+ goto out_unload_nls;
}
/* Grab the volume header */
if (hfsplus_read_wrapper(sb)) {
if (!silent)
printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n");
- err = -EINVAL;
- goto cleanup;
+ goto out_unload_nls;
}
vhdr = sbi->s_vhdr;
@@ -377,7 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION ||
be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) {
printk(KERN_ERR "hfs: wrong filesystem version\n");
- goto cleanup;
+ goto out_free_vhdr;
}
sbi->total_blocks = be32_to_cpu(vhdr->total_blocks);
sbi->free_blocks = be32_to_cpu(vhdr->free_blocks);
@@ -421,19 +421,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
if (!sbi->ext_tree) {
printk(KERN_ERR "hfs: failed to load extents file\n");
- goto cleanup;
+ goto out_free_vhdr;
}
sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID);
if (!sbi->cat_tree) {
printk(KERN_ERR "hfs: failed to load catalog file\n");
- goto cleanup;
+ goto out_close_ext_tree;
}
inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID);
if (IS_ERR(inode)) {
printk(KERN_ERR "hfs: failed to load allocation file\n");
err = PTR_ERR(inode);
- goto cleanup;
+ goto out_close_cat_tree;
}
sbi->alloc_file = inode;
@@ -442,14 +442,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
if (IS_ERR(root)) {
printk(KERN_ERR "hfs: failed to load root directory\n");
err = PTR_ERR(root);
- goto cleanup;
- }
- sb->s_d_op = &hfsplus_dentry_operations;
- sb->s_root = d_alloc_root(root);
- if (!sb->s_root) {
- iput(root);
- err = -ENOMEM;
- goto cleanup;
+ goto out_put_alloc_file;
}
str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
@@ -459,46 +452,69 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
- goto cleanup;
+ goto out_put_root;
inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id));
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
- goto cleanup;
+ goto out_put_root;
}
sbi->hidden_dir = inode;
} else
hfs_find_exit(&fd);
- if (sb->s_flags & MS_RDONLY)
- goto out;
+ if (!(sb->s_flags & MS_RDONLY)) {
+ /*
+ * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused
+ * all three are registered with Apple for our use
+ */
+ vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
+ vhdr->modify_date = hfsp_now2mt();
+ be32_add_cpu(&vhdr->write_count, 1);
+ vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
+ vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
+ hfsplus_sync_fs(sb, 1);
- /* H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused
- * all three are registered with Apple for our use
- */
- vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
- vhdr->modify_date = hfsp_now2mt();
- be32_add_cpu(&vhdr->write_count, 1);
- vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
- vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
- hfsplus_sync_fs(sb, 1);
-
- if (!sbi->hidden_dir) {
- mutex_lock(&sbi->vh_mutex);
- sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
- hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode,
- &str, sbi->hidden_dir);
- mutex_unlock(&sbi->vh_mutex);
-
- hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY);
+ if (!sbi->hidden_dir) {
+ mutex_lock(&sbi->vh_mutex);
+ sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
+ hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str,
+ sbi->hidden_dir);
+ mutex_unlock(&sbi->vh_mutex);
+
+ hfsplus_mark_inode_dirty(sbi->hidden_dir,
+ HFSPLUS_I_CAT_DIRTY);
+ }
}
-out:
+
+ sb->s_d_op = &hfsplus_dentry_operations;
+ sb->s_root = d_alloc_root(root);
+ if (!sb->s_root) {
+ err = -ENOMEM;
+ goto out_put_hidden_dir;
+ }
+
unload_nls(sbi->nls);
sbi->nls = nls;
return 0;
-cleanup:
- hfsplus_put_super(sb);
+out_put_hidden_dir:
+ iput(sbi->hidden_dir);
+out_put_root:
+ iput(sbi->alloc_file);
+out_put_alloc_file:
+ iput(sbi->alloc_file);
+out_close_cat_tree:
+ hfs_btree_close(sbi->cat_tree);
+out_close_ext_tree:
+ hfs_btree_close(sbi->ext_tree);
+out_free_vhdr:
+ kfree(sbi->s_vhdr);
+ kfree(sbi->s_backup_vhdr);
+out_unload_nls:
+ unload_nls(sbi->nls);
unload_nls(nls);
+ kfree(sbi);
+out:
return err;
}
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 1962317..3031d81 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -167,7 +167,7 @@ reread:
break;
case cpu_to_be16(HFSP_WRAP_MAGIC):
if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
- goto out;
+ goto out_free_backup_vhdr;
wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
part_size = wd.embed_count * wd.ablk_size;
@@ -179,7 +179,7 @@ reread:
* (should do this only for cdrom/loop though)
*/
if (hfs_part_find(sb, &part_start, &part_size))
- goto out;
+ goto out_free_backup_vhdr;
goto reread;
}
diff --git a/fs/inode.c b/fs/inode.c
index da85e56..9910c03 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -84,16 +84,13 @@ static struct hlist_head *inode_hashtable __read_mostly;
DEFINE_SPINLOCK(inode_lock);
/*
- * iprune_sem provides exclusion between the kswapd or try_to_free_pages
- * icache shrinking path, and the umount path. Without this exclusion,
- * by the time prune_icache calls iput for the inode whose pages it has
- * been invalidating, or by the time it calls clear_inode & destroy_inode
- * from its final dispose_list, the struct super_block they refer to
- * (for inode->i_sb->s_op) may already have been freed and reused.
+ * iprune_sem provides exclusion between the icache shrinking and the
+ * umount path.
*
- * We make this an rwsem because the fastpath is icache shrinking. In
- * some cases a filesystem may be doing a significant amount of work in
- * its inode reclaim code, so this should improve parallelism.
+ * We don't actually need it to protect anything in the umount path,
+ * but only need to cycle through it to make sure any inode that
+ * prune_icache took off the LRU list has been fully torn down by the
+ * time we are past evict_inodes.
*/
static DECLARE_RWSEM(iprune_sem);
@@ -295,6 +292,20 @@ static void destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, i_callback);
}
+void address_space_init_once(struct address_space *mapping)
+{
+ memset(mapping, 0, sizeof(*mapping));
+ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+ spin_lock_init(&mapping->tree_lock);
+ spin_lock_init(&mapping->i_mmap_lock);
+ INIT_LIST_HEAD(&mapping->private_list);
+ spin_lock_init(&mapping->private_lock);
+ INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+ INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+ mutex_init(&mapping->unmap_mutex);
+}
+EXPORT_SYMBOL(address_space_init_once);
+
/*
* These are initializations that only need to be done
* once, because the fields are idempotent across use
@@ -308,13 +319,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_devices);
INIT_LIST_HEAD(&inode->i_wb_list);
INIT_LIST_HEAD(&inode->i_lru);
- INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
- spin_lock_init(&inode->i_data.tree_lock);
- spin_lock_init(&inode->i_data.i_mmap_lock);
- INIT_LIST_HEAD(&inode->i_data.private_list);
- spin_lock_init(&inode->i_data.private_lock);
- INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
- INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
+ address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -508,17 +513,12 @@ void evict_inodes(struct super_block *sb)
struct inode *inode, *next;
LIST_HEAD(dispose);
- down_write(&iprune_sem);
-
spin_lock(&inode_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
if (atomic_read(&inode->i_count))
continue;
-
- if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
- WARN_ON(1);
+ if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
continue;
- }
inode->i_state |= I_FREEING;
@@ -534,28 +534,40 @@ void evict_inodes(struct super_block *sb)
spin_unlock(&inode_lock);
dispose_list(&dispose);
+
+ /*
+ * Cycle through iprune_sem to make sure any inode that prune_icache
+ * moved off the list before we took the lock has been fully torn
+ * down.
+ */
+ down_write(&iprune_sem);
up_write(&iprune_sem);
}
/**
* invalidate_inodes - attempt to free all inodes on a superblock
* @sb: superblock to operate on
+ * @kill_dirty: flag to guide handling of dirty inodes
*
* Attempts to free all inodes for a given superblock. If there were any
* busy inodes return a non-zero value, else zero.
+ * If @kill_dirty is set, discard dirty inodes too, otherwise treat
+ * them as busy.
*/
-int invalidate_inodes(struct super_block *sb)
+int invalidate_inodes(struct super_block *sb, bool kill_dirty)
{
int busy = 0;
struct inode *inode, *next;
LIST_HEAD(dispose);
- down_write(&iprune_sem);
-
spin_lock(&inode_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
continue;
+ if (inode->i_state & I_DIRTY && !kill_dirty) {
+ busy = 1;
+ continue;
+ }
if (atomic_read(&inode->i_count)) {
busy = 1;
continue;
@@ -575,7 +587,6 @@ int invalidate_inodes(struct super_block *sb)
spin_unlock(&inode_lock);
dispose_list(&dispose);
- up_write(&iprune_sem);
return busy;
}
diff --git a/fs/internal.h b/fs/internal.h
index 0663568..f3d15de 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,10 +106,23 @@ extern void put_super(struct super_block *sb);
struct nameidata;
extern struct file *nameidata_to_filp(struct nameidata *);
extern void release_open_intent(struct nameidata *);
+struct open_flags {
+ int open_flag;
+ int mode;
+ int acc_mode;
+ int intent;
+};
+extern struct file *do_filp_open(int dfd, const char *pathname,
+ const struct open_flags *op, int lookup_flags);
+extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
+ const char *, const struct open_flags *, int lookup_flags);
+
+extern long do_handle_open(int mountdirfd,
+ struct file_handle __user *ufh, int open_flag);
/*
* inode.c
*/
extern int get_nr_dirty_inodes(void);
extern void evict_inodes(struct super_block *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index a59635e..1eebeb7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -273,6 +273,13 @@ int __generic_block_fiemap(struct inode *inode,
len = isize;
}
+ /*
+ * Some filesystems can't deal with being asked to map less than
+ * blocksize, so make sure our len is at least block length.
+ */
+ if (logical_to_blk(inode, len) == 0)
+ len = blk_to_logical(inode, 1);
+
start_blk = logical_to_blk(inode, start);
last_blk = logical_to_blk(inode, start + len - 1);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb..dd4687f 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
* offset of the inode and the upper 16 bits of fh32[1] to
* hold the offset of the parent.
*/
-
- if (len < 3 || (connectable && len < 5))
+ if (connectable && (len < 5)) {
+ *max_len = 5;
+ return 255;
+ } else if (len < 3) {
+ *max_len = 3;
return 255;
+ }
len = 3;
fh32[0] = ei->i_iget5_block;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9e46869..97e7346 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal)
}
/*
- * Called under j_state_lock. Returns true if a transaction commit was started.
+ * Called with j_state_lock locked for writing.
+ * Returns true if a transaction commit was started.
*/
int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{
@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
{
transaction_t *transaction = NULL;
tid_t tid;
+ int need_to_start = 0;
read_lock(&journal->j_state_lock);
if (journal->j_running_transaction && !current->journal_info) {
transaction = journal->j_running_transaction;
- __jbd2_log_start_commit(journal, transaction->t_tid);
+ if (!tid_geq(journal->j_commit_request, transaction->t_tid))
+ need_to_start = 1;
} else if (journal->j_committing_transaction)
transaction = journal->j_committing_transaction;
@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
tid = transaction->t_tid;
read_unlock(&journal->j_state_lock);
+ if (need_to_start)
+ jbd2_log_start_commit(journal, tid);
jbd2_log_wait_commit(journal, tid);
return 1;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index faad2bd..1d11910 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction)
static int start_this_handle(journal_t *journal, handle_t *handle,
int gfp_mask)
{
- transaction_t *transaction;
- int needed;
- int nblocks = handle->h_buffer_credits;
- transaction_t *new_transaction = NULL;
+ transaction_t *transaction, *new_transaction = NULL;
+ tid_t tid;
+ int needed, need_to_start;
+ int nblocks = handle->h_buffer_credits;
if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -222,8 +222,11 @@ repeat:
atomic_sub(nblocks, &transaction->t_outstanding_credits);
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
- __jbd2_log_start_commit(journal, transaction->t_tid);
+ tid = transaction->t_tid;
+ need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
+ if (need_to_start)
+ jbd2_log_start_commit(journal, tid);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat;
@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
- int ret;
+ tid_t tid;
+ int need_to_start, ret;
/* If we've had an abort of any type, don't even think about
* actually doing the restart! */
@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
spin_unlock(&transaction->t_handle_lock);
jbd_debug(2, "restarting handle %p\n", handle);
- __jbd2_log_start_commit(journal, transaction->t_tid);
+ tid = transaction->t_tid;
+ need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
+ if (need_to_start)
+ jbd2_log_start_commit(journal, tid);
lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead85..3f04a180 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
if (ip->i_nlink == JFS_LINK_MAX)
return -EMLINK;
- if (ip->i_nlink == 0)
- return -ENOENT;
-
dquot_initialize(dir);
tid = txBegin(ip->i_sb, 0);
@@ -1600,7 +1597,7 @@ out:
static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
* This is not negative dentry. Always valid.
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 5f1bcb2..b7c99bf 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -520,7 +520,7 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
struct nsm_handle *nsm,
const struct nlm_reboot *info)
{
- struct nlm_host *host = NULL;
+ struct nlm_host *host;
struct hlist_head *chain;
struct hlist_node *pos;
@@ -532,12 +532,13 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
host->h_state++;
nlm_get_host(host);
- goto out;
+ mutex_unlock(&nlm_host_mutex);
+ return host;
}
}
-out:
+
mutex_unlock(&nlm_host_mutex);
- return host;
+ return NULL;
}
/**
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337d..6e6777f 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
new_de = minix_find_entry(new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
minix_set_link(new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= info->s_link_max)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = minix_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
minix_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 7d77f24..b912b7a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
return retval;
}
-char * getname(const char __user * filename)
+static char *getname_flags(const char __user * filename, int flags)
{
char *tmp, *result;
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
result = tmp;
if (retval < 0) {
- __putname(tmp);
- result = ERR_PTR(retval);
+ if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
+ __putname(tmp);
+ result = ERR_PTR(retval);
+ }
}
}
audit_getname(result);
return result;
}
+char *getname(const char __user * filename)
+{
+ return getname_flags(filename, 0);
+}
+
#ifdef CONFIG_AUDITSYSCALL
void putname(const char *name)
{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
{
struct fs_struct *fs = current->fs;
struct dentry *dentry = nd->path.dentry;
+ int want_root = 0;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt) {
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
goto err;
BUG_ON(nd->inode != dentry->d_inode);
spin_unlock(&dentry->d_lock);
- if (nd->root.mnt) {
+ if (want_root) {
path_get(&nd->root);
spin_unlock(&fs->lock);
}
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
err:
spin_unlock(&dentry->d_lock);
err_root:
- if (nd->root.mnt)
+ if (want_root)
spin_unlock(&fs->lock);
return -ECHILD;
}
@@ -454,17 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
{
struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
-
- /*
- * It can be possible to revalidate the dentry that we started
- * the path walk with. force_reval_path may also revalidate the
- * dentry already committed to the nameidata.
- */
- if (unlikely(parent == dentry))
- return nameidata_drop_rcu(nd);
+ int want_root = 0;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt) {
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
@@ -484,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
parent->d_count++;
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
- if (nd->root.mnt) {
+ if (want_root) {
path_get(&nd->root);
spin_unlock(&fs->lock);
}
@@ -498,7 +501,7 @@ err:
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
err_root:
- if (nd->root.mnt)
+ if (want_root)
spin_unlock(&fs->lock);
return -ECHILD;
}
@@ -506,8 +509,16 @@ err_root:
/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
{
- if (nd->flags & LOOKUP_RCU)
- return nameidata_dentry_drop_rcu(nd, dentry);
+ if (nd->flags & LOOKUP_RCU) {
+ if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
+ }
+ }
return 0;
}
@@ -526,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
BUG_ON(!(nd->flags & LOOKUP_RCU));
nd->flags &= ~LOOKUP_RCU;
- nd->root.mnt = NULL;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
spin_lock(&dentry->d_lock);
if (!__d_rcu_to_refcount(dentry, nd->seq))
goto err_unlock;
@@ -547,53 +559,31 @@ err_unlock:
return -ECHILD;
}
-/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
-static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
-{
- if (likely(nd->flags & LOOKUP_RCU))
- return nameidata_drop_rcu_last(nd);
- return 0;
-}
-
/**
* release_open_intent - free up open intent resources
* @nd: pointer to nameidata
*/
void release_open_intent(struct nameidata *nd)
{
- if (nd->intent.open.file->f_path.dentry == NULL)
- put_filp(nd->intent.open.file);
- else
- fput(nd->intent.open.file);
-}
+ struct file *file = nd->intent.open.file;
-/*
- * Call d_revalidate and handle filesystems that request rcu-walk
- * to be dropped. This may be called and return in rcu-walk mode,
- * regardless of success or error. If -ECHILD is returned, the caller
- * must return -ECHILD back up the path walk stack so path walk may
- * be restarted in ref-walk mode.
- */
-static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- int status;
-
- status = dentry->d_op->d_revalidate(dentry, nd);
- if (status == -ECHILD) {
- if (nameidata_dentry_drop_rcu(nd, dentry))
- return status;
- status = dentry->d_op->d_revalidate(dentry, nd);
+ if (file && !IS_ERR(file)) {
+ if (file->f_path.dentry == NULL)
+ put_filp(file);
+ else
+ fput(file);
}
+}
- return status;
+static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ return dentry->d_op->d_revalidate(dentry, nd);
}
-static inline struct dentry *
+static struct dentry *
do_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- int status;
-
- status = d_revalidate(dentry, nd);
+ int status = d_revalidate(dentry, nd);
if (unlikely(status <= 0)) {
/*
* The dentry failed validation.
@@ -602,37 +592,18 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
* to return a fail status.
*/
if (status < 0) {
- /* If we're in rcu-walk, we don't have a ref */
- if (!(nd->flags & LOOKUP_RCU))
- dput(dentry);
+ dput(dentry);
dentry = ERR_PTR(status);
-
- } else {
- /* Don't d_invalidate in rcu-walk mode */
- if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
- return ERR_PTR(-ECHILD);
- if (!d_invalidate(dentry)) {
- dput(dentry);
- dentry = NULL;
- }
+ } else if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
}
}
return dentry;
}
-static inline int need_reval_dot(struct dentry *dentry)
-{
- if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
- return 0;
-
- if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
- return 0;
-
- return 1;
-}
-
/*
- * force_reval_path - force revalidation of a dentry
+ * handle_reval_path - force revalidation of a dentry
*
* In some situations the path walking code will trust dentries without
* revalidating them. This causes problems for filesystems that depend on
@@ -646,30 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
* invalidate the dentry. It's up to the caller to handle putting references
* to the path if necessary.
*/
-static int
-force_reval_path(struct path *path, struct nameidata *nd)
+static inline int handle_reval_path(struct nameidata *nd)
{
+ struct dentry *dentry = nd->path.dentry;
int status;
- struct dentry *dentry = path->dentry;
- /*
- * only check on filesystems where it's possible for the dentry to
- * become stale.
- */
- if (!need_reval_dot(dentry))
+ if (likely(!(nd->flags & LOOKUP_JUMPED)))
+ return 0;
+
+ if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
+ return 0;
+
+ if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
return 0;
+ /* Note: we do not d_invalidate() */
status = d_revalidate(dentry, nd);
if (status > 0)
return 0;
- if (!status) {
- /* Don't d_invalidate in rcu-walk mode */
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- d_invalidate(dentry);
+ if (!status)
status = -ESTALE;
- }
+
return status;
}
@@ -738,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
path_put(&nd->path);
nd->path = nd->root;
path_get(&nd->root);
+ nd->flags |= LOOKUP_JUMPED;
}
nd->inode = nd->path.dentry->d_inode;
@@ -767,18 +737,43 @@ static inline void path_to_nameidata(const struct path *path,
nd->path.dentry = path->dentry;
}
+static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
+{
+ struct inode *inode = link->dentry->d_inode;
+ if (!IS_ERR(cookie) && inode->i_op->put_link)
+ inode->i_op->put_link(link->dentry, nd, cookie);
+ path_put(link);
+}
+
static __always_inline int
-__do_follow_link(const struct path *link, struct nameidata *nd, void **p)
+follow_link(struct path *link, struct nameidata *nd, void **p)
{
int error;
struct dentry *dentry = link->dentry;
- touch_atime(link->mnt, dentry);
- nd_set_link(nd, NULL);
+ BUG_ON(nd->flags & LOOKUP_RCU);
if (link->mnt == nd->path.mnt)
mntget(link->mnt);
+ if (unlikely(current->total_link_count >= 40)) {
+ *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
+ path_put(&nd->path);
+ return -ELOOP;
+ }
+ cond_resched();
+ current->total_link_count++;
+
+ touch_atime(link->mnt, dentry);
+ nd_set_link(nd, NULL);
+
+ error = security_inode_follow_link(link->dentry, nd);
+ if (error) {
+ *p = ERR_PTR(error); /* no ->put_link(), please */
+ path_put(&nd->path);
+ return error;
+ }
+
nd->last_type = LAST_BIND;
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
error = PTR_ERR(*p);
@@ -788,50 +783,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
if (s)
error = __vfs_follow_link(nd, s);
else if (nd->last_type == LAST_BIND) {
- error = force_reval_path(&nd->path, nd);
- if (error)
+ nd->flags |= LOOKUP_JUMPED;
+ nd->inode = nd->path.dentry->d_inode;
+ if (nd->inode->i_op->follow_link) {
+ /* stepped on a _really_ weird one */
path_put(&nd->path);
+ error = -ELOOP;
+ }
}
}
return error;
}
-/*
- * This limits recursive symlink follows to 8, while
- * limiting consecutive symlinks to 40.
- *
- * Without that kind of total limit, nasty chains of consecutive
- * symlinks can cause almost arbitrarily long lookups.
- */
-static inline int do_follow_link(struct path *path, struct nameidata *nd)
-{
- void *cookie;
- int err = -ELOOP;
- if (current->link_count >= MAX_NESTED_LINKS)
- goto loop;
- if (current->total_link_count >= 40)
- goto loop;
- BUG_ON(nd->depth >= MAX_NESTED_LINKS);
- cond_resched();
- err = security_inode_follow_link(path->dentry, nd);
- if (err)
- goto loop;
- current->link_count++;
- current->total_link_count++;
- nd->depth++;
- err = __do_follow_link(path, nd, &cookie);
- if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
- path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
- path_put(path);
- current->link_count--;
- nd->depth--;
- return err;
-loop:
- path_put_conditional(path, nd);
- path_put(&nd->path);
- return err;
-}
-
static int follow_up_rcu(struct path *path)
{
struct vfsmount *parent;
@@ -1070,7 +1033,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
seq = read_seqcount_begin(&parent->d_seq);
if (read_seqcount_retry(&old->d_seq, nd->seq))
- return -ECHILD;
+ goto failed;
inode = parent->d_inode;
nd->path.dentry = parent;
nd->seq = seq;
@@ -1083,8 +1046,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
}
__follow_mount_rcu(nd, &nd->path, &inode, true);
nd->inode = inode;
-
return 0;
+
+failed:
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
}
/*
@@ -1218,57 +1188,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
{
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
- struct inode *dir;
+ int need_reval = 1;
+ int status = 1;
int err;
/*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
- err = parent->d_op->d_hash(parent, nd->inode, name);
- if (err < 0)
- return err;
- }
-
- /*
* Rename seqlock is not required here because in the off chance
* of a false negative due to a concurrent rename, we're going to
* do the non-racy lookup, below.
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
-
*inode = nd->inode;
dentry = __d_lookup_rcu(parent, name, &seq, inode);
- if (!dentry) {
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- goto need_lookup;
- }
+ if (!dentry)
+ goto unlazy;
+
/* Memory barrier in read_seqcount_begin of child is enough */
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return -ECHILD;
-
nd->seq = seq;
- if (dentry->d_flags & DCACHE_OP_REVALIDATE)
- goto need_revalidate;
-done2:
+
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+ status = d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ if (status != -ECHILD)
+ need_reval = 0;
+ goto unlazy;
+ }
+ }
path->mnt = mnt;
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode, false)))
return 0;
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- /* fallthru */
+unlazy:
+ if (dentry) {
+ if (nameidata_dentry_drop_rcu(nd, dentry))
+ return -ECHILD;
+ } else {
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ }
+ } else {
+ dentry = __d_lookup(parent, name);
}
- dentry = __d_lookup(parent, name);
- if (!dentry)
- goto need_lookup;
-found:
- if (dentry->d_flags & DCACHE_OP_REVALIDATE)
- goto need_revalidate;
-done:
+
+retry:
+ if (unlikely(!dentry)) {
+ struct inode *dir = parent->d_inode;
+ BUG_ON(nd->inode != dir);
+
+ mutex_lock(&dir->i_mutex);
+ dentry = d_lookup(parent, name);
+ if (likely(!dentry)) {
+ dentry = d_alloc_and_lookup(parent, name, nd);
+ if (IS_ERR(dentry)) {
+ mutex_unlock(&dir->i_mutex);
+ return PTR_ERR(dentry);
+ }
+ /* known good */
+ need_reval = 0;
+ status = 1;
+ }
+ mutex_unlock(&dir->i_mutex);
+ }
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
+ status = d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ if (status < 0) {
+ dput(dentry);
+ return status;
+ }
+ if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ need_reval = 1;
+ goto retry;
+ }
+ }
+
path->mnt = mnt;
path->dentry = dentry;
err = follow_managed(path, nd->flags);
@@ -1278,49 +1276,113 @@ done:
}
*inode = path->dentry->d_inode;
return 0;
+}
-need_lookup:
- dir = parent->d_inode;
- BUG_ON(nd->inode != dir);
+static inline int may_lookup(struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU) {
+ int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
+ if (err != -ECHILD)
+ return err;
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ }
+ return exec_permission(nd->inode, 0);
+}
- mutex_lock(&dir->i_mutex);
- /*
- * First re-do the cached lookup just in case it was created
- * while we waited for the directory semaphore, or the first
- * lookup failed due to an unrelated rename.
- *
- * This could use version numbering or similar to avoid unnecessary
- * cache lookups, but then we'd have to do the first lookup in the
- * non-racy way. However in the common case here, everything should
- * be hot in cache, so would it be a big win?
- */
- dentry = d_lookup(parent, name);
- if (likely(!dentry)) {
- dentry = d_alloc_and_lookup(parent, name, nd);
- mutex_unlock(&dir->i_mutex);
- if (IS_ERR(dentry))
- goto fail;
- goto done;
+static inline int handle_dots(struct nameidata *nd, int type)
+{
+ if (type == LAST_DOTDOT) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (follow_dotdot_rcu(nd))
+ return -ECHILD;
+ } else
+ follow_dotdot(nd);
+ }
+ return 0;
+}
+
+static void terminate_walk(struct nameidata *nd)
+{
+ if (!(nd->flags & LOOKUP_RCU)) {
+ path_put(&nd->path);
+ } else {
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
}
+}
+
+static inline int walk_component(struct nameidata *nd, struct path *path,
+ struct qstr *name, int type, int follow)
+{
+ struct inode *inode;
+ int err;
/*
- * Uhhuh! Nasty case: the cache was re-populated while
- * we waited on the semaphore. Need to revalidate.
+ * "." and ".." are special - ".." especially so because it has
+ * to be able to know about the current root directory and
+ * parent relationships.
*/
- mutex_unlock(&dir->i_mutex);
- goto found;
+ if (unlikely(type != LAST_NORM))
+ return handle_dots(nd, type);
+ err = do_lookup(nd, name, path, &inode);
+ if (unlikely(err)) {
+ terminate_walk(nd);
+ return err;
+ }
+ if (!inode) {
+ path_to_nameidata(path, nd);
+ terminate_walk(nd);
+ return -ENOENT;
+ }
+ if (unlikely(inode->i_op->follow_link) && follow) {
+ if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
+ return -ECHILD;
+ BUG_ON(inode != path->dentry->d_inode);
+ return 1;
+ }
+ path_to_nameidata(path, nd);
+ nd->inode = inode;
+ return 0;
+}
-need_revalidate:
- dentry = do_revalidate(dentry, nd);
- if (!dentry)
- goto need_lookup;
- if (IS_ERR(dentry))
- goto fail;
- if (nd->flags & LOOKUP_RCU)
- goto done2;
- goto done;
+/*
+ * This limits recursive symlink follows to 8, while
+ * limiting consecutive symlinks to 40.
+ *
+ * Without that kind of total limit, nasty chains of consecutive
+ * symlinks can cause almost arbitrarily long lookups.
+ */
+static inline int nested_symlink(struct path *path, struct nameidata *nd)
+{
+ int res;
-fail:
- return PTR_ERR(dentry);
+ BUG_ON(nd->depth >= MAX_NESTED_LINKS);
+ if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
+ path_put_conditional(path, nd);
+ path_put(&nd->path);
+ return -ELOOP;
+ }
+
+ nd->depth++;
+ current->link_count++;
+
+ do {
+ struct path link = *path;
+ void *cookie;
+
+ res = follow_link(&link, nd, &cookie);
+ if (!res)
+ res = walk_component(nd, path, &nd->last,
+ nd->last_type, LOOKUP_FOLLOW);
+ put_link(nd, &link, cookie);
+ } while (res > 0);
+
+ current->link_count--;
+ nd->depth--;
+ return res;
}
/*
@@ -1340,30 +1402,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
while (*name=='/')
name++;
if (!*name)
- goto return_reval;
-
- if (nd->depth)
- lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
+ return 0;
/* At this point we know we have a real path component. */
for(;;) {
- struct inode *inode;
unsigned long hash;
struct qstr this;
unsigned int c;
+ int type;
nd->flags |= LOOKUP_CONTINUE;
- if (nd->flags & LOOKUP_RCU) {
- err = exec_permission(nd->inode, IPERM_FLAG_RCU);
- if (err == -ECHILD) {
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- goto exec_again;
- }
- } else {
-exec_again:
- err = exec_permission(nd->inode, 0);
- }
+
+ err = may_lookup(nd);
if (err)
break;
@@ -1379,56 +1429,43 @@ exec_again:
this.len = name - (const char *) this.name;
this.hash = end_name_hash(hash);
+ type = LAST_NORM;
+ if (this.name[0] == '.') switch (this.len) {
+ case 2:
+ if (this.name[1] == '.') {
+ type = LAST_DOTDOT;
+ nd->flags |= LOOKUP_JUMPED;
+ }
+ break;
+ case 1:
+ type = LAST_DOT;
+ }
+ if (likely(type == LAST_NORM)) {
+ struct dentry *parent = nd->path.dentry;
+ nd->flags &= ~LOOKUP_JUMPED;
+ if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
+ err = parent->d_op->d_hash(parent, nd->inode,
+ &this);
+ if (err < 0)
+ break;
+ }
+ }
+
/* remove trailing slashes? */
if (!c)
goto last_component;
while (*++name == '/');
if (!*name)
- goto last_with_slashes;
+ goto last_component;
- /*
- * "." and ".." are special - ".." especially so because it has
- * to be able to know about the current root directory and
- * parent relationships.
- */
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- if (nd->flags & LOOKUP_RCU) {
- if (follow_dotdot_rcu(nd))
- return -ECHILD;
- } else
- follow_dotdot(nd);
- /* fallthrough */
- case 1:
- continue;
- }
- /* This does the actual lookups.. */
- err = do_lookup(nd, &this, &next, &inode);
- if (err)
- break;
- err = -ENOENT;
- if (!inode)
- goto out_dput;
+ err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
+ if (err < 0)
+ return err;
- if (inode->i_op->follow_link) {
- /* We commonly drop rcu-walk here */
- if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
- return -ECHILD;
- BUG_ON(inode != next.dentry->d_inode);
- err = do_follow_link(&next, nd);
+ if (err) {
+ err = nested_symlink(&next, nd);
if (err)
- goto return_err;
- nd->inode = nd->path.dentry->d_inode;
- err = -ENOENT;
- if (!nd->inode)
- break;
- } else {
- path_to_nameidata(&next, nd);
- nd->inode = inode;
+ return err;
}
err = -ENOTDIR;
if (!nd->inode->i_op->lookup)
@@ -1436,209 +1473,109 @@ exec_again:
continue;
/* here ends the main loop */
-last_with_slashes:
- lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
/* Clear LOOKUP_CONTINUE iff it was previously unset */
nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
- if (lookup_flags & LOOKUP_PARENT)
- goto lookup_parent;
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- if (nd->flags & LOOKUP_RCU) {
- if (follow_dotdot_rcu(nd))
- return -ECHILD;
- } else
- follow_dotdot(nd);
- /* fallthrough */
- case 1:
- goto return_reval;
- }
- err = do_lookup(nd, &this, &next, &inode);
- if (err)
- break;
- if (inode && unlikely(inode->i_op->follow_link) &&
- (lookup_flags & LOOKUP_FOLLOW)) {
- if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
- return -ECHILD;
- BUG_ON(inode != next.dentry->d_inode);
- err = do_follow_link(&next, nd);
- if (err)
- goto return_err;
- nd->inode = nd->path.dentry->d_inode;
- } else {
- path_to_nameidata(&next, nd);
- nd->inode = inode;
- }
- err = -ENOENT;
- if (!nd->inode)
- break;
- if (lookup_flags & LOOKUP_DIRECTORY) {
- err = -ENOTDIR;
- if (!nd->inode->i_op->lookup)
- break;
- }
- goto return_base;
-lookup_parent:
nd->last = this;
- nd->last_type = LAST_NORM;
- if (this.name[0] != '.')
- goto return_base;
- if (this.len == 1)
- nd->last_type = LAST_DOT;
- else if (this.len == 2 && this.name[1] == '.')
- nd->last_type = LAST_DOTDOT;
- else
- goto return_base;
-return_reval:
- /*
- * We bypassed the ordinary revalidation routines.
- * We may need to check the cached dentry for staleness.
- */
- if (need_reval_dot(nd->path.dentry)) {
- /* Note: we do not d_invalidate() */
- err = d_revalidate(nd->path.dentry, nd);
- if (!err)
- err = -ESTALE;
- if (err < 0)
- break;
- }
-return_base:
- if (nameidata_drop_rcu_last_maybe(nd))
- return -ECHILD;
+ nd->last_type = type;
return 0;
-out_dput:
- if (!(nd->flags & LOOKUP_RCU))
- path_put_conditional(&next, nd);
- break;
}
- if (!(nd->flags & LOOKUP_RCU))
- path_put(&nd->path);
-return_err:
+ terminate_walk(nd);
return err;
}
-static inline int path_walk_rcu(const char *name, struct nameidata *nd)
-{
- current->total_link_count = 0;
-
- return link_path_walk(name, nd);
-}
-
-static inline int path_walk_simple(const char *name, struct nameidata *nd)
-{
- current->total_link_count = 0;
-
- return link_path_walk(name, nd);
-}
-
-static int path_walk(const char *name, struct nameidata *nd)
-{
- struct path save = nd->path;
- int result;
-
- current->total_link_count = 0;
-
- /* make sure the stuff we saved doesn't go away */
- path_get(&save);
-
- result = link_path_walk(name, nd);
- if (result == -ESTALE) {
- /* nd->path had been dropped */
- current->total_link_count = 0;
- nd->path = save;
- path_get(&nd->path);
- nd->flags |= LOOKUP_REVAL;
- result = link_path_walk(name, nd);
- }
-
- path_put(&save);
-
- return result;
-}
-
-static void path_finish_rcu(struct nameidata *nd)
-{
- if (nd->flags & LOOKUP_RCU) {
- /* RCU dangling. Cancel it. */
- nd->flags &= ~LOOKUP_RCU;
- nd->root.mnt = NULL;
- rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
- }
- if (nd->file)
- fput(nd->file);
-}
-
-static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+static int path_init(int dfd, const char *name, unsigned int flags,
+ struct nameidata *nd, struct file **fp)
{
int retval = 0;
int fput_needed;
struct file *file;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags | LOOKUP_RCU;
+ nd->flags = flags | LOOKUP_JUMPED;
nd->depth = 0;
+ if (flags & LOOKUP_ROOT) {
+ struct inode *inode = nd->root.dentry->d_inode;
+ if (*name) {
+ if (!inode->i_op->lookup)
+ return -ENOTDIR;
+ retval = inode_permission(inode, MAY_EXEC);
+ if (retval)
+ return retval;
+ }
+ nd->path = nd->root;
+ nd->inode = inode;
+ if (flags & LOOKUP_RCU) {
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } else {
+ path_get(&nd->path);
+ }
+ return 0;
+ }
+
nd->root.mnt = NULL;
- nd->file = NULL;
if (*name=='/') {
- struct fs_struct *fs = current->fs;
- unsigned seq;
-
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- nd->root = fs->root;
- nd->path = nd->root;
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- } while (read_seqcount_retry(&fs->seq, seq));
-
+ if (flags & LOOKUP_RCU) {
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ set_root_rcu(nd);
+ } else {
+ set_root(nd);
+ path_get(&nd->root);
+ }
+ nd->path = nd->root;
} else if (dfd == AT_FDCWD) {
- struct fs_struct *fs = current->fs;
- unsigned seq;
-
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
+ if (flags & LOOKUP_RCU) {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
- do {
- seq = read_seqcount_begin(&fs->seq);
- nd->path = fs->pwd;
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- } while (read_seqcount_retry(&fs->seq, seq));
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->path = fs->pwd;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } while (read_seqcount_retry(&fs->seq, seq));
+ } else {
+ get_fs_pwd(current->fs, &nd->path);
+ }
} else {
struct dentry *dentry;
- file = fget_light(dfd, &fput_needed);
+ file = fget_raw_light(dfd, &fput_needed);
retval = -EBADF;
if (!file)
goto out_fail;
dentry = file->f_path.dentry;
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
+ if (*name) {
+ retval = -ENOTDIR;
+ if (!S_ISDIR(dentry->d_inode->i_mode))
+ goto fput_fail;
- retval = file_permission(file, MAY_EXEC);
- if (retval)
- goto fput_fail;
+ retval = file_permission(file, MAY_EXEC);
+ if (retval)
+ goto fput_fail;
+ }
nd->path = file->f_path;
- if (fput_needed)
- nd->file = file;
-
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
+ if (flags & LOOKUP_RCU) {
+ if (fput_needed)
+ *fp = file;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ } else {
+ path_get(&file->f_path);
+ fput_light(file, fput_needed);
+ }
}
+
nd->inode = nd->path.dentry->d_inode;
return 0;
@@ -1648,60 +1585,23 @@ out_fail:
return retval;
}
-static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+static inline int lookup_last(struct nameidata *nd, struct path *path)
{
- int retval = 0;
- int fput_needed;
- struct file *file;
-
- nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags;
- nd->depth = 0;
- nd->root.mnt = NULL;
+ if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
- if (*name=='/') {
- set_root(nd);
- nd->path = nd->root;
- path_get(&nd->root);
- } else if (dfd == AT_FDCWD) {
- get_fs_pwd(current->fs, &nd->path);
- } else {
- struct dentry *dentry;
-
- file = fget_light(dfd, &fput_needed);
- retval = -EBADF;
- if (!file)
- goto out_fail;
-
- dentry = file->f_path.dentry;
-
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
-
- retval = file_permission(file, MAY_EXEC);
- if (retval)
- goto fput_fail;
-
- nd->path = file->f_path;
- path_get(&file->f_path);
-
- fput_light(file, fput_needed);
- }
- nd->inode = nd->path.dentry->d_inode;
- return 0;
-
-fput_fail:
- fput_light(file, fput_needed);
-out_fail:
- return retval;
+ nd->flags &= ~LOOKUP_PARENT;
+ return walk_component(nd, path, &nd->last, nd->last_type,
+ nd->flags & LOOKUP_FOLLOW);
}
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int do_path_lookup(int dfd, const char *name,
+static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
- int retval;
+ struct file *base = NULL;
+ struct path path;
+ int err;
/*
* Path walking is largely split up into 2 different synchronisation
@@ -1717,44 +1617,75 @@ static int do_path_lookup(int dfd, const char *name,
* be handled by restarting a traditional ref-walk (which will always
* be able to complete).
*/
- retval = path_init_rcu(dfd, name, flags, nd);
- if (unlikely(retval))
- return retval;
- retval = path_walk_rcu(name, nd);
- path_finish_rcu(nd);
- if (nd->root.mnt) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
+ err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
+
+ if (unlikely(err))
+ return err;
+
+ current->total_link_count = 0;
+ err = link_path_walk(name, nd);
+
+ if (!err && !(flags & LOOKUP_PARENT)) {
+ err = lookup_last(nd, &path);
+ while (err > 0) {
+ void *cookie;
+ struct path link = path;
+ nd->flags |= LOOKUP_PARENT;
+ err = follow_link(&link, nd, &cookie);
+ if (!err)
+ err = lookup_last(nd, &path);
+ put_link(nd, &link, cookie);
+ }
}
- if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
- /* slower, locked walk */
- if (retval == -ESTALE)
- flags |= LOOKUP_REVAL;
- retval = path_init(dfd, name, flags, nd);
- if (unlikely(retval))
- return retval;
- retval = path_walk(name, nd);
- if (nd->root.mnt) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
+ if (nd->flags & LOOKUP_RCU) {
+ /* went all way through without dropping RCU */
+ BUG_ON(err);
+ if (nameidata_drop_rcu_last(nd))
+ err = -ECHILD;
+ }
+
+ if (!err)
+ err = handle_reval_path(nd);
+
+ if (!err && nd->flags & LOOKUP_DIRECTORY) {
+ if (!nd->inode->i_op->lookup) {
+ path_put(&nd->path);
+ return -ENOTDIR;
}
}
+ if (base)
+ fput(base);
+
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ }
+ return err;
+}
+
+static int do_path_lookup(int dfd, const char *name,
+ unsigned int flags, struct nameidata *nd)
+{
+ int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
+ if (unlikely(retval == -ECHILD))
+ retval = path_lookupat(dfd, name, flags, nd);
+ if (unlikely(retval == -ESTALE))
+ retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
+
if (likely(!retval)) {
if (unlikely(!audit_dummy_context())) {
if (nd->path.dentry && nd->inode)
audit_inode(name, nd->path.dentry);
}
}
-
return retval;
}
-int path_lookup(const char *name, unsigned int flags,
- struct nameidata *nd)
+int kern_path_parent(const char *name, struct nameidata *nd)
{
- return do_path_lookup(AT_FDCWD, name, flags, nd);
+ return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
}
int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1778,29 +1709,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
const char *name, unsigned int flags,
struct nameidata *nd)
{
- int retval;
-
- /* same as do_path_lookup */
- nd->last_type = LAST_ROOT;
- nd->flags = flags;
- nd->depth = 0;
-
- nd->path.dentry = dentry;
- nd->path.mnt = mnt;
- path_get(&nd->path);
- nd->root = nd->path;
- path_get(&nd->root);
- nd->inode = nd->path.dentry->d_inode;
-
- retval = path_walk(name, nd);
- if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
- nd->inode))
- audit_inode(name, nd->path.dentry);
-
- path_put(&nd->root);
- nd->root.mnt = NULL;
-
- return retval;
+ nd->root.dentry = dentry;
+ nd->root.mnt = mnt;
+ /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
+ return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
}
static struct dentry *__lookup_hash(struct qstr *name,
@@ -1815,17 +1727,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
return ERR_PTR(err);
/*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (base->d_flags & DCACHE_OP_HASH) {
- err = base->d_op->d_hash(base, inode, name);
- dentry = ERR_PTR(err);
- if (err < 0)
- goto out;
- }
-
- /*
* Don't bother with __d_lookup: callers are for creat as
* well as unlink, so a lot of the time it would cost
* a double lookup.
@@ -1837,7 +1738,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
if (!dentry)
dentry = d_alloc_and_lookup(base, name, nd);
-out:
+
return dentry;
}
@@ -1851,28 +1752,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
return __lookup_hash(&nd->last, nd->path.dentry, nd);
}
-static int __lookup_one_len(const char *name, struct qstr *this,
- struct dentry *base, int len)
-{
- unsigned long hash;
- unsigned int c;
-
- this->name = name;
- this->len = len;
- if (!len)
- return -EACCES;
-
- hash = init_name_hash();
- while (len--) {
- c = *(const unsigned char *)name++;
- if (c == '/' || c == '\0')
- return -EACCES;
- hash = partial_name_hash(c, hash);
- }
- this->hash = end_name_hash(hash);
- return 0;
-}
-
/**
* lookup_one_len - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
@@ -1886,14 +1765,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
*/
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
- int err;
struct qstr this;
+ unsigned long hash;
+ unsigned int c;
WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
- err = __lookup_one_len(name, &this, base, len);
- if (err)
- return ERR_PTR(err);
+ this.name = name;
+ this.len = len;
+ if (!len)
+ return ERR_PTR(-EACCES);
+
+ hash = init_name_hash();
+ while (len--) {
+ c = *(const unsigned char *)name++;
+ if (c == '/' || c == '\0')
+ return ERR_PTR(-EACCES);
+ hash = partial_name_hash(c, hash);
+ }
+ this.hash = end_name_hash(hash);
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash..
+ */
+ if (base->d_flags & DCACHE_OP_HASH) {
+ int err = base->d_op->d_hash(base, base->d_inode, &this);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
return __lookup_hash(&this, base, NULL);
}
@@ -1902,7 +1801,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
struct path *path)
{
struct nameidata nd;
- char *tmp = getname(name);
+ char *tmp = getname_flags(name, flags);
int err = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
@@ -2082,12 +1981,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
return error;
}
-int may_open(struct path *path, int acc_mode, int flag)
+static int may_open(struct path *path, int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
struct inode *inode = dentry->d_inode;
int error;
+ /* O_PATH? */
+ if (!acc_mode)
+ return 0;
+
if (!inode)
return -ENOENT;
@@ -2156,34 +2059,6 @@ static int handle_truncate(struct file *filp)
}
/*
- * Be careful about ever adding any more callers of this
- * function. Its flags must be in the namei format, not
- * what get passed to sys_open().
- */
-static int __open_namei_create(struct nameidata *nd, struct path *path,
- int open_flag, int mode)
-{
- int error;
- struct dentry *dir = nd->path.dentry;
-
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current_umask();
- error = security_path_mknod(&nd->path, path->dentry, mode, 0);
- if (error)
- goto out_unlock;
- error = vfs_create(dir->d_inode, path->dentry, mode, nd);
-out_unlock:
- mutex_unlock(&dir->d_inode->i_mutex);
- dput(nd->path.dentry);
- nd->path.dentry = path->dentry;
-
- if (error)
- return error;
- /* Don't check for write permission, don't truncate */
- return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
-}
-
-/*
* Note that while the flag value (low two bits) for sys_open means:
* 00 - read-only
* 01 - write-only
@@ -2207,128 +2082,115 @@ static inline int open_to_namei_flags(int flag)
return flag;
}
-static int open_will_truncate(int flag, struct inode *inode)
-{
- /*
- * We'll never write to the fs underlying
- * a device file.
- */
- if (special_file(inode->i_mode))
- return 0;
- return (flag & O_TRUNC);
-}
-
-static struct file *finish_open(struct nameidata *nd,
- int open_flag, int acc_mode)
-{
- struct file *filp;
- int will_truncate;
- int error;
-
- will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
- if (will_truncate) {
- error = mnt_want_write(nd->path.mnt);
- if (error)
- goto exit;
- }
- error = may_open(&nd->path, acc_mode, open_flag);
- if (error) {
- if (will_truncate)
- mnt_drop_write(nd->path.mnt);
- goto exit;
- }
- filp = nameidata_to_filp(nd);
- if (!IS_ERR(filp)) {
- error = ima_file_check(filp, acc_mode);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- if (!IS_ERR(filp)) {
- if (will_truncate) {
- error = handle_truncate(filp);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- }
- /*
- * It is now safe to drop the mnt write
- * because the filp has had a write taken
- * on its behalf.
- */
- if (will_truncate)
- mnt_drop_write(nd->path.mnt);
- path_put(&nd->path);
- return filp;
-
-exit:
- if (!IS_ERR(nd->intent.open.file))
- release_open_intent(nd);
- path_put(&nd->path);
- return ERR_PTR(error);
-}
-
/*
- * Handle O_CREAT case for do_filp_open
+ * Handle the last step of open()
*/
static struct file *do_last(struct nameidata *nd, struct path *path,
- int open_flag, int acc_mode,
- int mode, const char *pathname)
+ const struct open_flags *op, const char *pathname)
{
struct dentry *dir = nd->path.dentry;
+ struct dentry *dentry;
+ int open_flag = op->open_flag;
+ int will_truncate = open_flag & O_TRUNC;
+ int want_write = 0;
+ int acc_mode = op->acc_mode;
struct file *filp;
- int error = -EISDIR;
+ int error;
+
+ nd->flags &= ~LOOKUP_PARENT;
+ nd->flags |= op->intent;
switch (nd->last_type) {
case LAST_DOTDOT:
- follow_dotdot(nd);
- dir = nd->path.dentry;
case LAST_DOT:
- if (need_reval_dot(dir)) {
- int status = d_revalidate(nd->path.dentry, nd);
- if (!status)
- status = -ESTALE;
- if (status < 0) {
- error = status;
- goto exit;
- }
- }
+ error = handle_dots(nd, nd->last_type);
+ if (error)
+ return ERR_PTR(error);
/* fallthrough */
case LAST_ROOT:
- goto exit;
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+ error = handle_reval_path(nd);
+ if (error)
+ goto exit;
+ audit_inode(pathname, nd->path.dentry);
+ if (open_flag & O_CREAT) {
+ error = -EISDIR;
+ goto exit;
+ }
+ goto ok;
case LAST_BIND:
+ /* can't be RCU mode here */
+ error = handle_reval_path(nd);
+ if (error)
+ goto exit;
audit_inode(pathname, dir);
goto ok;
}
+ if (!(open_flag & O_CREAT)) {
+ int symlink_ok = 0;
+ if (nd->last.name[nd->last.len])
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+ if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
+ symlink_ok = 1;
+ /* we _can_ be in RCU mode here */
+ error = walk_component(nd, path, &nd->last, LAST_NORM,
+ !symlink_ok);
+ if (error < 0)
+ return ERR_PTR(error);
+ if (error) /* symlink */
+ return NULL;
+ /* sayonara */
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+
+ error = -ENOTDIR;
+ if (nd->flags & LOOKUP_DIRECTORY) {
+ if (!nd->inode->i_op->lookup)
+ goto exit;
+ }
+ audit_inode(pathname, nd->path.dentry);
+ goto ok;
+ }
+
+ /* create side of things */
+
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+
+ audit_inode(pathname, dir);
+ error = -EISDIR;
/* trailing slashes? */
if (nd->last.name[nd->last.len])
goto exit;
mutex_lock(&dir->d_inode->i_mutex);
- path->dentry = lookup_hash(nd);
- path->mnt = nd->path.mnt;
-
- error = PTR_ERR(path->dentry);
- if (IS_ERR(path->dentry)) {
+ dentry = lookup_hash(nd);
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry)) {
mutex_unlock(&dir->d_inode->i_mutex);
goto exit;
}
- if (IS_ERR(nd->intent.open.file)) {
- error = PTR_ERR(nd->intent.open.file);
- goto exit_mutex_unlock;
- }
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
/* Negative dentry, just create the file */
- if (!path->dentry->d_inode) {
+ if (!dentry->d_inode) {
+ int mode = op->mode;
+ if (!IS_POSIXACL(dir->d_inode))
+ mode &= ~current_umask();
/*
* This write is needed to ensure that a
- * ro->rw transition does not occur between
+ * rw->ro transition does not occur between
* the time when the file is created and when
* a permanent write count is taken through
* the 'struct file' in nameidata_to_filp().
@@ -2336,22 +2198,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
error = mnt_want_write(nd->path.mnt);
if (error)
goto exit_mutex_unlock;
- error = __open_namei_create(nd, path, open_flag, mode);
- if (error) {
- mnt_drop_write(nd->path.mnt);
- goto exit;
- }
- filp = nameidata_to_filp(nd);
- mnt_drop_write(nd->path.mnt);
- path_put(&nd->path);
- if (!IS_ERR(filp)) {
- error = ima_file_check(filp, acc_mode);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- return filp;
+ want_write = 1;
+ /* Don't check for write permission, don't truncate */
+ open_flag &= ~O_TRUNC;
+ will_truncate = 0;
+ acc_mode = MAY_OPEN;
+ error = security_path_mknod(&nd->path, dentry, mode, 0);
+ if (error)
+ goto exit_mutex_unlock;
+ error = vfs_create(dir->d_inode, dentry, mode, nd);
+ if (error)
+ goto exit_mutex_unlock;
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(nd->path.dentry);
+ nd->path.dentry = dentry;
+ goto common;
}
/*
@@ -2381,7 +2242,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (S_ISDIR(nd->inode->i_mode))
goto exit;
ok:
- filp = finish_open(nd, open_flag, acc_mode);
+ if (!S_ISREG(nd->inode->i_mode))
+ will_truncate = 0;
+
+ if (will_truncate) {
+ error = mnt_want_write(nd->path.mnt);
+ if (error)
+ goto exit;
+ want_write = 1;
+ }
+common:
+ error = may_open(&nd->path, acc_mode, open_flag);
+ if (error)
+ goto exit;
+ filp = nameidata_to_filp(nd);
+ if (!IS_ERR(filp)) {
+ error = ima_file_check(filp, op->acc_mode);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ if (!IS_ERR(filp)) {
+ if (will_truncate) {
+ error = handle_truncate(filp);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ }
+out:
+ if (want_write)
+ mnt_drop_write(nd->path.mnt);
+ path_put(&nd->path);
return filp;
exit_mutex_unlock:
@@ -2389,199 +2283,103 @@ exit_mutex_unlock:
exit_dput:
path_put_conditional(path, nd);
exit:
- if (!IS_ERR(nd->intent.open.file))
- release_open_intent(nd);
- path_put(&nd->path);
- return ERR_PTR(error);
+ filp = ERR_PTR(error);
+ goto out;
}
-/*
- * Note that the low bits of the passed in "open_flag"
- * are not the same as in the local variable "flag". See
- * open_to_namei_flags() for more details.
- */
-struct file *do_filp_open(int dfd, const char *pathname,
- int open_flag, int mode, int acc_mode)
+static struct file *path_openat(int dfd, const char *pathname,
+ struct nameidata *nd, const struct open_flags *op, int flags)
{
+ struct file *base = NULL;
struct file *filp;
- struct nameidata nd;
- int error;
struct path path;
- int count = 0;
- int flag = open_to_namei_flags(open_flag);
- int flags;
-
- if (!(open_flag & O_CREAT))
- mode = 0;
-
- /* Must never be set by userspace */
- open_flag &= ~FMODE_NONOTIFY;
-
- /*
- * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
- * check for O_DSYNC if the need any syncing at all we enforce it's
- * always set instead of having to deal with possibly weird behaviour
- * for malicious applications setting only __O_SYNC.
- */
- if (open_flag & __O_SYNC)
- open_flag |= O_DSYNC;
-
- if (!acc_mode)
- acc_mode = MAY_OPEN | ACC_MODE(open_flag);
-
- /* O_TRUNC implies we need access checks for write permissions */
- if (open_flag & O_TRUNC)
- acc_mode |= MAY_WRITE;
-
- /* Allow the LSM permission hook to distinguish append
- access from general write access. */
- if (open_flag & O_APPEND)
- acc_mode |= MAY_APPEND;
-
- flags = LOOKUP_OPEN;
- if (open_flag & O_CREAT) {
- flags |= LOOKUP_CREATE;
- if (open_flag & O_EXCL)
- flags |= LOOKUP_EXCL;
- }
- if (open_flag & O_DIRECTORY)
- flags |= LOOKUP_DIRECTORY;
- if (!(open_flag & O_NOFOLLOW))
- flags |= LOOKUP_FOLLOW;
+ int error;
filp = get_empty_filp();
if (!filp)
return ERR_PTR(-ENFILE);
- filp->f_flags = open_flag;
- nd.intent.open.file = filp;
- nd.intent.open.flags = flag;
- nd.intent.open.create_mode = mode;
+ filp->f_flags = op->open_flag;
+ nd->intent.open.file = filp;
+ nd->intent.open.flags = open_to_namei_flags(op->open_flag);
+ nd->intent.open.create_mode = op->mode;
- if (open_flag & O_CREAT)
- goto creat;
-
- /* !O_CREAT, simple open */
- error = do_path_lookup(dfd, pathname, flags, &nd);
+ error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(error))
goto out_filp;
- error = -ELOOP;
- if (!(nd.flags & LOOKUP_FOLLOW)) {
- if (nd.inode->i_op->follow_link)
- goto out_path;
- }
- error = -ENOTDIR;
- if (nd.flags & LOOKUP_DIRECTORY) {
- if (!nd.inode->i_op->lookup)
- goto out_path;
- }
- audit_inode(pathname, nd.path.dentry);
- filp = finish_open(&nd, open_flag, acc_mode);
- return filp;
-creat:
- /* OK, have to create the file. Find the parent. */
- error = path_init_rcu(dfd, pathname,
- LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
- if (error)
- goto out_filp;
- error = path_walk_rcu(pathname, &nd);
- path_finish_rcu(&nd);
- if (unlikely(error == -ECHILD || error == -ESTALE)) {
- /* slower, locked walk */
- if (error == -ESTALE) {
-reval:
- flags |= LOOKUP_REVAL;
- }
- error = path_init(dfd, pathname,
- LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
- if (error)
- goto out_filp;
-
- error = path_walk_simple(pathname, &nd);
- }
+ current->total_link_count = 0;
+ error = link_path_walk(pathname, nd);
if (unlikely(error))
goto out_filp;
- if (unlikely(!audit_dummy_context()))
- audit_inode(pathname, nd.path.dentry);
- /*
- * We have the parent and last component.
- */
- nd.flags = flags;
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
+ filp = do_last(nd, &path, op, pathname);
while (unlikely(!filp)) { /* trailing symlink */
struct path link = path;
- struct inode *linki = link.dentry->d_inode;
void *cookie;
- error = -ELOOP;
- if (!(nd.flags & LOOKUP_FOLLOW))
- goto exit_dput;
- if (count++ == 32)
- goto exit_dput;
- /*
- * This is subtle. Instead of calling do_follow_link() we do
- * the thing by hands. The reason is that this way we have zero
- * link_count and path_walk() (called from ->follow_link)
- * honoring LOOKUP_PARENT. After that we have the parent and
- * last component, i.e. we are in the same situation as after
- * the first path_walk(). Well, almost - if the last component
- * is normal we get its copy stored in nd->last.name and we will
- * have to putname() it when we are done. Procfs-like symlinks
- * just set LAST_BIND.
- */
- nd.flags |= LOOKUP_PARENT;
- error = security_inode_follow_link(link.dentry, &nd);
- if (error)
- goto exit_dput;
- error = __do_follow_link(&link, &nd, &cookie);
- if (unlikely(error)) {
- if (!IS_ERR(cookie) && linki->i_op->put_link)
- linki->i_op->put_link(link.dentry, &nd, cookie);
- /* nd.path had been dropped */
- nd.path = link;
- goto out_path;
+ if (!(nd->flags & LOOKUP_FOLLOW)) {
+ path_put_conditional(&path, nd);
+ path_put(&nd->path);
+ filp = ERR_PTR(-ELOOP);
+ break;
}
- nd.flags &= ~LOOKUP_PARENT;
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
- if (linki->i_op->put_link)
- linki->i_op->put_link(link.dentry, &nd, cookie);
- path_put(&link);
+ nd->flags |= LOOKUP_PARENT;
+ nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
+ error = follow_link(&link, nd, &cookie);
+ if (unlikely(error))
+ filp = ERR_PTR(error);
+ else
+ filp = do_last(nd, &path, op, pathname);
+ put_link(nd, &link, cookie);
}
out:
- if (nd.root.mnt)
- path_put(&nd.root);
- if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
- goto reval;
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
+ path_put(&nd->root);
+ if (base)
+ fput(base);
+ release_open_intent(nd);
return filp;
-exit_dput:
- path_put_conditional(&path, &nd);
-out_path:
- path_put(&nd.path);
out_filp:
- if (!IS_ERR(nd.intent.open.file))
- release_open_intent(&nd);
filp = ERR_PTR(error);
goto out;
}
-/**
- * filp_open - open file and return file pointer
- *
- * @filename: path to open
- * @flags: open flags as per the open(2) second argument
- * @mode: mode for the new file if O_CREAT is set, else ignored
- *
- * This is the helper to open a file from kernelspace if you really
- * have to. But in generally you should not do this, so please move
- * along, nothing to see here..
- */
-struct file *filp_open(const char *filename, int flags, int mode)
+struct file *do_filp_open(int dfd, const char *pathname,
+ const struct open_flags *op, int flags)
{
- return do_filp_open(AT_FDCWD, filename, flags, mode, 0);
+ struct nameidata nd;
+ struct file *filp;
+
+ filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
+ if (unlikely(filp == ERR_PTR(-ECHILD)))
+ filp = path_openat(dfd, pathname, &nd, op, flags);
+ if (unlikely(filp == ERR_PTR(-ESTALE)))
+ filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
+ return filp;
+}
+
+struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
+ const char *name, const struct open_flags *op, int flags)
+{
+ struct nameidata nd;
+ struct file *file;
+
+ nd.root.mnt = mnt;
+ nd.root.dentry = dentry;
+
+ flags |= LOOKUP_ROOT;
+
+ if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+ return ERR_PTR(-ELOOP);
+
+ file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
+ if (unlikely(file == ERR_PTR(-ECHILD)))
+ file = path_openat(-1, name, &nd, op, flags);
+ if (unlikely(file == ERR_PTR(-ESTALE)))
+ file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
+ return file;
}
-EXPORT_SYMBOL(filp_open);
/**
* lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3120,7 +2918,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
return error;
mutex_lock(&inode->i_mutex);
- error = dir->i_op->link(old_dentry, dir, new_dentry);
+ /* Make sure we don't allow creating hardlink to an unlinked file */
+ if (inode->i_nlink == 0)
+ error = -ENOENT;
+ else
+ error = dir->i_op->link(old_dentry, dir, new_dentry);
mutex_unlock(&inode->i_mutex);
if (!error)
fsnotify_link(dir, inode, new_dentry);
@@ -3142,15 +2944,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
struct dentry *new_dentry;
struct nameidata nd;
struct path old_path;
+ int how = 0;
int error;
char *to;
- if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
+ if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
return -EINVAL;
+ /*
+ * To use null names we require CAP_DAC_READ_SEARCH
+ * This ensures that not everyone will be able to create
+ * handlink using the passed filedescriptor.
+ */
+ if (flags & AT_EMPTY_PATH) {
+ if (!capable(CAP_DAC_READ_SEARCH))
+ return -ENOENT;
+ how = LOOKUP_EMPTY;
+ }
+
+ if (flags & AT_SYMLINK_FOLLOW)
+ how |= LOOKUP_FOLLOW;
- error = user_path_at(olddfd, oldname,
- flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
- &old_path);
+ error = user_path_at(olddfd, oldname, how, &old_path);
if (error)
return error;
@@ -3587,7 +3401,7 @@ EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(__page_symlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
-EXPORT_SYMBOL(path_lookup);
+EXPORT_SYMBOL(kern_path_parent);
EXPORT_SYMBOL(kern_path);
EXPORT_SYMBOL(vfs_path_lookup);
EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b953..dffe6f4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
.show = show_vfsmnt
};
+static int uuid_is_nil(u8 *uuid)
+{
+ int i;
+ u8 *cp = (u8 *)uuid;
+
+ for (i = 0; i < 16; i++) {
+ if (*cp++)
+ return 0;
+ }
+ return 1;
+}
+
static int show_mountinfo(struct seq_file *m, void *v)
{
struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
if (IS_MNT_UNBINDABLE(mnt))
seq_puts(m, " unbindable");
+ if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
+ /* print the uuid */
+ seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
+
/* Filesystem specific data */
seq_puts(m, " - ");
show_type(m, sb);
@@ -1244,7 +1260,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
*/
br_write_lock(vfsmount_lock);
if (mnt_get_count(mnt) != 2) {
- br_write_lock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
return -EBUSY;
}
br_write_unlock(vfsmount_lock);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 1990165..e3d2942 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -135,33 +135,6 @@ out_err:
#if defined(CONFIG_NFS_V4_1)
/*
- * * CB_SEQUENCE operations will fail until the callback sessionid is set.
- * */
-int nfs4_set_callback_sessionid(struct nfs_client *clp)
-{
- struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
- struct nfs4_sessionid *bc_sid;
-
- if (!serv->sv_bc_xprt)
- return -EINVAL;
-
- /* on success freed in xprt_free */
- bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
- if (!bc_sid)
- return -ENOMEM;
- memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
- NFS4_MAX_SESSIONID_LEN);
- spin_lock_bh(&serv->sv_cb_lock);
- serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
- spin_unlock_bh(&serv->sv_cb_lock);
- dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
- ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
- ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
- serv->sv_bc_xprt);
- return 0;
-}
-
-/*
* The callback service for NFSv4.1 callbacks
*/
static int
@@ -266,10 +239,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
struct nfs_callback_data *cb_info)
{
}
-int nfs4_set_callback_sessionid(struct nfs_client *clp)
-{
- return 0;
-}
#endif /* CONFIG_NFS_V4_1 */
/*
@@ -359,78 +328,58 @@ void nfs_callback_down(int minorversion)
mutex_unlock(&nfs_callback_mutex);
}
-static int check_gss_callback_principal(struct nfs_client *clp,
- struct svc_rqst *rqstp)
+/* Boolean check of RPC_AUTH_GSS principal */
+int
+check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
{
struct rpc_clnt *r = clp->cl_rpcclient;
char *p = svc_gss_principal(rqstp);
+ if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
+ return 1;
+
/* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
if (clp->cl_minorversion != 0)
- return SVC_DROP;
+ return 0;
/*
* It might just be a normal user principal, in which case
* userspace won't bother to tell us the name at all.
*/
if (p == NULL)
- return SVC_DENIED;
+ return 0;
/* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
if (memcmp(p, "nfs@", 4) != 0)
- return SVC_DENIED;
+ return 0;
p += 4;
if (strcmp(p, r->cl_server) != 0)
- return SVC_DENIED;
- return SVC_OK;
+ return 0;
+ return 1;
}
-/* pg_authenticate method helper */
-static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp)
-{
- struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp);
- int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0;
-
- dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc);
- if (svc_is_backchannel(rqstp))
- /* Sessionid (usually) set after CB_NULL ping */
- return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
- is_cb_compound);
- else
- /* No callback identifier in pg_authenticate */
- return nfs4_find_client_no_ident(svc_addr(rqstp));
-}
-
-/* pg_authenticate method for nfsv4 callback threads. */
+/*
+ * pg_authenticate method for nfsv4 callback threads.
+ *
+ * The authflavor has been negotiated, so an incorrect flavor is a server
+ * bug. Drop packets with incorrect authflavor.
+ *
+ * All other checking done after NFS decoding where the nfs_client can be
+ * found in nfs4_callback_compound
+ */
static int nfs_callback_authenticate(struct svc_rqst *rqstp)
{
- struct nfs_client *clp;
- RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
- int ret = SVC_OK;
-
- /* Don't talk to strangers */
- clp = nfs_cb_find_client(rqstp);
- if (clp == NULL)
- return SVC_DROP;
-
- dprintk("%s: %s NFSv4 callback!\n", __func__,
- svc_print_addr(rqstp, buf, sizeof(buf)));
-
switch (rqstp->rq_authop->flavour) {
- case RPC_AUTH_NULL:
- if (rqstp->rq_proc != CB_NULL)
- ret = SVC_DENIED;
- break;
- case RPC_AUTH_UNIX:
- break;
- case RPC_AUTH_GSS:
- ret = check_gss_callback_principal(clp, rqstp);
- break;
- default:
- ret = SVC_DENIED;
+ case RPC_AUTH_NULL:
+ if (rqstp->rq_proc != CB_NULL)
+ return SVC_DROP;
+ break;
+ case RPC_AUTH_GSS:
+ /* No RPC_AUTH_GSS support yet in NFSv4.1 */
+ if (svc_is_backchannel(rqstp))
+ return SVC_DROP;
}
- nfs_put_client(clp);
- return ret;
+ return SVC_OK;
}
/*
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index d3b44f9..46d93ce 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -7,6 +7,7 @@
*/
#ifndef __LINUX_FS_NFS_CALLBACK_H
#define __LINUX_FS_NFS_CALLBACK_H
+#include <linux/sunrpc/svc.h>
#define NFS4_CALLBACK 0x40000000
#define NFS4_CALLBACK_XDRSIZE 2048
@@ -37,7 +38,6 @@ enum nfs4_callback_opnum {
struct cb_process_state {
__be32 drc_status;
struct nfs_client *clp;
- struct nfs4_sessionid *svc_sid; /* v4.1 callback service sessionid */
};
struct cb_compound_hdr_arg {
@@ -168,7 +168,7 @@ extern unsigned nfs4_callback_layoutrecall(
extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
extern void nfs4_cb_take_slot(struct nfs_client *clp);
#endif /* CONFIG_NFS_V4_1 */
-
+extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
struct cb_getattrres *res,
struct cb_process_state *cps);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 4bb91cb..8958757 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -373,17 +373,11 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
{
struct nfs_client *clp;
int i;
- __be32 status;
+ __be32 status = htonl(NFS4ERR_BADSESSION);
cps->clp = NULL;
- status = htonl(NFS4ERR_BADSESSION);
- /* Incoming session must match the callback session */
- if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
- goto out;
-
- clp = nfs4_find_client_sessionid(args->csa_addr,
- &args->csa_sessionid, 1);
+ clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
if (clp == NULL)
goto out;
@@ -414,9 +408,9 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
nfs4_cb_take_slot(clp);
- cps->clp = clp; /* put in nfs4_callback_compound */
out:
+ cps->clp = clp; /* put in nfs4_callback_compound */
for (i = 0; i < args->csa_nrclists; i++)
kfree(args->csa_rclists[i].rcl_refcalls);
kfree(args->csa_rclists);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 23112c2..14e0f93 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -794,10 +794,9 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
if (hdr_arg.minorversion == 0) {
cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
- if (!cps.clp)
+ if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
return rpc_drop_reply;
- } else
- cps.svc_sid = bc_xprt_sid(rqstp);
+ }
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 192f2f8..bd3ca32 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1206,16 +1206,11 @@ nfs4_find_client_ident(int cb_ident)
* For CB_COMPOUND calls, find a client by IP address, protocol version,
* minorversion, and sessionID
*
- * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
- * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
- * can arrive before the callback sessionid is set. For CB_NULL calls,
- * find a client by IP address protocol version, and minorversion.
- *
* Returns NULL if no such client
*/
struct nfs_client *
nfs4_find_client_sessionid(const struct sockaddr *addr,
- struct nfs4_sessionid *sid, int is_cb_compound)
+ struct nfs4_sessionid *sid)
{
struct nfs_client *clp;
@@ -1227,9 +1222,9 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
if (!nfs4_has_session(clp))
continue;
- /* Match sessionid unless cb_null call*/
- if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data,
- sid->data, NFS4_MAX_SESSIONID_LEN) != 0))
+ /* Match sessionid*/
+ if (memcmp(clp->cl_session->sess_id.data,
+ sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
continue;
atomic_inc(&clp->cl_count);
@@ -1244,7 +1239,7 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
struct nfs_client *
nfs4_find_client_sessionid(const struct sockaddr *addr,
- struct nfs4_sessionid *sid, int is_cb_compound)
+ struct nfs4_sessionid *sid)
{
return NULL;
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 364e432..bbbc6bf 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -23,8 +23,6 @@
static void nfs_do_free_delegation(struct nfs_delegation *delegation)
{
- if (delegation->cred)
- put_rpccred(delegation->cred);
kfree(delegation);
}
@@ -37,6 +35,10 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
+ if (delegation->cred) {
+ put_rpccred(delegation->cred);
+ delegation->cred = NULL;
+ }
call_rcu(&delegation->rcu, nfs_free_delegation_callback);
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e6ace0d..9943a75 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -407,15 +407,18 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
pos += vec->iov_len;
}
+ /*
+ * If no bytes were started, return the error, and let the
+ * generic layer handle the completion.
+ */
+ if (requested_bytes == 0) {
+ nfs_direct_req_release(dreq);
+ return result < 0 ? result : -EIO;
+ }
+
if (put_dreq(dreq))
nfs_direct_complete(dreq);
-
- if (requested_bytes != 0)
- return 0;
-
- if (result < 0)
- return result;
- return -EIO;
+ return 0;
}
static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
@@ -841,15 +844,18 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
pos += vec->iov_len;
}
+ /*
+ * If no bytes were started, return the error, and let the
+ * generic layer handle the completion.
+ */
+ if (requested_bytes == 0) {
+ nfs_direct_req_release(dreq);
+ return result < 0 ? result : -EIO;
+ }
+
if (put_dreq(dreq))
nfs_direct_write_complete(dreq, dreq->inode);
-
- if (requested_bytes != 0)
- return 0;
-
- if (result < 0)
- return result;
- return -EIO;
+ return 0;
}
static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d851242..01768e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
#include <linux/inet.h>
#include <linux/nfs_xdr.h>
#include <linux/slab.h>
+#include <linux/compat.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
*/
u64 nfs_compat_user_ino64(u64 fileid)
{
- int ino;
+#ifdef CONFIG_COMPAT
+ compat_ulong_t ino;
+#else
+ unsigned long ino;
+#endif
if (enable_ino64)
return fileid;
@@ -881,9 +886,10 @@ out:
return ret;
}
-static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long ret = 0;
if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -891,25 +897,32 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->change_attr = fattr->change_attr;
if (S_ISDIR(inode->i_mode))
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ ret |= NFS_INO_INVALID_ATTR;
}
/* If we have atomic WCC data, we may update some attributes */
if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
- && timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
- memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+ && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
+ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+ ret |= NFS_INO_INVALID_ATTR;
+ }
if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
&& (fattr->valid & NFS_ATTR_FATTR_MTIME)
&& timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
- memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
- if (S_ISDIR(inode->i_mode))
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+ if (S_ISDIR(inode->i_mode))
+ nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ ret |= NFS_INO_INVALID_ATTR;
}
if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
&& (fattr->valid & NFS_ATTR_FATTR_SIZE)
&& i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
- && nfsi->npages == 0)
- i_size_write(inode, nfs_size_to_loff_t(fattr->size));
+ && nfsi->npages == 0) {
+ i_size_write(inode, nfs_size_to_loff_t(fattr->size));
+ ret |= NFS_INO_INVALID_ATTR;
+ }
+ return ret;
}
/**
@@ -1223,7 +1236,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
| NFS_INO_REVAL_PAGECACHE);
/* Do atomic weak cache consistency updates */
- nfs_wcc_update_inode(inode, fattr);
+ invalid |= nfs_wcc_update_inode(inode, fattr);
/* More cache consistency checks */
if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
@@ -1505,7 +1518,7 @@ static int nfsiod_start(void)
{
struct workqueue_struct *wq;
dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0);
+ wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
if (wq == NULL)
return -ENOMEM;
nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4644f04..cf9fdbd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -133,8 +133,7 @@ extern void nfs_put_client(struct nfs_client *);
extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
extern struct nfs_client *nfs4_find_client_ident(int);
extern struct nfs_client *
-nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *,
- int);
+nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *);
extern struct nfs_server *nfs_create_server(
const struct nfs_parsed_mount_data *,
struct nfs_fh *);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 9f88c5f..2743427 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -311,8 +311,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
goto out;
- /* We are doing this here, because XDR marshalling can only
- return -ENOMEM. */
+ /* We are doing this here because XDR marshalling does not
+ * return any results, it BUGs. */
status = -ENOSPC;
if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES)
goto out;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 01c5e8b..183c6b1 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1328,10 +1328,13 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
encode_nfs_fh3(xdr, NFS_FH(args->inode));
encode_uint32(xdr, args->mask);
+
+ base = req->rq_slen;
if (args->npages != 0)
xdr_write_pages(xdr, args->pages, 0, args->len);
+ else
+ xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE);
- base = req->rq_slen;
error = nfsacl_encode(xdr->buf, base, args->inode,
(args->mask & NFS_ACL) ?
args->acl_access : NULL, 1, 0);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a74740..1be36cf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
#if defined(CONFIG_NFS_V4_1)
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
+extern void nfs4_schedule_session_recovery(struct nfs4_session *);
+#else
+static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
-extern void nfs4_schedule_state_recovery(struct nfs_client *);
+extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
-extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
-extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
+extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 51fe64ac..b73c343 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -214,17 +214,26 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
/* ipv6 length plus port is legal */
if (rlen > INET6_ADDRSTRLEN + 8) {
- dprintk("%s Invalid address, length %d\n", __func__,
+ dprintk("%s: Invalid address, length %d\n", __func__,
rlen);
goto out_err;
}
buf = kmalloc(rlen + 1, GFP_KERNEL);
+ if (!buf) {
+ dprintk("%s: Not enough memory\n", __func__);
+ goto out_err;
+ }
buf[rlen] = '\0';
memcpy(buf, r_addr, rlen);
/* replace the port dots with dashes for the in4_pton() delimiter*/
for (i = 0; i < 2; i++) {
char *res = strrchr(buf, '.');
+ if (!res) {
+ dprintk("%s: Failed finding expected dots in port\n",
+ __func__);
+ goto out_free;
+ }
*res = '-';
}
@@ -240,7 +249,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
port = htons((tmp[0] << 8) | (tmp[1]));
ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
- dprintk("%s Decoded address and port %s\n", __func__, buf);
+ dprintk("%s: Decoded address and port %s\n", __func__, buf);
out_free:
kfree(buf);
out_err:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9d992b0..0a07e35 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -50,6 +50,7 @@
#include <linux/module.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/xattr.h>
+#include <linux/utsname.h>
#include "nfs4_fs.h"
#include "delegation.h"
@@ -255,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -271,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
exception->retry = 1;
break;
#endif /* defined(CONFIG_NFS_V4_1) */
@@ -294,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
}
/* We failed to handle the error */
return nfs4_map_errors(ret);
-do_state_recovery:
- nfs4_schedule_state_recovery(clp);
+wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
if (ret == 0)
exception->retry = 1;
@@ -434,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
clp = res->sr_session->clp;
do_renew_lease(clp, timestamp);
/* Check sequence flags */
- if (atomic_read(&clp->cl_count) > 1)
- nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
+ if (res->sr_status_flags != 0)
+ nfs4_schedule_lease_recovery(clp);
break;
case -NFS4ERR_DELAY:
/* The server detected a resend of the RPC call and
@@ -1254,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(
- server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
/* Don't recall a delegation if it was lost */
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_lease_recovery(server->nfs_client);
goto out;
case -ERESTARTSYS:
/*
@@ -1270,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
*/
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
case -EKEYEXPIRED:
/*
* User RPCSEC_GSS context has expired.
@@ -1586,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
!test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
break;
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
ret = -EIO;
}
return ret;
@@ -3177,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
if (task->tk_status < 0) {
/* Unless we're shutting down, schedule state recovery! */
if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
return;
}
do_renew_lease(clp, timestamp);
@@ -3251,6 +3251,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
}
}
+static int buf_to_pages_noslab(const void *buf, size_t buflen,
+ struct page **pages, unsigned int *pgbase)
+{
+ struct page *newpage, **spages;
+ int rc = 0;
+ size_t len;
+ spages = pages;
+
+ do {
+ len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
+ newpage = alloc_page(GFP_KERNEL);
+
+ if (newpage == NULL)
+ goto unwind;
+ memcpy(page_address(newpage), buf, len);
+ buf += len;
+ buflen -= len;
+ *pages++ = newpage;
+ rc++;
+ } while (buflen != 0);
+
+ return rc;
+
+unwind:
+ for(; rc > 0; rc--)
+ __free_page(spages[rc-1]);
+ return -ENOMEM;
+}
+
struct nfs4_cached_acl {
int cached;
size_t len;
@@ -3419,13 +3448,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
.rpc_argp = &arg,
.rpc_resp = &res,
};
- int ret;
+ int ret, i;
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
+ i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+ if (i < 0)
+ return i;
nfs_inode_return_delegation(inode);
- buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+
+ /*
+ * Free each page after tx, so the only ref left is
+ * held by the network stack
+ */
+ for (; i > 0; i--)
+ put_page(pages[i-1]);
+
/*
* Acl update can result in inode attribute update.
* so mark the attribute cache invalid.
@@ -3463,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -3479,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
task->tk_status = 0;
return -EAGAIN;
#endif /* CONFIG_NFS_V4_1 */
@@ -3496,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
-do_state_recovery:
+wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
- nfs4_schedule_state_recovery(clp);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
task->tk_status = 0;
@@ -4109,7 +4148,7 @@ static void nfs4_lock_release(void *calldata)
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("%s: cancelling lock!\n", __func__);
} else
nfs_free_seqid(data->arg.lock_seqid);
@@ -4133,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
{
- struct nfs_client *clp = server->nfs_client;
- struct nfs4_state *state = lsp->ls_state;
-
switch (error) {
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_EXPIRED:
+ lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
if (new_lock_owner != 0 ||
(lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_nograce(clp, state);
- lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ nfs4_schedule_stateid_recovery(server, lsp->ls_state);
break;
case -NFS4ERR_STALE_STATEID:
- if (new_lock_owner != 0 ||
- (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_reboot(clp, state);
lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ case -NFS4ERR_EXPIRED:
+ nfs4_schedule_lease_recovery(server->nfs_client);
};
}
@@ -4365,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_EXPIRED:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ goto out;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -ERESTARTSYS:
/*
@@ -4380,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
err = 0;
goto out;
case -EKEYEXPIRED:
@@ -4572,27 +4608,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
*p = htonl((u32)clp->cl_boot_time.tv_nsec);
args.verifier = &verifier;
- while (1) {
- args.id_len = scnprintf(args.id, sizeof(args.id),
- "%s/%s %u",
- clp->cl_ipaddr,
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR),
- clp->cl_id_uniquifier);
-
- status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
-
- if (status != -NFS4ERR_CLID_INUSE)
- break;
-
- if (signalled())
- break;
-
- if (++clp->cl_id_uniquifier == 0)
- break;
- }
+ args.id_len = scnprintf(args.id, sizeof(args.id),
+ "%s/%s.%s/%u",
+ clp->cl_ipaddr,
+ init_utsname()->nodename,
+ init_utsname()->domainname,
+ clp->cl_rpcclient->cl_auth->au_flavor);
- status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+ status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+ if (!status)
+ status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
dprintk("<-- %s status= %d\n", __func__, status);
return status;
}
@@ -4998,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
int status;
unsigned *ptr;
struct nfs4_session *session = clp->cl_session;
+ long timeout = 0;
+ int err;
dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
- status = _nfs4_proc_create_session(clp);
+ do {
+ status = _nfs4_proc_create_session(clp);
+ if (status == -NFS4ERR_DELAY) {
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ if (err)
+ status = err;
+ }
+ } while (status == -NFS4ERR_DELAY);
+
if (status)
goto out;
@@ -5110,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5197,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
if (IS_ERR(task))
ret = PTR_ERR(task);
else
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("<-- %s status=%d\n", __func__, ret);
return ret;
}
@@ -5213,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
goto out;
}
ret = rpc_wait_for_completion_task(task);
- if (!ret)
+ if (!ret) {
+ struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
+
+ if (task->tk_status == 0)
+ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
ret = task->tk_status;
+ }
rpc_put_task(task);
out:
dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5251,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5319,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
status = PTR_ERR(task);
goto out;
}
+ status = nfs4_wait_for_completion_rpc_task(task);
+ if (status == 0)
+ status = task->tk_status;
rpc_put_task(task);
return 0;
out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2336d53..0592288 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -232,12 +232,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
status = nfs4_proc_create_session(clp);
if (status != 0)
goto out;
- status = nfs4_set_callback_sessionid(clp);
- if (status != 0) {
- printk(KERN_WARNING "Sessionid not set. No callback service\n");
- nfs_callback_down(1);
- status = 0;
- }
nfs41_setup_state_renewal(clp);
nfs_mark_client_ready(clp, NFS_CS_READY);
out:
@@ -1013,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
}
/*
- * Schedule a state recovery attempt
+ * Schedule a lease recovery attempt
*/
-void nfs4_schedule_state_recovery(struct nfs_client *clp)
+void nfs4_schedule_lease_recovery(struct nfs_client *clp)
{
if (!clp)
return;
@@ -1024,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
-int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1038,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
return 1;
}
-int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1047,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
return 1;
}
+void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ nfs4_schedule_state_manager(clp);
+}
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
struct inode *inode = state->inode;
@@ -1442,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
}
#ifdef CONFIG_NFS_V4_1
+void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+ nfs4_schedule_lease_recovery(session->clp);
+}
+
void nfs41_handle_recall_slot(struct nfs_client *clp)
{
set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1453,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
clp->cl_boot_time = CURRENT_TIME;
nfs4_state_start_reclaim_nograce(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1461,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
nfs4_state_start_reclaim_reboot(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1481,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
{
nfs_expire_all_delegations(clp);
if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2ab8e5c..94d50e8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr,
p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
*p++ = cpu_to_be32(OP_CREATE_SESSION);
- p = xdr_encode_hyper(p, clp->cl_ex_clid);
+ p = xdr_encode_hyper(p, clp->cl_clientid);
*p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
*p++ = cpu_to_be32(args->flags); /*flags */
@@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- xdr_decode_hyper(p, &clp->cl_ex_clid);
+ xdr_decode_hyper(p, &clp->cl_clientid);
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
goto out_overflow;
@@ -6086,11 +6086,11 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
__be32 *p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
- if (!ntohl(*p++)) {
+ if (*p == xdr_zero) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
- if (!ntohl(*p++))
+ if (*p == xdr_zero)
return -EAGAIN;
entry->eof = 1;
return -EBADCOOKIE;
@@ -6101,7 +6101,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
goto out_overflow;
entry->prev_cookie = entry->cookie;
p = xdr_decode_hyper(p, &entry->cookie);
- entry->len = ntohl(*p++);
+ entry->len = be32_to_cpup(p);
p = xdr_inline_decode(xdr, entry->len);
if (unlikely(!p))
@@ -6132,9 +6132,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
- if (verify_attr_len(xdr, p, len) < 0)
- goto out_overflow;
-
return 0;
out_overflow:
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a..c541093 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
/* Default path we try to mount. "%s" gets replaced by our IP address */
#define NFS_ROOT "/tftpboot/%s"
+/* Default NFSROOT mount options. */
+#define NFS_DEF_OPTIONS "udp"
+
/* Parameters passed from the kernel command line */
static char nfs_root_parms[256] __initdata = "";
/* Text-based mount options passed to super.c */
-static char nfs_root_options[256] __initdata = "";
+static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
/* Address of NFS server */
static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
}
static int __init root_nfs_cat(char *dest, const char *src,
- const size_t destlen)
+ const size_t destlen)
{
+ size_t len = strlen(dest);
+
+ if (len && dest[len - 1] != ',')
+ if (strlcat(dest, ",", destlen) > destlen)
+ return -1;
+
if (strlcat(dest, src, destlen) > destlen)
return -1;
return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
if (root_nfs_cat(nfs_root_options, incoming,
sizeof(nfs_root_options)))
return -1;
-
- /*
- * Possibly prepare for more options to be appended
- */
- if (nfs_root_options[0] != '\0' &&
- nfs_root_options[strlen(nfs_root_options)] != ',')
- if (root_nfs_cat(nfs_root_options, ",",
- sizeof(nfs_root_options)))
- return -1;
-
return 0;
}
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
*/
static int __init root_nfs_data(char *cmdline)
{
- char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
+ char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
int len, retval = -1;
char *tmp = NULL;
const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
* Append mandatory options for nfsroot so they override
* what has come before
*/
- snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
+ snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
&servaddr);
- if (root_nfs_cat(nfs_root_options, addr_option,
+ if (root_nfs_cat(nfs_root_options, mand_options,
sizeof(nfs_root_options)))
goto out_optionstoolong;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bc40897..1b1bc1a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -951,7 +951,7 @@ pnfs_put_deviceid_cache(struct nfs_client *clp)
{
struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
- dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
+ dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
int i;
/* Verify cache is empty */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51..6481d53 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
task_setup_data.rpc_client = NFS_CLIENT(dir);
task = rpc_run_task(&task_setup_data);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
return 1;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 10d648ea..42b92d7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -932,7 +932,7 @@ out_bad:
while (!list_empty(&list)) {
data = list_entry(list.next, struct nfs_write_data, pages);
list_del(&data->pages);
- nfs_writedata_release(data);
+ nfs_writedata_free(data);
}
nfs_redirty_request(req);
return -ENOMEM;
@@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
+ if (how & FLUSH_SYNC)
+ rpc_wait_for_completion_task(task);
rpc_put_task(task);
return 0;
}
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index fc1c525..84c27d6 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -42,6 +42,11 @@ struct nfsacl_encode_desc {
gid_t gid;
};
+struct nfsacl_simple_acl {
+ struct posix_acl acl;
+ struct posix_acl_entry ace[4];
+};
+
static int
xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
{
@@ -72,9 +77,20 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
return 0;
}
-unsigned int
-nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
- struct posix_acl *acl, int encode_entries, int typeflag)
+/**
+ * nfsacl_encode - Encode an NFSv3 ACL
+ *
+ * @buf: destination xdr_buf to contain XDR encoded ACL
+ * @base: byte offset in xdr_buf where XDR'd ACL begins
+ * @inode: inode of file whose ACL this is
+ * @acl: posix_acl to encode
+ * @encode_entries: whether to encode ACEs as well
+ * @typeflag: ACL type: NFS_ACL_DEFAULT or zero
+ *
+ * Returns size of encoded ACL in bytes or a negative errno value.
+ */
+int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
+ struct posix_acl *acl, int encode_entries, int typeflag)
{
int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
struct nfsacl_encode_desc nfsacl_desc = {
@@ -88,17 +104,22 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
.uid = inode->i_uid,
.gid = inode->i_gid,
};
+ struct nfsacl_simple_acl aclbuf;
int err;
- struct posix_acl *acl2 = NULL;
if (entries > NFS_ACL_MAX_ENTRIES ||
xdr_encode_word(buf, base, entries))
return -EINVAL;
if (encode_entries && acl && acl->a_count == 3) {
- /* Fake up an ACL_MASK entry. */
- acl2 = posix_acl_alloc(4, GFP_KERNEL);
- if (!acl2)
- return -ENOMEM;
+ struct posix_acl *acl2 = &aclbuf.acl;
+
+ /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is
+ * invoked in contexts where a memory allocation failure is
+ * fatal. Fortunately this fake ACL is small enough to
+ * construct on the stack. */
+ memset(acl2, 0, sizeof(acl2));
+ posix_acl_init(acl2, 4);
+
/* Insert entries in canonical order: other orders seem
to confuse Solaris VxFS. */
acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */
@@ -109,8 +130,6 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
nfsacl_desc.acl = acl2;
}
err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc);
- if (acl2)
- posix_acl_release(acl2);
if (!err)
err = 8 + nfsacl_desc.desc.elem_size *
nfsacl_desc.desc.array_len;
@@ -224,9 +243,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl)
return 0;
}
-unsigned int
-nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
- struct posix_acl **pacl)
+/**
+ * nfsacl_decode - Decode an NFSv3 ACL
+ *
+ * @buf: xdr_buf containing XDR'd ACL data to decode
+ * @base: byte offset in xdr_buf where XDR'd ACL begins
+ * @aclcnt: count of ACEs in decoded posix_acl
+ * @pacl: buffer in which to place decoded posix_acl
+ *
+ * Returns the length of the decoded ACL in bytes, or a negative errno value.
+ */
+int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
+ struct posix_acl **pacl)
{
struct nfsacl_decode_desc nfsacl_desc = {
.desc = {
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd2..124e8fc 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
static struct file *do_open(char *name, int flags)
{
- struct nameidata nd;
struct vfsmount *mnt;
- int error;
+ struct file *file;
mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
if (IS_ERR(mnt))
return (struct file *)mnt;
- error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd);
- mntput(mnt); /* drop do_kern_mount reference */
- if (error)
- return ERR_PTR(error);
-
- if (flags == O_RDWR)
- error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
- else
- error = may_open(&nd.path, MAY_WRITE, flags);
+ file = file_open_root(mnt->mnt_root, mnt, name, flags);
- if (!error)
- return dentry_open(nd.path.dentry, nd.path.mnt, flags,
- current_cred());
-
- path_put(&nd.path);
- return ERR_PTR(error);
+ mntput(mnt); /* drop do_kern_mount reference */
+ return file;
}
static struct {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3be975e..02eb4ed 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
- p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4);
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
@@ -484,7 +484,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
out:
return status;
out_default:
- return nfs_cb_stat_to_errno(status);
+ return nfs_cb_stat_to_errno(nfserr);
}
/*
@@ -564,11 +564,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
if (unlikely(status))
goto out;
if (unlikely(nfserr != NFS4_OK))
- goto out_default;
+ status = nfs_cb_stat_to_errno(nfserr);
out:
return status;
-out_default:
- return nfs_cb_stat_to_errno(status);
}
/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d98d021..7b566ec 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,9 +230,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
dp->dl_client = clp;
get_nfs4_file(fp);
dp->dl_file = fp;
- dp->dl_vfs_file = find_readable_file(fp);
- get_file(dp->dl_vfs_file);
- dp->dl_flock = NULL;
dp->dl_type = type;
dp->dl_stateid.si_boot = boot_time;
dp->dl_stateid.si_stateownerid = current_delegid++;
@@ -241,8 +238,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
dp->dl_time = 0;
atomic_set(&dp->dl_count, 1);
- list_add(&dp->dl_perfile, &fp->fi_delegations);
- list_add(&dp->dl_perclnt, &clp->cl_delegations);
INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
return dp;
}
@@ -253,36 +248,30 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
if (atomic_dec_and_test(&dp->dl_count)) {
dprintk("NFSD: freeing dp %p\n",dp);
put_nfs4_file(dp->dl_file);
- fput(dp->dl_vfs_file);
kmem_cache_free(deleg_slab, dp);
num_delegations--;
}
}
-/* Remove the associated file_lock first, then remove the delegation.
- * lease_modify() is called to remove the FS_LEASE file_lock from
- * the i_flock list, eventually calling nfsd's lock_manager
- * fl_release_callback.
- */
-static void
-nfs4_close_delegation(struct nfs4_delegation *dp)
+static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{
- dprintk("NFSD: close_delegation dp %p\n",dp);
- /* XXX: do we even need this check?: */
- if (dp->dl_flock)
- vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock);
+ if (atomic_dec_and_test(&fp->fi_delegees)) {
+ vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
+ fp->fi_lease = NULL;
+ fp->fi_deleg_file = NULL;
+ }
}
/* Called under the state lock. */
static void
unhash_delegation(struct nfs4_delegation *dp)
{
- list_del_init(&dp->dl_perfile);
list_del_init(&dp->dl_perclnt);
spin_lock(&recall_lock);
+ list_del_init(&dp->dl_perfile);
list_del_init(&dp->dl_recall_lru);
spin_unlock(&recall_lock);
- nfs4_close_delegation(dp);
+ nfs4_put_deleg_lease(dp->dl_file);
nfs4_put_delegation(dp);
}
@@ -958,8 +947,6 @@ expire_client(struct nfs4_client *clp)
spin_lock(&recall_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
- dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
- dp->dl_flock);
list_del_init(&dp->dl_perclnt);
list_move(&dp->dl_recall_lru, &reaplist);
}
@@ -2078,6 +2065,7 @@ alloc_init_file(struct inode *ino)
fp->fi_inode = igrab(ino);
fp->fi_id = current_fileid++;
fp->fi_had_conflict = false;
+ fp->fi_lease = NULL;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access));
spin_lock(&recall_lock);
@@ -2329,23 +2317,8 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
nfs4_file_put_access(fp, O_RDONLY);
}
-/*
- * Spawn a thread to perform a recall on the delegation represented
- * by the lease (file_lock)
- *
- * Called from break_lease() with lock_flocks() held.
- * Note: we assume break_lease will only call this *once* for any given
- * lease.
- */
-static
-void nfsd_break_deleg_cb(struct file_lock *fl)
+static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
- struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
-
- dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
- if (!dp)
- return;
-
/* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
* callback (and since the lease code is serialized by the kernel
@@ -2353,22 +2326,35 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
* it's safe to take a reference: */
atomic_inc(&dp->dl_count);
- spin_lock(&recall_lock);
list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
- spin_unlock(&recall_lock);
/* only place dl_time is set. protected by lock_flocks*/
dp->dl_time = get_seconds();
+ nfsd4_cb_recall(dp);
+}
+
+/* Called from break_lease() with lock_flocks() held. */
+static void nfsd_break_deleg_cb(struct file_lock *fl)
+{
+ struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
+ struct nfs4_delegation *dp;
+
+ BUG_ON(!fp);
+ /* We assume break_lease is only called once per lease: */
+ BUG_ON(fp->fi_had_conflict);
/*
* We don't want the locks code to timeout the lease for us;
- * we'll remove it ourself if the delegation isn't returned
- * in time.
+ * we'll remove it ourself if a delegation isn't returned
+ * in time:
*/
fl->fl_break_time = 0;
- dp->dl_file->fi_had_conflict = true;
- nfsd4_cb_recall(dp);
+ spin_lock(&recall_lock);
+ fp->fi_had_conflict = true;
+ list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+ nfsd_break_one_deleg(dp);
+ spin_unlock(&recall_lock);
}
static
@@ -2461,10 +2447,13 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
{
struct nfs4_delegation *dp;
- list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
- if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
+ spin_lock(&recall_lock);
+ list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+ if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
+ spin_unlock(&recall_lock);
return dp;
- }
+ }
+ spin_unlock(&recall_lock);
return NULL;
}
@@ -2641,6 +2630,66 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
+static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
+{
+ struct file_lock *fl;
+
+ fl = locks_alloc_lock();
+ if (!fl)
+ return NULL;
+ locks_init_lock(fl);
+ fl->fl_lmops = &nfsd_lease_mng_ops;
+ fl->fl_flags = FL_LEASE;
+ fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+ fl->fl_end = OFFSET_MAX;
+ fl->fl_owner = (fl_owner_t)(dp->dl_file);
+ fl->fl_pid = current->tgid;
+ return fl;
+}
+
+static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
+{
+ struct nfs4_file *fp = dp->dl_file;
+ struct file_lock *fl;
+ int status;
+
+ fl = nfs4_alloc_init_lease(dp, flag);
+ if (!fl)
+ return -ENOMEM;
+ fl->fl_file = find_readable_file(fp);
+ list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
+ status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
+ if (status) {
+ list_del_init(&dp->dl_perclnt);
+ locks_free_lock(fl);
+ return -ENOMEM;
+ }
+ fp->fi_lease = fl;
+ fp->fi_deleg_file = fl->fl_file;
+ get_file(fp->fi_deleg_file);
+ atomic_set(&fp->fi_delegees, 1);
+ list_add(&dp->dl_perfile, &fp->fi_delegations);
+ return 0;
+}
+
+static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
+{
+ struct nfs4_file *fp = dp->dl_file;
+
+ if (!fp->fi_lease)
+ return nfs4_setlease(dp, flag);
+ spin_lock(&recall_lock);
+ if (fp->fi_had_conflict) {
+ spin_unlock(&recall_lock);
+ return -EAGAIN;
+ }
+ atomic_inc(&fp->fi_delegees);
+ list_add(&dp->dl_perfile, &fp->fi_delegations);
+ spin_unlock(&recall_lock);
+ list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
+ return 0;
+}
+
/*
* Attempt to hand out a delegation.
*/
@@ -2650,7 +2699,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
struct nfs4_delegation *dp;
struct nfs4_stateowner *sop = stp->st_stateowner;
int cb_up;
- struct file_lock *fl;
int status, flag = 0;
cb_up = nfsd4_cb_channel_good(sop->so_client);
@@ -2681,36 +2729,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
}
dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
- if (dp == NULL) {
- flag = NFS4_OPEN_DELEGATE_NONE;
- goto out;
- }
- status = -ENOMEM;
- fl = locks_alloc_lock();
- if (!fl)
- goto out;
- locks_init_lock(fl);
- fl->fl_lmops = &nfsd_lease_mng_ops;
- fl->fl_flags = FL_LEASE;
- fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
- fl->fl_end = OFFSET_MAX;
- fl->fl_owner = (fl_owner_t)dp;
- fl->fl_file = find_readable_file(stp->st_file);
- BUG_ON(!fl->fl_file);
- fl->fl_pid = current->tgid;
- dp->dl_flock = fl;
-
- /* vfs_setlease checks to see if delegation should be handed out.
- * the lock_manager callback fl_change is used
- */
- if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
- dprintk("NFSD: setlease failed [%d], no delegation\n", status);
- dp->dl_flock = NULL;
- locks_free_lock(fl);
- unhash_delegation(dp);
- flag = NFS4_OPEN_DELEGATE_NONE;
- goto out;
- }
+ if (dp == NULL)
+ goto out_no_deleg;
+ status = nfs4_set_delegation(dp, flag);
+ if (status)
+ goto out_free;
memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
@@ -2722,6 +2745,12 @@ out:
&& open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
dprintk("NFSD: WARNING: refusing delegation reclaim\n");
open->op_delegate_type = flag;
+ return;
+out_free:
+ nfs4_put_delegation(dp);
+out_no_deleg:
+ flag = NFS4_OPEN_DELEGATE_NONE;
+ goto out;
}
/*
@@ -2916,8 +2945,6 @@ nfs4_laundromat(void)
test_val = u;
break;
}
- dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
- dp, dp->dl_flock);
list_move(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&recall_lock);
@@ -3128,7 +3155,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
goto out;
renew_client(dp->dl_client);
if (filpp) {
- *filpp = find_readable_file(dp->dl_file);
+ *filpp = dp->dl_file->fi_deleg_file;
BUG_ON(!*filpp);
}
} else { /* open or lock stateid */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 956629b..615f0a9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
READ_BUF(dummy32);
len += (XDR_QUADLEN(dummy32) << 2);
READMEM(buf, dummy32);
- if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
- goto out_nfserr;
+ if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+ return status;
iattr->ia_valid |= ATTR_UID;
}
if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
@@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
READ_BUF(dummy32);
len += (XDR_QUADLEN(dummy32) << 2);
READMEM(buf, dummy32);
- if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
- goto out_nfserr;
+ if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+ return status;
iattr->ia_valid |= ATTR_GID;
}
if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
u32 dummy;
char *machine_name;
- int i;
+ int i, j;
int nr_secflavs;
READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
READ_BUF(4);
READ32(dummy);
READ_BUF(dummy * 4);
- for (i = 0; i < dummy; ++i)
+ for (j = 0; j < dummy; ++j)
READ32(dummy);
break;
case RPC_AUTH_GSS:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3074656..2d31224 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -83,8 +83,6 @@ struct nfs4_delegation {
atomic_t dl_count; /* ref count */
struct nfs4_client *dl_client;
struct nfs4_file *dl_file;
- struct file *dl_vfs_file;
- struct file_lock *dl_flock;
u32 dl_type;
time_t dl_time;
/* For recall: */
@@ -379,6 +377,9 @@ struct nfs4_file {
*/
atomic_t fi_readers;
atomic_t fi_writers;
+ struct file *fi_deleg_file;
+ struct file_lock *fi_lease;
+ atomic_t fi_delegees;
struct inode *fi_inode;
u32 fi_id; /* used with stateowner->so_id
* for stateid_hashtbl hash */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 641117f..da1d970 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -808,7 +808,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
if (ra->p_count == 0)
frap = rap;
}
- depth = nfsdstats.ra_size*11/10;
+ depth = nfsdstats.ra_size;
if (!frap) {
spin_unlock(&rab->pb_lock);
return NULL;
@@ -1744,6 +1744,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
host_err = nfsd_break_lease(odentry->d_inode);
if (host_err)
goto out_drop_write;
+ if (ndentry->d_inode) {
+ host_err = nfsd_break_lease(ndentry->d_inode);
+ if (host_err)
+ goto out_drop_write;
+ }
+ if (host_err)
+ goto out_drop_write;
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
if (!host_err) {
host_err = commit_metadata(tfhp);
@@ -1812,22 +1819,22 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
- goto out_nfserr;
+ goto out_put;
host_err = nfsd_break_lease(rdentry->d_inode);
if (host_err)
- goto out_put;
+ goto out_drop_write;
if (type != S_IFDIR)
host_err = vfs_unlink(dirp, rdentry);
else
host_err = vfs_rmdir(dirp, rdentry);
-out_put:
- dput(rdentry);
-
if (!host_err)
host_err = commit_metadata(fhp);
-
+out_drop_write:
mnt_drop_write(fhp->fh_export->ex_path.mnt);
+out_put:
+ dput(rdentry);
+
out_nfserr:
err = nfserrno(host_err);
out:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8..85f7baa 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
#include "btnode.h"
-void nilfs_btnode_cache_init_once(struct address_space *btnc)
-{
- nilfs_mapping_init_once(btnc);
-}
-
static const struct address_space_operations def_btnode_aops = {
.sync_page = block_sync_page,
};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 7903749..1b8ebd8 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};
-void nilfs_btnode_cache_init_once(struct address_space *);
void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a1..a0babd2 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct backing_dev_info *bdi = inode->i_sb->s_bdi;
INIT_LIST_HEAD(&shadow->frozen_buffers);
- nilfs_mapping_init_once(&shadow->frozen_data);
+ address_space_init_once(&shadow->frozen_data);
nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
- nilfs_mapping_init_once(&shadow->frozen_btnodes);
+ address_space_init_once(&shadow->frozen_btnodes);
nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
mi->mi_shadow = shadow;
return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 9803427..161791d 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inc_nlink(old_inode);
nilfs_set_link(new_dir, new_de, new_page, old_inode);
nilfs_mark_inode_dirty(new_dir);
new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= NILFS_LINK_MAX)
goto out_dir;
}
- inc_nlink(old_inode);
err = nilfs_add_link(new_dentry, old_inode);
- if (err) {
- drop_nlink(old_inode);
- nilfs_mark_inode_dirty(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de) {
inc_nlink(new_dir);
nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
nilfs_delete_entry(old_de, old_page);
- drop_nlink(old_inode);
if (dir_de) {
nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c43241..a585b35 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
return nc;
}
-void nilfs_mapping_init_once(struct address_space *mapping)
-{
- memset(mapping, 0, sizeof(*mapping));
- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
- spin_lock_init(&mapping->tree_lock);
- INIT_LIST_HEAD(&mapping->private_list);
- spin_lock_init(&mapping->private_lock);
-
- spin_lock_init(&mapping->i_mmap_lock);
- INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-}
-
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27..2a00953 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init_once(struct address_space *mapping);
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5..2de9f63 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
nilfs_segctor_map_segsum_entry(
sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
- if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
+ if (NILFS_I(inode)->i_root &&
+ !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
/* skip finfo */
}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0994f6a7..1673b3d 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -704,7 +704,8 @@ skip_mount_setup:
sbp[0]->s_state =
cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS);
/* synchronize sbp[1] with sbp[0] */
- memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
+ if (sbp[1])
+ memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL);
}
@@ -1278,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
#ifdef CONFIG_NILFS_XATTR
init_rwsem(&ii->xattr_sem);
#endif
- nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
+ address_space_init_once(&ii->i_btnode_cache);
ii->i_bmap = &ii->i_bmap_data;
inode_init_once(&ii->vfs_inode);
}
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index b572b67..326e747 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
/**
* mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2006 Anton Altaparmakov
+ * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -2576,6 +2576,8 @@ mft_rec_already_initialized:
flush_dcache_page(page);
SetPageUptodate(page);
if (base_ni) {
+ MFT_RECORD *m_tmp;
+
/*
* Setup the base mft record in the extent mft record. This
* completes initialization of the allocated extent mft record
@@ -2588,11 +2590,11 @@ mft_rec_already_initialized:
* attach it to the base inode @base_ni and map, pin, and lock
* its, i.e. the allocated, mft record.
*/
- m = map_extent_mft_record(base_ni, bit, &ni);
- if (IS_ERR(m)) {
+ m_tmp = map_extent_mft_record(base_ni, bit, &ni);
+ if (IS_ERR(m_tmp)) {
ntfs_error(vol->sb, "Failed to map allocated extent "
"mft record 0x%llx.", (long long)bit);
- err = PTR_ERR(m);
+ err = PTR_ERR(m_tmp);
/* Set the mft record itself not in use. */
m->flags &= cpu_to_le16(
~le16_to_cpu(MFT_RECORD_IN_USE));
@@ -2603,6 +2605,7 @@ mft_rec_already_initialized:
ntfs_unmap_page(page);
goto undo_mftbmp_alloc;
}
+ BUG_ON(m != m_tmp);
/*
* Make sure the allocated mft record is written out to disk.
* No need to set the inode dirty because the caller is going
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc..7eb9040 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
int ret = 0; /* if all else fails, just return false */
struct ocfs2_super *osb;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc306..254652a 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
dentry->d_name.len, dentry->d_name.name,
fh, len, connectable);
- if (len < 3 || (connectable && len < 6)) {
- mlog(ML_ERROR, "fh buffer is too small for encoding\n");
+ if (connectable && (len < 6)) {
+ *max_len = 6;
+ type = 255;
+ goto bail;
+ } else if (len < 3) {
+ *max_len = 3;
type = 255;
goto bail;
}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b9..6180da1 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
ocfs2_quota_trans_credits(sb);
}
-/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
- * bitmap block for the new bit) dx_root update for free list */
-#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
+/* data block for new dir/symlink, allocation of directory block, dx_root
+ * update for free list */
+#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
{
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 196fcb5..d5ab56c 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -114,7 +114,4 @@ int ocfs2_local_write_dquot(struct dquot *dquot);
extern const struct dquot_operations ocfs2_quota_operations;
extern struct quota_format_type ocfs2_quota_format;
-int ocfs2_quota_setup(void);
-void ocfs2_quota_shutdown(void);
-
#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 4607923..a73f641 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -63,8 +63,6 @@
* write to gf
*/
-static struct workqueue_struct *ocfs2_quota_wq = NULL;
-
static void qsync_work_fn(struct work_struct *work);
static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
@@ -400,8 +398,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
OCFS2_QBLK_RESERVED_SPACE;
oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
- queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
- msecs_to_jiffies(oinfo->dqi_syncms));
+ schedule_delayed_work(&oinfo->dqi_sync_work,
+ msecs_to_jiffies(oinfo->dqi_syncms));
out_err:
mlog_exit(status);
@@ -635,8 +633,8 @@ static void qsync_work_fn(struct work_struct *work)
struct super_block *sb = oinfo->dqi_gqinode->i_sb;
dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
- queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
- msecs_to_jiffies(oinfo->dqi_syncms));
+ schedule_delayed_work(&oinfo->dqi_sync_work,
+ msecs_to_jiffies(oinfo->dqi_syncms));
}
/*
@@ -923,20 +921,3 @@ const struct dquot_operations ocfs2_quota_operations = {
.alloc_dquot = ocfs2_alloc_dquot,
.destroy_dquot = ocfs2_destroy_dquot,
};
-
-int ocfs2_quota_setup(void)
-{
- ocfs2_quota_wq = create_workqueue("o2quot");
- if (!ocfs2_quota_wq)
- return -ENOMEM;
- return 0;
-}
-
-void ocfs2_quota_shutdown(void)
-{
- if (ocfs2_quota_wq) {
- flush_workqueue(ocfs2_quota_wq);
- destroy_workqueue(ocfs2_quota_wq);
- ocfs2_quota_wq = NULL;
- }
-}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160..29623da 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
u32 num_clusters, unsigned int e_flags)
{
int ret, delete, index, credits = 0;
- u32 new_bit, new_len;
+ u32 new_bit, new_len, orig_num_clusters;
unsigned int set_len;
struct ocfs2_super *osb = OCFS2_SB(sb);
handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
goto out;
}
+ orig_num_clusters = num_clusters;
+
while (num_clusters) {
ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
* in write-back mode.
*/
if (context->get_clusters == ocfs2_di_get_clusters) {
- ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+ ret = ocfs2_cow_sync_writeback(sb, context, cpos,
+ orig_num_clusters);
if (ret)
mlog_errno(ret);
}
@@ -4376,7 +4379,7 @@ static int ocfs2_user_path_parent(const char __user *path,
if (IS_ERR(s))
return PTR_ERR(s);
- error = path_lookup(s, LOOKUP_PARENT, nd);
+ error = kern_path_parent(s, nd);
if (error)
putname(s);
else
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 06d1f74..236ed1b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -993,8 +993,7 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
}
/* Handle quota on quotactl */
-static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
- char *path)
+static int ocfs2_quota_on(struct super_block *sb, int type, int format_id)
{
unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
@@ -1013,7 +1012,7 @@ static int ocfs2_quota_off(struct super_block *sb, int type)
}
static const struct quotactl_ops ocfs2_quotactl_ops = {
- .quota_on = ocfs2_quota_on,
+ .quota_on_meta = ocfs2_quota_on,
.quota_off = ocfs2_quota_off,
.quota_sync = dquot_quota_sync,
.get_info = dquot_get_dqinfo,
@@ -1317,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
struct mount_options *mopt,
int is_remount)
{
- int status;
+ int status, user_stack = 0;
char *p;
u32 tmp;
@@ -1460,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
memcpy(mopt->cluster_stack, args[0].from,
OCFS2_STACK_LABEL_LEN);
mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+ /*
+ * Open code the memcmp here as we don't have
+ * an osb to pass to
+ * ocfs2_userspace_stack().
+ */
+ if (memcmp(mopt->cluster_stack,
+ OCFS2_CLASSIC_CLUSTER_STACK,
+ OCFS2_STACK_LABEL_LEN))
+ user_stack = 1;
break;
case Opt_inode64:
mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1515,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
}
}
- /* Ensure only one heartbeat mode */
- tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
- OCFS2_MOUNT_HB_NONE);
- if (hweight32(tmp) != 1) {
- mlog(ML_ERROR, "Invalid heartbeat mount options\n");
- status = 0;
- goto bail;
+ if (user_stack == 0) {
+ /* Ensure only one heartbeat mode */
+ tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
+ OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE);
+ if (hweight32(tmp) != 1) {
+ mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+ status = 0;
+ goto bail;
+ }
}
status = 1;
@@ -1646,16 +1657,11 @@ static int __init ocfs2_init(void)
mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
}
- status = ocfs2_quota_setup();
- if (status)
- goto leave;
-
ocfs2_set_locking_protocol();
status = register_quota_format(&ocfs2_quota_format);
leave:
if (status < 0) {
- ocfs2_quota_shutdown();
ocfs2_free_mem_caches();
exit_ocfs2_uptodate_cache();
}
@@ -1672,8 +1678,6 @@ static void __exit ocfs2_exit(void)
{
mlog_entry_void();
- ocfs2_quota_shutdown();
-
if (ocfs2_wq) {
flush_workqueue(ocfs2_wq);
destroy_workqueue(ocfs2_wq);
diff --git a/fs/open.c b/fs/open.c
index e52389e..3cac0bd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
+
+ /* It's not possible punch hole on append only file */
+ if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+ return -EPERM;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
/*
* Revalidate the write permissions, in case security policy has
* changed since the files were opened.
@@ -565,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
{
struct path path;
int error = -EINVAL;
- int follow;
+ int lookup_flags;
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
goto out;
- follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
- error = user_path_at(dfd, filename, follow, &path);
+ lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+ error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
goto out;
error = mnt_want_write(path.mnt);
@@ -661,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int (*open)(struct inode *, struct file *),
const struct cred *cred)
{
+ static const struct file_operations empty_fops = {};
struct inode *inode;
int error;
f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
+
+ if (unlikely(f->f_flags & O_PATH))
+ f->f_mode = FMODE_PATH;
+
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = __get_file_write_access(inode, mnt);
@@ -679,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
f->f_path.dentry = dentry;
f->f_path.mnt = mnt;
f->f_pos = 0;
- f->f_op = fops_get(inode->i_fop);
file_sb_list_add(f, inode->i_sb);
+ if (unlikely(f->f_mode & FMODE_PATH)) {
+ f->f_op = &empty_fops;
+ return f;
+ }
+
+ f->f_op = fops_get(inode->i_fop);
+
error = security_dentry_open(f, cred);
if (error)
goto cleanup_all;
@@ -790,6 +811,8 @@ struct file *nameidata_to_filp(struct nameidata *nd)
/* Pick up the filp from the open intent */
filp = nd->intent.open.file;
+ nd->intent.open.file = NULL;
+
/* Has the filesystem initialised the file for us? */
if (filp->f_path.dentry == NULL) {
path_get(&nd->path);
@@ -880,15 +903,110 @@ void fd_install(unsigned int fd, struct file *file)
EXPORT_SYMBOL(fd_install);
+static inline int build_open_flags(int flags, int mode, struct open_flags *op)
+{
+ int lookup_flags = 0;
+ int acc_mode;
+
+ if (!(flags & O_CREAT))
+ mode = 0;
+ op->mode = mode;
+
+ /* Must never be set by userspace */
+ flags &= ~FMODE_NONOTIFY;
+
+ /*
+ * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
+ * check for O_DSYNC if the need any syncing at all we enforce it's
+ * always set instead of having to deal with possibly weird behaviour
+ * for malicious applications setting only __O_SYNC.
+ */
+ if (flags & __O_SYNC)
+ flags |= O_DSYNC;
+
+ /*
+ * If we have O_PATH in the open flag. Then we
+ * cannot have anything other than the below set of flags
+ */
+ if (flags & O_PATH) {
+ flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
+ acc_mode = 0;
+ } else {
+ acc_mode = MAY_OPEN | ACC_MODE(flags);
+ }
+
+ op->open_flag = flags;
+
+ /* O_TRUNC implies we need access checks for write permissions */
+ if (flags & O_TRUNC)
+ acc_mode |= MAY_WRITE;
+
+ /* Allow the LSM permission hook to distinguish append
+ access from general write access. */
+ if (flags & O_APPEND)
+ acc_mode |= MAY_APPEND;
+
+ op->acc_mode = acc_mode;
+
+ op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
+
+ if (flags & O_CREAT) {
+ op->intent |= LOOKUP_CREATE;
+ if (flags & O_EXCL)
+ op->intent |= LOOKUP_EXCL;
+ }
+
+ if (flags & O_DIRECTORY)
+ lookup_flags |= LOOKUP_DIRECTORY;
+ if (!(flags & O_NOFOLLOW))
+ lookup_flags |= LOOKUP_FOLLOW;
+ return lookup_flags;
+}
+
+/**
+ * filp_open - open file and return file pointer
+ *
+ * @filename: path to open
+ * @flags: open flags as per the open(2) second argument
+ * @mode: mode for the new file if O_CREAT is set, else ignored
+ *
+ * This is the helper to open a file from kernelspace if you really
+ * have to. But in generally you should not do this, so please move
+ * along, nothing to see here..
+ */
+struct file *filp_open(const char *filename, int flags, int mode)
+{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, mode, &op);
+ return do_filp_open(AT_FDCWD, filename, &op, lookup);
+}
+EXPORT_SYMBOL(filp_open);
+
+struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
+ const char *filename, int flags)
+{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, 0, &op);
+ if (flags & O_CREAT)
+ return ERR_PTR(-EINVAL);
+ if (!filename && (flags & O_DIRECTORY))
+ if (!dentry->d_inode->i_op->lookup)
+ return ERR_PTR(-ENOTDIR);
+ return do_file_open_root(dentry, mnt, filename, &op, lookup);
+}
+EXPORT_SYMBOL(file_open_root);
+
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, mode, &op);
char *tmp = getname(filename);
int fd = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
fd = get_unused_fd_flags(flags);
if (fd >= 0) {
- struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
+ struct file *f = do_filp_open(dfd, tmp, &op, lookup);
if (IS_ERR(f)) {
put_unused_fd(fd);
fd = PTR_ERR(f);
@@ -958,8 +1076,10 @@ int filp_close(struct file *filp, fl_owner_t id)
if (filp->f_op && filp->f_op->flush)
retval = filp->f_op->flush(filp, id);
- dnotify_flush(filp, id);
- locks_remove_posix(filp, id);
+ if (likely(!(filp->f_mode & FMODE_PATH))) {
+ dnotify_flush(filp, id);
+ locks_remove_posix(filp, id);
+ }
fput(filp);
return retval;
}
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625..b10e354 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
}
vm->vblk_size = get_unaligned_be32(data + 0x08);
+ if (vm->vblk_size == 0) {
+ ldm_error ("Illegal VBLK size");
+ return false;
+ }
+
vm->vblk_offset = get_unaligned_be32(data + 0x0C);
vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 68d6a21..11f688b 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len)
int mac_partition(struct parsed_partitions *state)
{
- int slot = 1;
Sector sect;
unsigned char *data;
- int blk, blocks_in_map;
+ int slot, blocks_in_map;
unsigned secsize;
#ifdef CONFIG_PPC_PMAC
int found_root = 0;
@@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state)
put_dev_sector(sect);
return 0; /* not a MacOS disk */
}
- strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
blocks_in_map = be32_to_cpu(part->map_count);
- for (blk = 1; blk <= blocks_in_map; ++blk) {
- int pos = blk * secsize;
+ if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
+ put_dev_sector(sect);
+ return 0;
+ }
+ strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
+ for (slot = 1; slot <= blocks_in_map; ++slot) {
+ int pos = slot * secsize;
put_dev_sector(sect);
data = read_part_sector(state, pos/512, &sect);
if (!data)
@@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state)
}
if (goodness > found_root_goodness) {
- found_root = blk;
+ found_root = slot;
found_root_goodness = goodness;
}
}
#endif /* CONFIG_PPC_PMAC */
-
- ++slot;
}
#ifdef CONFIG_PPC_PMAC
if (found_root_goodness)
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7c..764b86a 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
#include "check.h"
#include "osf.h"
+#define MAX_OSF_PARTITIONS 18
+
int osf_partition(struct parsed_partitions *state)
{
int i;
int slot = 1;
+ unsigned int npartitions;
Sector sect;
unsigned char *data;
struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
u8 p_fstype;
u8 p_frag;
__le16 p_cpg;
- } d_partitions[8];
+ } d_partitions[MAX_OSF_PARTITIONS];
} * label;
struct d_partition * partition;
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
put_dev_sector(sect);
return 0;
}
- for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) {
+ npartitions = le16_to_cpu(label->d_npartitions);
+ if (npartitions > MAX_OSF_PARTITIONS) {
+ put_dev_sector(sect);
+ return 0;
+ }
+ for (i = 0 ; i < npartitions; i++, partition++) {
if (slot == state->limit)
break;
if (le32_to_cpu(partition->p_size))
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 39df95a..b1cf6bf 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -22,6 +22,7 @@
#include <linux/errno.h>
+EXPORT_SYMBOL(posix_acl_init);
EXPORT_SYMBOL(posix_acl_alloc);
EXPORT_SYMBOL(posix_acl_clone);
EXPORT_SYMBOL(posix_acl_valid);
@@ -32,6 +33,16 @@ EXPORT_SYMBOL(posix_acl_chmod_masq);
EXPORT_SYMBOL(posix_acl_permission);
/*
+ * Init a fresh posix_acl
+ */
+void
+posix_acl_init(struct posix_acl *acl, int count)
+{
+ atomic_set(&acl->a_refcount, 1);
+ acl->a_count = count;
+}
+
+/*
* Allocate a new ACL with the specified number of entries.
*/
struct posix_acl *
@@ -40,10 +51,8 @@ posix_acl_alloc(int count, gfp_t flags)
const size_t size = sizeof(struct posix_acl) +
count * sizeof(struct posix_acl_entry);
struct posix_acl *acl = kmalloc(size, flags);
- if (acl) {
- atomic_set(&acl->a_refcount, 1);
- acl->a_count = count;
- }
+ if (acl)
+ posix_acl_init(acl, count);
return acl;
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index df2b703..7c99c1c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
task_cap(m, task);
task_cpus_allowed(m, task);
cpuset_task_status_allowed(m, task);
-#if defined(CONFIG_S390)
- task_show_regs(m, task);
-#endif
task_context_switch_counts(m, task);
return 0;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e8..d49c4b5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
&proc_self_inode_operations, NULL, {}),
};
-/*
- * Exceptional case: normally we are not allowed to unhash a busy
- * directory. In this case, however, we can do it - no aliasing problems
- * due to the way we treat inodes.
- */
-static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- struct inode *inode;
- struct task_struct *task;
-
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
-
- inode = dentry->d_inode;
- task = get_proc_task(inode);
- if (task) {
- put_task_struct(task);
- return 1;
- }
- d_drop(dentry);
- return 0;
-}
-
-static const struct dentry_operations proc_base_dentry_operations =
-{
- .d_revalidate = proc_base_revalidate,
- .d_delete = pid_delete_dentry,
-};
-
static struct dentry *proc_base_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- d_set_d_op(dentry, &proc_base_dentry_operations);
d_add(dentry, inode);
error = NULL;
out:
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index eafc22a..b701eaa 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -67,7 +67,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
struct console *con;
loff_t off = 0;
- acquire_console_sem();
+ console_lock();
for_each_console(con)
if (off++ == *pos)
break;
@@ -84,7 +84,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
static void c_stop(struct seq_file *m, void *v)
{
- release_console_sem();
+ console_unlock();
}
static const struct seq_operations consoles_op = {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4c..d6a7ca1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
+ struct ctl_table_header *head;
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
de = PROC_I(inode)->pde;
if (de)
pde_put(de);
- if (PROC_I(inode)->sysctl)
- sysctl_head_put(PROC_I(inode)->sysctl);
+ head = PROC_I(inode)->sysctl;
+ if (head) {
+ rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
+ sysctl_head_put(head);
+ }
}
struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4..927cbd1 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
return;
root = of_find_node_by_path("/");
if (root == NULL) {
- printk(KERN_ERR "/proc/device-tree: can't find root\n");
+ pr_debug("/proc/device-tree: can't find root\n");
return;
}
proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92..8eb2522 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -408,15 +408,18 @@ static int proc_sys_compare(const struct dentry *parent,
const struct dentry *dentry, const struct inode *inode,
unsigned int len, const char *str, const struct qstr *name)
{
+ struct ctl_table_header *head;
/* Although proc doesn't have negative dentries, rcu-walk means
* that inode here can be NULL */
+ /* AV: can it, indeed? */
if (!inode)
- return 0;
+ return 1;
if (name->len != len)
return 1;
if (memcmp(name->name, str, len))
return 1;
- return !sysctl_is_seen(PROC_I(inode)->sysctl);
+ head = rcu_dereference(PROC_I(inode)->sysctl);
+ return !head || !sysctl_is_seen(head);
}
static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 84becd3..a2a622e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2189,8 +2189,8 @@ int dquot_resume(struct super_block *sb, int type)
}
EXPORT_SYMBOL(dquot_resume);
-int dquot_quota_on_path(struct super_block *sb, int type, int format_id,
- struct path *path)
+int dquot_quota_on(struct super_block *sb, int type, int format_id,
+ struct path *path)
{
int error = security_quota_on(path->dentry);
if (error)
@@ -2204,20 +2204,6 @@ int dquot_quota_on_path(struct super_block *sb, int type, int format_id,
DQUOT_LIMITS_ENABLED);
return error;
}
-EXPORT_SYMBOL(dquot_quota_on_path);
-
-int dquot_quota_on(struct super_block *sb, int type, int format_id, char *name)
-{
- struct path path;
- int error;
-
- error = kern_path(name, LOOKUP_FOLLOW, &path);
- if (!error) {
- error = dquot_quota_on_path(sb, type, format_id, &path);
- path_put(&path);
- }
- return error;
-}
EXPORT_SYMBOL(dquot_quota_on);
/*
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b299961..b34bdb2 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -64,18 +64,15 @@ static int quota_sync_all(int type)
}
static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
- void __user *addr)
+ struct path *path)
{
- char *pathname;
- int ret = -ENOSYS;
-
- pathname = getname(addr);
- if (IS_ERR(pathname))
- return PTR_ERR(pathname);
- if (sb->s_qcop->quota_on)
- ret = sb->s_qcop->quota_on(sb, type, id, pathname);
- putname(pathname);
- return ret;
+ if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_on_meta)
+ return -ENOSYS;
+ if (sb->s_qcop->quota_on_meta)
+ return sb->s_qcop->quota_on_meta(sb, type, id);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+ return sb->s_qcop->quota_on(sb, type, id, path);
}
static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
@@ -241,7 +238,7 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
/* Copy parameters and call proper function */
static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
- void __user *addr)
+ void __user *addr, struct path *path)
{
int ret;
@@ -256,7 +253,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
switch (cmd) {
case Q_QUOTAON:
- return quota_quotaon(sb, type, cmd, id, addr);
+ return quota_quotaon(sb, type, cmd, id, path);
case Q_QUOTAOFF:
if (!sb->s_qcop->quota_off)
return -ENOSYS;
@@ -335,6 +332,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
{
uint cmds, type;
struct super_block *sb = NULL;
+ struct path path, *pathp = NULL;
int ret;
cmds = cmd >> SUBCMDSHIFT;
@@ -351,12 +349,27 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
return -ENODEV;
}
+ /*
+ * Path for quotaon has to be resolved before grabbing superblock
+ * because that gets s_umount sem which is also possibly needed by path
+ * resolution (think about autofs) and thus deadlocks could arise.
+ */
+ if (cmds == Q_QUOTAON) {
+ ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW, &path);
+ if (ret)
+ pathp = ERR_PTR(ret);
+ else
+ pathp = &path;
+ }
+
sb = quotactl_block(special);
if (IS_ERR(sb))
return PTR_ERR(sb);
- ret = do_quotactl(sb, type, cmds, id, addr);
+ ret = do_quotactl(sb, type, cmds, id, addr, pathp);
drop_super(sb);
+ if (pathp && !IS_ERR(pathp))
+ path_put(pathp);
return ret;
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036..1bba24b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
struct inode *inode = dentry->d_inode;
int maxlen = *lenp;
- if (maxlen < 3)
+ if (need_parent && (maxlen < 5)) {
+ *lenp = 5;
return 255;
+ } else if (maxlen < 3) {
+ *lenp = 3;
+ return 255;
+ }
data[0] = inode->i_ino;
data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3eea859..c77514b 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2876,7 +2876,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
reiserfs_mounted_fs_count++;
if (reiserfs_mounted_fs_count <= 1) {
reiserfs_write_unlock(sb);
- commit_wq = create_workqueue("reiserfs");
+ commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
reiserfs_write_lock(sb);
}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51e..4b2eb56 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
dentry, inode, &security);
if (retval) {
- dir->i_nlink--;
+ DEC_DIR_INODE_NLINK(dir)
goto out_failed;
}
@@ -1122,10 +1122,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
reiserfs_write_unlock(dir->i_sb);
return -EMLINK;
}
- if (inode->i_nlink == 0) {
- reiserfs_write_unlock(dir->i_sb);
- return -ENOENT;
- }
/* inc before scheduling so reiserfs_unlink knows we are here */
inc_nlink(inode);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2575682..0aab04f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -632,7 +632,7 @@ static int reiserfs_acquire_dquot(struct dquot *);
static int reiserfs_release_dquot(struct dquot *);
static int reiserfs_mark_dquot_dirty(struct dquot *);
static int reiserfs_write_info(struct super_block *, int);
-static int reiserfs_quota_on(struct super_block *, int, int, char *);
+static int reiserfs_quota_on(struct super_block *, int, int, struct path *);
static const struct dquot_operations reiserfs_quota_operations = {
.write_dquot = reiserfs_write_dquot,
@@ -2048,25 +2048,21 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type)
* Standard function to be called on quota_on
*/
static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
- char *name)
+ struct path *path)
{
int err;
- struct path path;
struct inode *inode;
struct reiserfs_transaction_handle th;
if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
return -EINVAL;
- err = kern_path(name, LOOKUP_FOLLOW, &path);
- if (err)
- return err;
/* Quotafile not on the same filesystem? */
- if (path.mnt->mnt_sb != sb) {
+ if (path->mnt->mnt_sb != sb) {
err = -EXDEV;
goto out;
}
- inode = path.dentry->d_inode;
+ inode = path->dentry->d_inode;
/* We must not pack tails for quota files on reiserfs for quota IO to work */
if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
err = reiserfs_unpack(inode, NULL);
@@ -2082,7 +2078,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
/* Journaling quota? */
if (REISERFS_SB(sb)->s_qf_names[type]) {
/* Quotafile not of fs root? */
- if (path.dentry->d_parent != sb->s_root)
+ if (path->dentry->d_parent != sb->s_root)
reiserfs_warning(sb, "super-6521",
"Quota file not on filesystem root. "
"Journalled quota will not work.");
@@ -2101,9 +2097,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
if (err)
goto out;
}
- err = dquot_quota_on_path(sb, type, format_id, &path);
+ err = dquot_quota_on(sb, type, format_id, path);
out:
- path_put(&path);
return err;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e9..5c11ca8 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
return -EPERM;
}
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2fb2882..8ab48bc 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -63,6 +63,14 @@ static struct buffer_head *get_block_length(struct super_block *sb,
*length = (unsigned char) bh->b_data[*offset] |
(unsigned char) bh->b_data[*offset + 1] << 8;
*offset += 2;
+
+ if (*offset == msblk->devblksize) {
+ put_bh(bh);
+ bh = sb_bread(sb, ++(*cur_index));
+ if (bh == NULL)
+ return NULL;
+ *offset = 0;
+ }
}
return bh;
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 856756c..c4eb400 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -95,12 +95,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
if (!buffer_uptodate(bh[k]))
goto release_mutex;
- if (avail == 0) {
- offset = 0;
- put_bh(bh[k++]);
- continue;
- }
-
stream->buf.in = bh[k]->b_data + offset;
stream->buf.in_size = avail;
stream->buf.in_pos = 0;
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 818a5e0..4661ae2 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -82,12 +82,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
if (!buffer_uptodate(bh[k]))
goto release_mutex;
- if (avail == 0) {
- offset = 0;
- put_bh(bh[k++]);
- continue;
- }
-
stream->next_in = bh[k]->b_data + offset;
stream->avail_in = avail;
offset = 0;
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf..9610391 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
int error = -EINVAL;
int lookup_flags = 0;
- if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
+ AT_EMPTY_PATH)) != 0)
goto out;
if (!(flag & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
if (flag & AT_NO_AUTOMOUNT)
lookup_flags |= LOOKUP_NO_AUTOMOUNT;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
if (bufsiz <= 0)
return -EINVAL;
- error = user_path_at(dfd, pathname, 0, &path);
+ error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
if (!error) {
struct inode *inode = path.dentry->d_inode;
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8..8244924 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
}
EXPORT_SYMBOL(vfs_statfs);
-static int do_statfs_native(struct path *path, struct statfs *buf)
+int user_statfs(const char __user *pathname, struct kstatfs *st)
{
- struct kstatfs st;
- int retval;
+ struct path path;
+ int error = user_path(pathname, &path);
+ if (!error) {
+ error = vfs_statfs(&path, st);
+ path_put(&path);
+ }
+ return error;
+}
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
+int fd_statfs(int fd, struct kstatfs *st)
+{
+ struct file *file = fget(fd);
+ int error = -EBADF;
+ if (file) {
+ error = vfs_statfs(&file->f_path, st);
+ fput(file);
+ }
+ return error;
+}
- if (sizeof(*buf) == sizeof(st))
- memcpy(buf, &st, sizeof(st));
+static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
+{
+ struct statfs buf;
+
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
else {
- if (sizeof buf->f_blocks == 4) {
- if ((st.f_blocks | st.f_bfree | st.f_bavail |
- st.f_bsize | st.f_frsize) &
+ if (sizeof buf.f_blocks == 4) {
+ if ((st->f_blocks | st->f_bfree | st->f_bavail |
+ st->f_bsize | st->f_frsize) &
0xffffffff00000000ULL)
return -EOVERFLOW;
/*
* f_files and f_ffree may be -1; it's okay to stuff
* that into 32 bits
*/
- if (st.f_files != -1 &&
- (st.f_files & 0xffffffff00000000ULL))
+ if (st->f_files != -1 &&
+ (st->f_files & 0xffffffff00000000ULL))
return -EOVERFLOW;
- if (st.f_ffree != -1 &&
- (st.f_ffree & 0xffffffff00000000ULL))
+ if (st->f_ffree != -1 &&
+ (st->f_ffree & 0xffffffff00000000ULL))
return -EOVERFLOW;
}
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid = st.f_fsid;
- buf->f_namelen = st.f_namelen;
- buf->f_frsize = st.f_frsize;
- buf->f_flags = st.f_flags;
- memset(buf->f_spare, 0, sizeof(buf->f_spare));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid = st->f_fsid;
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+ memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
-static int do_statfs64(struct path *path, struct statfs64 *buf)
+static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
{
- struct kstatfs st;
- int retval;
-
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
-
- if (sizeof(*buf) == sizeof(st))
- memcpy(buf, &st, sizeof(st));
+ struct statfs64 buf;
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
else {
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid = st.f_fsid;
- buf->f_namelen = st.f_namelen;
- buf->f_frsize = st.f_frsize;
- buf->f_flags = st.f_flags;
- memset(buf->f_spare, 0, sizeof(buf->f_spare));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid = st->f_fsid;
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+ memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct statfs tmp;
- error = do_statfs_native(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ struct kstatfs st;
+ int error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs_native(&st, buf);
return error;
}
SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
{
- struct path path;
- long error;
-
+ struct kstatfs st;
+ int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path(pathname, &path);
- if (!error) {
- struct statfs64 tmp;
- error = do_statfs64(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs64(&st, buf);
return error;
}
SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
{
- struct file *file;
- struct statfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs_native(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-out:
+ struct kstatfs st;
+ int error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs_native(&st, buf);
return error;
}
SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
{
- struct file *file;
- struct statfs64 tmp;
+ struct kstatfs st;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs64(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-out:
+ error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs64(&st, buf);
return error;
}
diff --git a/fs/super.c b/fs/super.c
index 74e149e..7e9dd4c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s)
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
fs->kill_sb(s);
+ /*
+ * We need to call rcu_barrier so all the delayed rcu free
+ * inodes are flushed before we release the fs module.
+ */
+ rcu_barrier();
put_filesystem(fs);
put_super(s);
} else {
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b12..e474fbc 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
new_de = sysv_find_entry(new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
sysv_set_link(new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = sysv_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
sysv_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b6..7217d67 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
ubifs_assert(mutex_is_locked(&dir->i_mutex));
ubifs_assert(mutex_is_locked(&inode->i_mutex));
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- *
- * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
- * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
- * lock 'dirA->i_mutex', so this is possible. Both of the functions
- * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
- * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
- * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
- * to the list of orphans. After this, 'vfs_link()' will link
- * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
- * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
- * to the list of orphans.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
err = dbg_check_synced_i_size(inode);
if (err)
return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9e..f1dce84 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
#include <linux/crc-itu-t.h>
#include <linux/exportfs.h>
+enum { UDF_MAX_LINKS = 0xffff };
+
static inline int udf_match(int len1, const unsigned char *name1, int len2,
const unsigned char *name2)
{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct udf_inode_info *iinfo;
err = -EMLINK;
- if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
+ if (dir->i_nlink >= UDF_MAX_LINKS)
goto out;
err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
struct fileIdentDesc cfi, *fi;
int err;
- if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
+ if (inode->i_nlink >= UDF_MAX_LINKS)
return -EMLINK;
- }
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end_rename;
retval = -EMLINK;
- if (!new_inode &&
- new_dir->i_nlink >=
- (256 << sizeof(new_dir->i_nlink)) - 1)
+ if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
goto end_rename;
}
if (!nfi) {
@@ -1287,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
struct fid *fid = (struct fid *)fh;
int type = FILEID_UDF_WITHOUT_PARENT;
- if (len < 3 || (connectable && len < 5))
+ if (connectable && (len < 5)) {
+ *lenp = 5;
return 255;
+ } else if (len < 3) {
+ *lenp = 3;
+ return 255;
+ }
*lenp = 3;
fid->udf.block = location.logicalBlockNum;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9..d6f6815 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -306,7 +306,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
ufs_set_link(new_dir, new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -318,12 +317,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= UFS_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = ufs_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -331,12 +327,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
- * inode_dec_link_count() will mark the inode dirty.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
ufs_delete_entry(old_dir, old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ac1c7e8..f83a4c8 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -2022,11 +2022,12 @@ xfs_buf_init(void)
if (!xfslogd_workqueue)
goto out_free_buf_zone;
- xfsdatad_workqueue = create_workqueue("xfsdatad");
+ xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
- xfsconvertd_workqueue = create_workqueue("xfsconvertd");
+ xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
+ WQ_MEM_RECLAIM, 1);
if (!xfsconvertd_workqueue)
goto out_destroy_xfsdatad_workqueue;
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae..d61611c 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
if (!capable(CAP_SYS_ADMIN))
return -XFS_ERROR(EPERM);
+ if (!blk_queue_discard(q))
+ return -XFS_ERROR(EOPNOTSUPP);
if (copy_from_user(&range, urange, sizeof(range)))
return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114d..f4f878f 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
* seven combinations work. The real answer is "don't use v2".
*/
len = xfs_fileid_length(fileid_type);
- if (*max_len < len)
+ if (*max_len < len) {
+ *max_len = len;
return 255;
+ }
*max_len = len;
switch (fileid_type) {
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index b06ede1..0ca0e3c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
xfs_mount_t *mp,
void __user *arg)
{
- xfs_fsop_geom_v1_t fsgeo;
+ xfs_fsop_geom_t fsgeo;
int error;
- error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
+ error = xfs_fs_geometry(mp, &fsgeo, 3);
if (error)
return -error;
- if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+ /*
+ * Caller should have passed an argument of type
+ * xfs_fsop_geom_v1_t. This is a proper subset of the
+ * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+ */
+ if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
return -XFS_ERROR(EFAULT);
return 0;
}
@@ -985,10 +990,22 @@ xfs_ioctl_setattr(
/*
* Extent size must be a multiple of the appropriate block
- * size, if set at all.
+ * size, if set at all. It must also be smaller than the
+ * maximum extent size supported by the filesystem.
+ *
+ * Also, for non-realtime files, limit the extent size hint to
+ * half the size of the AGs in the filesystem so alignment
+ * doesn't result in extents larger than an AG.
*/
if (fa->fsx_extsize != 0) {
- xfs_extlen_t size;
+ xfs_extlen_t size;
+ xfs_fsblock_t extsize_fsb;
+
+ extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
+ if (extsize_fsb > MAXEXTLEN) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
if (XFS_IS_REALTIME_INODE(ip) ||
((mask & FSX_XFLAGS) &&
@@ -997,6 +1014,10 @@ xfs_ioctl_setattr(
mp->m_sb.sb_blocklog;
} else {
size = mp->m_sb.sb_blocksize;
+ if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
}
if (fa->fsx_extsize % size) {
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index f8e854b..206a281 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1863,12 +1863,14 @@ xfs_qm_dqreclaim_one(void)
xfs_dquot_t *dqpout;
xfs_dquot_t *dqp;
int restarts;
+ int startagain;
restarts = 0;
dqpout = NULL;
/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-startagain:
+again:
+ startagain = 0;
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
@@ -1885,13 +1887,10 @@ startagain:
ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
trace_xfs_dqreclaim_want(dqp);
-
- xfs_dqunlock(dqp);
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
- return NULL;
XQM_STATS_INC(xqmstats.xs_qm_dqwants);
- goto startagain;
+ restarts++;
+ startagain = 1;
+ goto dqunlock;
}
/*
@@ -1906,23 +1905,20 @@ startagain:
ASSERT(list_empty(&dqp->q_mplist));
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
- xfs_dqunlock(dqp);
dqpout = dqp;
XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
- break;
+ goto dqunlock;
}
ASSERT(dqp->q_hash);
ASSERT(!list_empty(&dqp->q_mplist));
/*
- * Try to grab the flush lock. If this dquot is in the process of
- * getting flushed to disk, we don't want to reclaim it.
+ * Try to grab the flush lock. If this dquot is in the process
+ * of getting flushed to disk, we don't want to reclaim it.
*/
- if (!xfs_dqflock_nowait(dqp)) {
- xfs_dqunlock(dqp);
- continue;
- }
+ if (!xfs_dqflock_nowait(dqp))
+ goto dqunlock;
/*
* We have the flush lock so we know that this is not in the
@@ -1944,8 +1940,7 @@ startagain:
xfs_fs_cmn_err(CE_WARN, mp,
"xfs_qm_dqreclaim: dquot %p flush failed", dqp);
}
- xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
- continue;
+ goto dqunlock;
}
/*
@@ -1967,13 +1962,8 @@ startagain:
*/
if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
restarts++;
- mutex_unlock(&dqp->q_hash->qh_lock);
- xfs_dqfunlock(dqp);
- xfs_dqunlock(dqp);
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS)
- return NULL;
- goto startagain;
+ startagain = 1;
+ goto qhunlock;
}
ASSERT(dqp->q_nrefs == 0);
@@ -1986,14 +1976,20 @@ startagain:
xfs_Gqm->qm_dqfrlist_cnt--;
dqpout = dqp;
mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+qhunlock:
mutex_unlock(&dqp->q_hash->qh_lock);
dqfunlock:
xfs_dqfunlock(dqp);
+dqunlock:
xfs_dqunlock(dqp);
if (dqpout)
break;
if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
- return NULL;
+ break;
+ if (startagain) {
+ mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+ goto again;
+ }
}
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
return dqpout;
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 0ab56b3..d0b3bc7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -75,6 +75,22 @@ typedef unsigned int xfs_alloctype_t;
#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
/*
+ * When deciding how much space to allocate out of an AG, we limit the
+ * allocation maximum size to the size the AG. However, we cannot use all the
+ * blocks in the AG - some are permanently used by metadata. These
+ * blocks are generally:
+ * - the AG superblock, AGF, AGI and AGFL
+ * - the AGF (bno and cnt) and AGI btree root blocks
+ * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
+ *
+ * The AG headers are sector sized, so the amount of space they take up is
+ * dependent on filesystem geometry. The others are all single blocks.
+ */
+#define XFS_ALLOC_AG_MAX_USABLE(mp) \
+ ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
+
+
+/*
* Argument structure for xfs_alloc routines.
* This is turned into a structure to avoid having 20 arguments passed
* down several levels of the stack.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 4111cd3..dc3afd77 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1038,17 +1038,34 @@ xfs_bmap_add_extent_delay_real(
* Filling in the middle part of a previous delayed allocation.
* Contiguity is impossible here.
* This case is avoided almost all the time.
+ *
+ * We start with a delayed allocation:
+ *
+ * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
+ * PREV @ idx
+ *
+ * and we are allocating:
+ * +rrrrrrrrrrrrrrrrr+
+ * new
+ *
+ * and we set it up for insertion as:
+ * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
+ * new
+ * PREV @ idx LEFT RIGHT
+ * inserted at idx + 1
*/
temp = new->br_startoff - PREV.br_startoff;
- trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
- xfs_bmbt_set_blockcount(ep, temp);
- r[0] = *new;
- r[1].br_state = PREV.br_state;
- r[1].br_startblock = 0;
- r[1].br_startoff = new_endoff;
temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
- r[1].br_blockcount = temp2;
- xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
+ trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
+ xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */
+ LEFT = *new;
+ RIGHT.br_state = PREV.br_state;
+ RIGHT.br_startblock = nullstartblock(
+ (int)xfs_bmap_worst_indlen(ip, temp2));
+ RIGHT.br_startoff = new_endoff;
+ RIGHT.br_blockcount = temp2;
+ /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
+ xfs_iext_insert(ip, idx + 1, 2, &LEFT, state);
ip->i_df.if_lastex = idx + 1;
ip->i_d.di_nextents++;
if (cur == NULL)
@@ -2430,7 +2447,7 @@ xfs_bmap_btalloc_nullfb(
startag = ag = 0;
pag = xfs_perag_get(mp, ag);
- while (*blen < ap->alen) {
+ while (*blen < args->maxlen) {
if (!pag->pagf_init) {
error = xfs_alloc_pagf_init(mp, args->tp, ag,
XFS_ALLOC_FLAG_TRYLOCK);
@@ -2452,7 +2469,7 @@ xfs_bmap_btalloc_nullfb(
notinit = 1;
if (xfs_inode_is_filestream(ap->ip)) {
- if (*blen >= ap->alen)
+ if (*blen >= args->maxlen)
break;
if (ap->userdata) {
@@ -2498,14 +2515,14 @@ xfs_bmap_btalloc_nullfb(
* If the best seen length is less than the request
* length, use the best as the minimum.
*/
- else if (*blen < ap->alen)
+ else if (*blen < args->maxlen)
args->minlen = *blen;
/*
- * Otherwise we've seen an extent as big as alen,
+ * Otherwise we've seen an extent as big as maxlen,
* use that as the minimum.
*/
else
- args->minlen = ap->alen;
+ args->minlen = args->maxlen;
/*
* set the failure fallback case to look in the selected
@@ -2573,7 +2590,9 @@ xfs_bmap_btalloc(
args.tp = ap->tp;
args.mp = mp;
args.fsbno = ap->rval;
- args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
+
+ /* Trim the allocation back to the maximum an AG can fit. */
+ args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp));
args.firstblock = ap->firstblock;
blen = 0;
if (nullfb) {
@@ -2621,7 +2640,7 @@ xfs_bmap_btalloc(
/*
* Adjust for alignment
*/
- if (blen > args.alignment && blen <= ap->alen)
+ if (blen > args.alignment && blen <= args.maxlen)
args.minlen = blen - args.alignment;
args.minalignslop = 0;
} else {
@@ -2640,7 +2659,7 @@ xfs_bmap_btalloc(
* of minlen+alignment+slop doesn't go up
* between the calls.
*/
- if (blen > mp->m_dalign && blen <= ap->alen)
+ if (blen > mp->m_dalign && blen <= args.maxlen)
nextminlen = blen - mp->m_dalign;
else
nextminlen = args.minlen;
@@ -4485,6 +4504,16 @@ xfs_bmapi(
/* Figure out the extent size, adjust alen */
extsz = xfs_get_extsz_hint(ip);
if (extsz) {
+ /*
+ * make sure we don't exceed a single
+ * extent length when we align the
+ * extent by reducing length we are
+ * going to allocate by the maximum
+ * amount extent size aligment may
+ * require.
+ */
+ alen = XFS_FILBLKS_MIN(len,
+ MAXEXTLEN - (2 * extsz - 1));
error = xfs_bmap_extsize_align(mp,
&got, &prev, extsz,
rt, eof,
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 98c6f73..6f8c21c 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -427,13 +427,15 @@ xfs_buf_item_unpin(
if (remove) {
/*
- * We have to remove the log item from the transaction
- * as we are about to release our reference to the
- * buffer. If we don't, the unlock that occurs later
- * in xfs_trans_uncommit() will ry to reference the
+ * If we are in a transaction context, we have to
+ * remove the log item from the transaction as we are
+ * about to release our reference to the buffer. If we
+ * don't, the unlock that occurs later in
+ * xfs_trans_uncommit() will try to reference the
* buffer which we no longer have a hold on.
*/
- xfs_trans_del_item(lip);
+ if (lip->li_desc)
+ xfs_trans_del_item(lip);
/*
* Since the transaction no longer refers to the buffer,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 75f2ef6..d22e626 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -138,7 +138,8 @@ xfs_efi_item_unpin(
if (remove) {
ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
- xfs_trans_del_item(lip);
+ if (lip->li_desc)
+ xfs_trans_del_item(lip);
xfs_efi_item_free(efip);
return;
}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd..85668ef 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
xfs_fsop_geom_t *geo,
int new_version)
{
+
+ memset(geo, 0, sizeof(*geo));
+
geo->blocksize = mp->m_sb.sb_blocksize;
geo->rtextsize = mp->m_sb.sb_rextsize;
geo->agblocks = mp->m_sb.sb_agblocks;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 55582bd..8a0f044 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -337,7 +337,12 @@ xfs_iomap_prealloc_size(
int shift = 0;
int64_t freesp;
- alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size);
+ /*
+ * rounddown_pow_of_two() returns an undefined result
+ * if we pass in alloc_blocks = 0. Hence the "+ 1" to
+ * ensure we always pass in a non-zero value.
+ */
+ alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1;
alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
rounddown_pow_of_two(alloc_blocks));
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 916eb7d..3bd3291 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -191,7 +191,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket);
xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
-int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
struct xfs_log_vec *log_vector,
xfs_lsn_t *commit_lsn, int flags);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9dc8125..9ca59be 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -543,7 +543,7 @@ xlog_cil_push(
error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
if (error)
- goto out_abort;
+ goto out_abort_free_ticket;
/*
* now that we've written the checkpoint into the log, strictly
@@ -569,8 +569,9 @@ restart:
}
spin_unlock(&cil->xc_cil_lock);
+ /* xfs_log_done always frees the ticket on error. */
commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
- if (error || commit_lsn == -1)
+ if (commit_lsn == -1)
goto out_abort;
/* attach all the transactions w/ busy extents to iclog */
@@ -600,6 +601,8 @@ out_free_ticket:
kmem_free(new_ctx);
return 0;
+out_abort_free_ticket:
+ xfs_log_ticket_put(tic);
out_abort:
xlog_cil_committed(ctx, XFS_LI_ABORTED);
return XFS_ERROR(EIO);
@@ -622,7 +625,7 @@ out_abort:
* background commit, returns without it held once background commits are
* allowed again.
*/
-int
+void
xfs_log_commit_cil(
struct xfs_mount *mp,
struct xfs_trans *tp,
@@ -637,11 +640,6 @@ xfs_log_commit_cil(
if (flags & XFS_TRANS_RELEASE_LOG_RES)
log_flags = XFS_LOG_REL_PERM_RESERV;
- if (XLOG_FORCED_SHUTDOWN(log)) {
- xlog_cil_free_logvec(log_vector);
- return XFS_ERROR(EIO);
- }
-
/*
* do all the hard work of formatting items (including memory
* allocation) outside the CIL context lock. This prevents stalling CIL
@@ -701,7 +699,6 @@ xfs_log_commit_cil(
*/
if (push)
xlog_cil_push(log, 0);
- return 0;
}
/*
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index edfa178..4aff563 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void)
if (!xfs_mru_elem_zone)
goto out;
- xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
+ xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
if (!xfs_mru_reap_wq)
goto out_destroy_mru_elem_zone;
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 33dbc4e..7692279 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1446,6 +1446,14 @@ xfs_log_item_batch_insert(
* Bulk operation version of xfs_trans_committed that takes a log vector of
* items to insert into the AIL. This uses bulk AIL insertion techniques to
* minimise lock traffic.
+ *
+ * If we are called with the aborted flag set, it is because a log write during
+ * a CIL checkpoint commit has failed. In this case, all the items in the
+ * checkpoint have already gone through IOP_COMMITED and IOP_UNLOCK, which
+ * means that checkpoint commit abort handling is treated exactly the same
+ * as an iclog write error even though we haven't started any IO yet. Hence in
+ * this case all we need to do is IOP_COMMITTED processing, followed by an
+ * IOP_UNPIN(aborted) call.
*/
void
xfs_trans_committed_bulk(
@@ -1472,6 +1480,16 @@ xfs_trans_committed_bulk(
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
continue;
+ /*
+ * if we are aborting the operation, no point in inserting the
+ * object into the AIL as we are in a shutdown situation.
+ */
+ if (aborted) {
+ ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount));
+ IOP_UNPIN(lip, 1);
+ continue;
+ }
+
if (item_lsn != commit_lsn) {
/*
@@ -1503,20 +1521,24 @@ xfs_trans_committed_bulk(
}
/*
- * Called from the trans_commit code when we notice that
- * the filesystem is in the middle of a forced shutdown.
+ * Called from the trans_commit code when we notice that the filesystem is in
+ * the middle of a forced shutdown.
+ *
+ * When we are called here, we have already pinned all the items in the
+ * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called
+ * so we can simply walk the items in the transaction, unpin them with an abort
+ * flag and then free the items. Note that unpinning the items can result in
+ * them being freed immediately, so we need to use a safe list traversal method
+ * here.
*/
STATIC void
xfs_trans_uncommit(
struct xfs_trans *tp,
uint flags)
{
- struct xfs_log_item_desc *lidp;
+ struct xfs_log_item_desc *lidp, *n;
- list_for_each_entry(lidp, &tp->t_items, lid_trans) {
- /*
- * Unpin all but those that aren't dirty.
- */
+ list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
if (lidp->lid_flags & XFS_LID_DIRTY)
IOP_UNPIN(lidp->lid_item, 1);
}
@@ -1733,7 +1755,6 @@ xfs_trans_commit_cil(
int flags)
{
struct xfs_log_vec *log_vector;
- int error;
/*
* Get each log item to allocate a vector structure for
@@ -1744,9 +1765,7 @@ xfs_trans_commit_cil(
if (!log_vector)
return ENOMEM;
- error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
- if (error)
- return error;
+ xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free(tp);
OpenPOWER on IntegriCloud