From 0a6be6555302eebb14510fd6b35bb17e8dfa1386 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:31:07 -0500 Subject: nfs: include xattr.h from fs/nfs/nfs3proc.c fs/nfs/nfs3proc.c is making use of xattr but was getting linux/xattr.h indirectly through linux/cgroup.h, which will soon drop the inclusion of xattr.h. Explicitly include linux/xattr.h from nfs3proc.c so that compilation doesn't fail when linux/cgroup.h drops linux/xattr.h. As the following cgroup changes will depend on these changes, it probably would be easier to route this through cgroup branch. Would that be okay? Signed-off-by: Tejun Heo Acked-by: Trond Myklebust Cc: linux-nfs@vger.kernel.org --- fs/nfs/nfs3proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index aa9bc973..a462ef0 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "iostat.h" #include "internal.h" -- cgit v1.1 From fe60a8a0919eeee862054137fed49f00b710d9cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Feb 2014 10:35:22 +1100 Subject: xfs: ensure correct timestamp updates from truncate The VFS doesn't set the proper ATTR_CTIME and ATTR_MTIME values for truncate, so filesystems have to manually add them. The introduction of xfs_setattr_time accidentally broke this special case an caused a regression in generic/313. Fix this by removing the local mask variable in xfs_setattr_size so that we only have a single place to keep the attribute information. cc: Signed-off-by: Christoph Hellwig Reported-by: Fengguang Wu Reviewed-by: Brian Foster Reviewed-by: Jie Liu Signed-off-by: Dave Chinner --- fs/xfs/xfs_iops.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index f35d5c9..9ddfb81 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -705,7 +705,6 @@ xfs_setattr_size( { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; xfs_off_t oldsize, newsize; struct xfs_trans *tp; int error; @@ -726,8 +725,8 @@ xfs_setattr_size( ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); - ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| + ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); oldsize = inode->i_size; newsize = iattr->ia_size; @@ -736,7 +735,7 @@ xfs_setattr_size( * Short circuit the truncate case for zero length files. */ if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { - if (!(mask & (ATTR_CTIME|ATTR_MTIME))) + if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) return 0; /* @@ -824,10 +823,11 @@ xfs_setattr_size( * these flags set. For all other operations the VFS set these flags * explicitly if it wants a timestamp update. */ - if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + if (newsize != oldsize && + !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = current_fs_time(inode->i_sb); - mask |= ATTR_CTIME | ATTR_MTIME; + iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; } /* @@ -863,9 +863,9 @@ xfs_setattr_size( xfs_inode_clear_eofblocks_tag(ip); } - if (mask & ATTR_MODE) + if (iattr->ia_valid & ATTR_MODE) xfs_setattr_mode(ip, iattr); - if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -- cgit v1.1 From 3895e51f6dbf6610519be070a3bede811f6ac4fb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 10 Feb 2014 10:37:18 +1100 Subject: xfs: ensure correct log item buffer alignment On 32 bit platforms, the log item vector headers are not 64 bit aligned or sized. hence if we don't take care to align them correctly or pad the buffer appropriately for 8 byte alignment, we can end up with alignment issues when accessing the user buffer directly as a structure. To solve this, simply pad the buffer headers to 64 bit offset so that the data section is always 8 byte aligned. Signed-off-by: Dave Chinner Reported-by: Michael L. Semon Tested-by: Michael L. Semon Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_cil.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index cdebd83..4ef6fdb 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -205,16 +205,25 @@ xlog_cil_insert_format_items( /* * We 64-bit align the length of each iovec so that the start * of the next one is naturally aligned. We'll need to - * account for that slack space here. + * account for that slack space here. Then round nbytes up + * to 64-bit alignment so that the initial buffer alignment is + * easy to calculate and verify. */ nbytes += niovecs * sizeof(uint64_t); + nbytes = round_up(nbytes, sizeof(uint64_t)); /* grab the old item if it exists for reservation accounting */ old_lv = lip->li_lv; - /* calc buffer size */ - buf_size = sizeof(struct xfs_log_vec) + nbytes + - niovecs * sizeof(struct xfs_log_iovec); + /* + * The data buffer needs to start 64-bit aligned, so round up + * that space to ensure we can align it appropriately and not + * overrun the buffer. + */ + buf_size = nbytes + + round_up((sizeof(struct xfs_log_vec) + + niovecs * sizeof(struct xfs_log_iovec)), + sizeof(uint64_t)); /* compare to existing item size */ if (lip->li_lv && buf_size <= lip->li_lv->lv_size) { @@ -251,6 +260,8 @@ xlog_cil_insert_format_items( /* The allocated data region lies beyond the iovec region */ lv->lv_buf_len = 0; lv->lv_buf = (char *)lv + buf_size - nbytes; + ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); + lip->li_ops->iop_format(lip, lv); insert: ASSERT(lv->lv_buf_len <= nbytes); -- cgit v1.1 From fd1defc257e2b12ab69bc0b379105c00eca4e112 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 6 Feb 2014 14:38:53 -0500 Subject: NFS: Do not set NFS_INO_INVALID_LABEL unless server supports labeled NFS Commit aa9c2669626c (NFS: Client implementation of Labeled-NFS) introduces a performance regression. When nfs_zap_caches_locked is called, it sets the NFS_INO_INVALID_LABEL flag irrespectively of whether or not the NFS server supports security labels. Since that flag is never cleared, it means that all calls to nfs_revalidate_inode() will now trigger an on-the-wire GETATTR call. This patch ensures that we never set the NFS_INO_INVALID_LABEL unless the server advertises support for labeled NFS. It also causes nfs_setsecurity() to clear NFS_INO_INVALID_LABEL when it has successfully set the security label for the inode. Finally it gets rid of the NFS_INO_INVALID_LABEL cruft from nfs_update_inode, which has nothing to do with labeled NFS. Reported-by: Neil Brown Cc: stable@vger.kernel.org # 3.11+ Tested-by: Neil Brown Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 14 ++++++++++---- fs/nfs/internal.h | 9 +++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 28a0a3c..360114a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -164,17 +164,16 @@ static void nfs_zap_caches_locked(struct inode *inode) if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { nfs_fscache_invalidate(inode); nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_LABEL | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_PAGECACHE; } else nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_LABEL | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_PAGECACHE; + nfs_zap_label_cache_locked(nfsi); } void nfs_zap_caches(struct inode *inode) @@ -266,6 +265,13 @@ nfs_init_locked(struct inode *inode, void *opaque) } #ifdef CONFIG_NFS_V4_SECURITY_LABEL +static void nfs_clear_label_invalid(struct inode *inode) +{ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_LABEL; + spin_unlock(&inode->i_lock); +} + void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, struct nfs4_label *label) { @@ -283,6 +289,7 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, __func__, (char *)label->label, label->len, error); + nfs_clear_label_invalid(inode); } } @@ -1648,7 +1655,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blocks = fattr->du.nfs2.blocks; /* Update attrtimeo value if we're out of the unstable period */ - if (invalid & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) { + if (invalid & NFS_INO_INVALID_ATTR) { nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; @@ -1661,7 +1668,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } } invalid &= ~NFS_INO_INVALID_ATTR; - invalid &= ~NFS_INO_INVALID_LABEL; /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8b5cc04..fafddda 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -279,9 +279,18 @@ static inline void nfs4_label_free(struct nfs4_label *label) } return; } + +static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) +{ + if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL)) + nfsi->cache_validity |= NFS_INO_INVALID_LABEL; +} #else static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } static inline void nfs4_label_free(void *label) {} +static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) +{ +} #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ /* proc.c */ -- cgit v1.1 From 42eacf9e57b65ffa768af72cb7fc86cc6f6af042 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 10 Feb 2014 14:08:16 -0600 Subject: [CIFS] Fix cifsacl mounts over smb2 to not call cifs When mounting with smb2/smb3 (e.g. vers=2.1) and cifsacl mount option, it was trying to get the mode by querying the acl over the cifs rather than smb2 protocol. This patch makes that protocol independent and makes cifsacl smb2 mounts return a more intuitive operation not supported error (until we add a worker function for smb2_get_acl). Note that a previous patch fixed getxattr/setxattr for the CIFSACL xattr which would unconditionally call cifs_get_acl and cifs_set_acl (even when mounted smb2). I made those protocol independent last week (new protocol version operations "get_acl" and "set_acl" but did not add an smb2_get_acl and smb2_set_acl yet so those now simply return EOPNOTSUPP which at least is better than sending cifs requests on smb2 mount) The previous patches did not fix the one remaining case though ie mounting with "cifsacl" when getting mode from acl would unconditionally end up calling "cifs_get_acl_from_fid" even for smb2 - so made that protocol independent but to make that protocol independent had to make sure that the callers were passing the protocol independent handle structure (cifs_fid) instead of cifs specific _u16 network file handle (ie cifs_fid instead of cifs_fid->fid) Now mount with smb2 and cifsacl mount options will return EOPNOTSUP (instead of timing out) and a future patch will add smb2 operations (e.g. get_smb2_acl) to enable this. Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 33 ++++++++++++++++++++++++--------- fs/cifs/cifsglob.h | 2 ++ fs/cifs/cifsproto.h | 6 ++++-- fs/cifs/dir.c | 2 +- fs/cifs/file.c | 2 +- fs/cifs/inode.c | 2 +- fs/cifs/smb1ops.c | 1 + 7 files changed, 34 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index c819b0b..7ff866d 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -865,8 +865,8 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, return rc; } -static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, - __u16 fid, u32 *pacllen) +struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, + const struct cifs_fid *cifsfid, u32 *pacllen) { struct cifs_ntsd *pntsd = NULL; unsigned int xid; @@ -877,7 +877,8 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, return ERR_CAST(tlink); xid = get_xid(); - rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen); + rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), cifsfid->netfid, &pntsd, + pacllen); free_xid(xid); cifs_put_tlink(tlink); @@ -946,7 +947,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, if (!open_file) return get_cifs_acl_by_path(cifs_sb, path, pacllen); - pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->fid.netfid, pacllen); + pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen); cifsFileInfo_put(open_file); return pntsd; } @@ -1006,19 +1007,31 @@ out: /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, - struct inode *inode, const char *path, const __u16 *pfid) + struct inode *inode, const char *path, + const struct cifs_fid *pfid) { struct cifs_ntsd *pntsd = NULL; u32 acllen = 0; int rc = 0; + struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); + struct cifs_tcon *tcon; cifs_dbg(NOISY, "converting ACL to mode for %s\n", path); - if (pfid) - pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen); - else - pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); + if (pfid && (tcon->ses->server->ops->get_acl_by_fid)) + pntsd = tcon->ses->server->ops->get_acl_by_fid(cifs_sb, pfid, + &acllen); + else if (tcon->ses->server->ops->get_acl) + pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path, + &acllen); + else { + cifs_put_tlink(tlink); + return -EOPNOTSUPP; + } /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ if (IS_ERR(pntsd)) { rc = PTR_ERR(pntsd); @@ -1030,6 +1043,8 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, cifs_dbg(VFS, "parse sec desc failed rc = %d\n", rc); } + cifs_put_tlink(tlink); + return rc; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 86dc28c..cf32f03 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -398,6 +398,8 @@ struct smb_version_operations { const struct nls_table *, int); struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *, const char *, u32 *); + struct cifs_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *, + const struct cifs_fid *, u32 *); int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, int); }; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index d00e09d..acc4ee8 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -151,7 +151,7 @@ extern struct inode *cifs_iget(struct super_block *sb, extern int cifs_get_inode_info(struct inode **inode, const char *full_path, FILE_ALL_INFO *data, struct super_block *sb, - int xid, const __u16 *fid); + int xid, const struct cifs_fid *fid); extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, struct super_block *sb, unsigned int xid); @@ -162,11 +162,13 @@ extern int cifs_rename_pending_delete(const char *full_path, const unsigned int xid); extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct inode *inode, - const char *path, const __u16 *pfid); + const char *path, const struct cifs_fid *pfid); extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64, kuid_t, kgid_t); extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, const char *, u32 *); +extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *, + const struct cifs_fid *, u32 *); extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, const char *, int); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d3a6796..3db0c5f 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -378,7 +378,7 @@ cifs_create_get_file_info: xid); else { rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb, - xid, &fid->netfid); + xid, fid); if (newinode) { if (server->ops->set_lease_key) server->ops->set_lease_key(newinode, fid); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a301edb..290b496 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -244,7 +244,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, xid); else rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, - xid, &fid->netfid); + xid, fid); out: kfree(buf); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index be58b8f..aadc2b6 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -677,7 +677,7 @@ cgfi_exit: int cifs_get_inode_info(struct inode **inode, const char *full_path, FILE_ALL_INFO *data, struct super_block *sb, int xid, - const __u16 *fid) + const struct cifs_fid *fid) { bool validinum = false; __u16 srchflgs; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index bfd66d8..526fb89 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1073,6 +1073,7 @@ struct smb_version_operations smb1_operations = { #endif /* CIFS_XATTR */ #ifdef CONFIG_CIFS_ACL .get_acl = get_cifs_acl, + .get_acl_by_fid = get_cifs_acl_by_fid, .set_acl = set_cifs_acl, #endif /* CIFS_ACL */ }; -- cgit v1.1 From 15cc17678547676c82a5da9ccf357447333fc342 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Wed, 12 Feb 2014 10:42:45 -0500 Subject: ext4: fix xfstest generic/299 block validity failures Commit a115f749c1 (ext4: remove wait for unwritten extent conversion from ext4_truncate) exposed a bug in ext4_ext_handle_uninitialized_extents(). It can be triggered by xfstest generic/299 when run on a test file system created without a journal. This test continuously fallocates and truncates files to which random dio/aio writes are simultaneously performed by a separate process. The test completes successfully, but if the test filesystem is mounted with the block_validity option, a warning message stating that a logical block has been mapped to an illegal physical block is posted in the kernel log. The bug occurs when an extent is being converted to the written state by ext4_end_io_dio() and ext4_ext_handle_uninitialized_extents() discovers a mapping for an existing uninitialized extent. Although it sets EXT4_MAP_MAPPED in map->m_flags, it fails to set map->m_pblk to the discovered physical block number. Because map->m_pblk is not otherwise initialized or set by this function or its callers, its uninitialized value is returned to ext4_map_blocks(), where it is stored as a bogus mapping in the extent status tree. Since map->m_pblk can accidentally contain illegal values that are larger than the physical size of the file system, calls to check_block_validity() in ext4_map_blocks() that are enabled if the block_validity mount option is used can fail, resulting in the logged warning message. Signed-off-by: Eric Whitney Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org # 3.11+ --- fs/ext4/extents.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 10cff47..74bc2d5 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3906,6 +3906,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, } else err = ret; map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = newblock; if (allocated > map->m_len) allocated = map->m_len; map->m_len = allocated; -- cgit v1.1 From 30d29b119ef01776e0a301444ab24defe8d8bef3 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Wed, 12 Feb 2014 11:48:31 -0500 Subject: ext4: fix error paths in swap_inode_boot_loader() In swap_inode_boot_loader() we forgot to release ->i_mutex and resume unlocked dio for inode and inode_bl if there is an error starting the journal handle. This commit fixes this issue. Reported-by: Ahmed Tamrawi Cc: Andreas Dilger Cc: Dr. Tilmann Bubeck Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org # v3.10+ --- fs/ext4/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 6bea806..a2a837f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -140,7 +140,7 @@ static long swap_inode_boot_loader(struct super_block *sb, handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2); if (IS_ERR(handle)) { err = -EINVAL; - goto swap_boot_out; + goto journal_err_out; } /* Protect extent tree against block allocations via delalloc */ @@ -198,6 +198,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_double_up_write_data_sem(inode, inode_bl); +journal_err_out: ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode_bl); -- cgit v1.1 From 23301410972330c0ae9a8afc379ba2005e249cc6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 12 Feb 2014 12:16:04 -0500 Subject: ext4: don't try to modify s_flags if the the file system is read-only If an ext4 file system is created by some tool other than mke2fs (perhaps by someone who has a pathalogical fear of the GPL) that doesn't set one or the other of the EXT2_FLAGS_{UN}SIGNED_HASH flags, and that file system is then mounted read-only, don't try to modify the s_flags field. Otherwise, if dm_verity is in use, the superblock will change, causing an dm_verity failure. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/super.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1f7784d..710fed2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3695,16 +3695,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; - i = le32_to_cpu(es->s_flags); - if (i & EXT2_FLAGS_UNSIGNED_HASH) - sbi->s_hash_unsigned = 3; - else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { + i = le32_to_cpu(es->s_flags); + if (i & EXT2_FLAGS_UNSIGNED_HASH) + sbi->s_hash_unsigned = 3; + else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { #ifdef __CHAR_UNSIGNED__ - es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); - sbi->s_hash_unsigned = 3; + if (!(sb->s_flags & MS_RDONLY)) + es->s_flags |= + cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); + sbi->s_hash_unsigned = 3; #else - es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); + if (!(sb->s_flags & MS_RDONLY)) + es->s_flags |= + cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); #endif + } } /* Handle clustersize */ -- cgit v1.1 From 844fa1b5f8493cff4b976fa7a5b9ebeeafdd75cc Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 13 Feb 2014 15:40:13 -0600 Subject: jfs: set i_ctime when setting ACL This fixes a regression in 3.14-rc1 where xfstests generic/307 fails. jfs sets the ctime on the inode when writing an xattr. Previously, jfs went ahead and stored an acl that can be completely represented in the traditional permission bits, so the ctime was always set in the xattr code. The new code doesn't bother storing the acl in that case, thus the ctime isn't getting set. Signed-off-by: Dave Kleikamp Reported-by: Michael L. Semon --- fs/jfs/acl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index e973b85..5a8ea16 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -86,6 +86,8 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type, rc = posix_acl_equiv_mode(acl, &inode->i_mode); if (rc < 0) return rc; + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); if (rc == 0) acl = NULL; break; -- cgit v1.1 From 5d81de8e8667da7135d3a32a964087c0faf5483f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Feb 2014 07:20:35 -0500 Subject: cifs: ensure that uncached writes handle unmapped areas correctly It's possible for userland to pass down an iovec via writev() that has a bogus user pointer in it. If that happens and we're doing an uncached write, then we can end up getting less bytes than we expect from the call to iov_iter_copy_from_user. This is CVE-2014-0069 cifs_iovec_write isn't set up to handle that situation however. It'll blindly keep chugging through the page array and not filling those pages with anything useful. Worse yet, we'll later end up with a negative number in wdata->tailsz, which will confuse the sending routines and cause an oops at the very least. Fix this by having the copy phase of cifs_iovec_write stop copying data in this situation and send the last write as a short one. At the same time, we want to avoid sending a zero-length write to the server, so break out of the loop and set rc to -EFAULT if that happens. This also allows us to handle the case where no address in the iovec is valid. [Note: Marking this for stable on v3.4+ kernels, but kernels as old as v2.6.38 may have a similar problem and may need similar fix] Cc: # v3.4+ Reviewed-by: Pavel Shilovsky Reported-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/file.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 290b496..18758bc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2389,7 +2389,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *poffset) { unsigned long nr_pages, i; - size_t copied, len, cur_len; + size_t bytes, copied, len, cur_len; ssize_t total_written = 0; loff_t offset; struct iov_iter it; @@ -2444,14 +2444,45 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, save_len = cur_len; for (i = 0; i < nr_pages; i++) { - copied = min_t(const size_t, cur_len, PAGE_SIZE); + bytes = min_t(const size_t, cur_len, PAGE_SIZE); copied = iov_iter_copy_from_user(wdata->pages[i], &it, - 0, copied); + 0, bytes); cur_len -= copied; iov_iter_advance(&it, copied); + /* + * If we didn't copy as much as we expected, then that + * may mean we trod into an unmapped area. Stop copying + * at that point. On the next pass through the big + * loop, we'll likely end up getting a zero-length + * write and bailing out of it. + */ + if (copied < bytes) + break; } cur_len = save_len - cur_len; + /* + * If we have no data to send, then that probably means that + * the copy above failed altogether. That's most likely because + * the address in the iovec was bogus. Set the rc to -EFAULT, + * free anything we allocated and bail out. + */ + if (!cur_len) { + for (i = 0; i < nr_pages; i++) + put_page(wdata->pages[i]); + kfree(wdata); + rc = -EFAULT; + break; + } + + /* + * i + 1 now represents the number of pages we actually used in + * the copy phase above. Bring nr_pages down to that, and free + * any pages that we didn't use. + */ + for ( ; nr_pages > i + 1; nr_pages--) + put_page(wdata->pages[nr_pages - 1]); + wdata->sync_mode = WB_SYNC_ALL; wdata->nr_pages = nr_pages; wdata->offset = (__u64)offset; -- cgit v1.1 From 2365c4eaf077c48574ab6f143960048fc0f31518 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 14 Feb 2014 13:31:02 +0400 Subject: CIFS: Fix too big maxBuf size for SMB3 mounts SMB3 servers can respond with MaxTransactSize of more than 4M that can cause a memory allocation error returned from kmalloc in a lock codepath. Also the client doesn't support multicredit requests now and allows buffer sizes of 65536 bytes only. Set MaxTransactSize to this maximum supported value. Cc: stable@vger.kernel.org # 3.7+ Signed-off-by: Pavel Shilovsky Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/smb2glob.h | 3 +++ fs/cifs/smb2ops.c | 14 ++++---------- fs/cifs/smb2pdu.c | 4 +++- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index c383508..bc0bb9c 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -57,4 +57,7 @@ #define SMB2_CMACAES_SIZE (16) #define SMB3_SIGNKEY_SIZE (16) +/* Maximum buffer size value we can send with 1 credit */ +#define SMB2_MAX_BUFFER_SIZE 65536 + #endif /* _SMB2_GLOB_H */ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 757da3e..192f51a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -182,11 +182,8 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified wsize, or default */ wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); - /* - * limit write size to 2 ** 16, because we don't support multicredit - * requests now. - */ - wsize = min_t(unsigned int, wsize, 2 << 15); + /* set it to the maximum buffer size value we can send with 1 credit */ + wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); return wsize; } @@ -200,11 +197,8 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified rsize, or default */ rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); - /* - * limit write size to 2 ** 16, because we don't support multicredit - * requests now. - */ - rsize = min_t(unsigned int, rsize, 2 << 15); + /* set it to the maximum buffer size value we can send with 1 credit */ + rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); return rsize; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a3f7a9c..8603447 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -413,7 +413,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) /* SMB2 only has an extended negflavor */ server->negflavor = CIFS_NEGFLAVOR_EXTENDED; - server->maxBuf = le32_to_cpu(rsp->MaxTransactSize); + /* set it to the maximum buffer size value we can send with 1 credit */ + server->maxBuf = min_t(unsigned int, le32_to_cpu(rsp->MaxTransactSize), + SMB2_MAX_BUFFER_SIZE); server->max_read = le32_to_cpu(rsp->MaxReadSize); server->max_write = le32_to_cpu(rsp->MaxWriteSize); /* BB Do we need to validate the SecurityMode? */ -- cgit v1.1 From b93c95353413041a8cebad915a8109619f66bcc6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 15 Feb 2014 21:33:13 -0500 Subject: ext4: fix online resize with very large inode tables If a file system has a large number of inodes per block group, all of the metadata blocks in a flex_bg may be larger than what can fit in a single block group. Unfortunately, ext4_alloc_group_tables() in resize.c was never tested to see if it would handle this case correctly, and there were a large number of bugs which caused the following sequence to result in a BUG_ON: kernel bug at fs/ext4/resize.c:409! ... call trace: [] ext4_flex_group_add+0x1448/0x1830 [] ext4_resize_fs+0x7b2/0xe80 [] ext4_ioctl+0xbf0/0xf00 [] do_vfs_ioctl+0x2dd/0x4b0 [] ? final_putname+0x22/0x50 [] sys_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x1b code: c8 4c 89 df e8 41 96 f8 ff 44 89 e8 49 01 c4 44 29 6d d4 0 rip [] set_flexbg_block_bitmap+0x171/0x180 This can be reproduced with the following command sequence: mke2fs -t ext4 -i 4096 /dev/vdd 1G mount -t ext4 /dev/vdd /vdd resize2fs /dev/vdd 8G To fix this, we need to make sure the right thing happens when a block group's inode table straddles two block groups, which means the following bugs had to be fixed: 1) Not clearing the BLOCK_UNINIT flag in the second block group in ext4_alloc_group_tables --- the was proximate cause of the BUG_ON. 2) Incorrectly determining how many block groups contained contiguous free blocks in ext4_alloc_group_tables(). 3) Incorrectly setting the start of the next block range to be marked in use after a discontinuity in setup_new_flex_group_blocks(). Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c5adbb3..69a6261 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -243,6 +243,7 @@ static int ext4_alloc_group_tables(struct super_block *sb, ext4_group_t group; ext4_group_t last_group; unsigned overhead; + __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; BUG_ON(flex_gd->count == 0 || group_data == NULL); @@ -266,7 +267,7 @@ next_group: src_group++; for (; src_group <= last_group; src_group++) { overhead = ext4_group_overhead_blocks(sb, src_group); - if (overhead != 0) + if (overhead == 0) last_blk += group_data[src_group - group].blocks_count; else break; @@ -280,8 +281,7 @@ next_group: group = ext4_get_group_number(sb, start_blk - 1); group -= group_data[0].group; group_data[group].free_blocks_count--; - if (flexbg_size > 1) - flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + flex_gd->bg_flags[group] &= uninit_mask; } /* Allocate inode bitmaps */ @@ -292,22 +292,30 @@ next_group: group = ext4_get_group_number(sb, start_blk - 1); group -= group_data[0].group; group_data[group].free_blocks_count--; - if (flexbg_size > 1) - flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + flex_gd->bg_flags[group] &= uninit_mask; } /* Allocate inode tables */ for (; it_index < flex_gd->count; it_index++) { - if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) + unsigned int itb = EXT4_SB(sb)->s_itb_per_group; + ext4_fsblk_t next_group_start; + + if (start_blk + itb > last_blk) goto next_group; group_data[it_index].inode_table = start_blk; - group = ext4_get_group_number(sb, start_blk - 1); + group = ext4_get_group_number(sb, start_blk); + next_group_start = ext4_group_first_block_no(sb, group + 1); group -= group_data[0].group; - group_data[group].free_blocks_count -= - EXT4_SB(sb)->s_itb_per_group; - if (flexbg_size > 1) - flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + if (start_blk + itb > next_group_start) { + flex_gd->bg_flags[group + 1] &= uninit_mask; + overhead = start_blk + itb - next_group_start; + group_data[group + 1].free_blocks_count -= overhead; + itb -= overhead; + } + + group_data[group].free_blocks_count -= itb; + flex_gd->bg_flags[group] &= uninit_mask; start_blk += EXT4_SB(sb)->s_itb_per_group; } @@ -620,7 +628,7 @@ handle_ib: if (err) goto out; count = group_table_count[j]; - start = group_data[i].block_bitmap; + start = (&group_data[i].block_bitmap)[j]; block = start; } -- cgit v1.1 From 3d2660d0c9c2f296837078c189b68a47f6b2e3b5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 15 Feb 2014 22:42:25 -0500 Subject: ext4: fix online resize with a non-standard blocks per group setting The set_flexbg_block_bitmap() function assumed that the number of blocks in a blockgroup was sb->blocksize * 8, which is normally true, but not always! Use EXT4_BLOCKS_PER_GROUP(sb) instead, to fix block bitmap corruption after: mke2fs -t ext4 -g 3072 -i 4096 /dev/vdd 1G mount -t ext4 /dev/vdd /vdd resize2fs /dev/vdd 8G Signed-off-by: "Theodore Ts'o" Reported-by: Jon Bernard Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 69a6261..f3b84cd 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -409,7 +409,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, start = ext4_group_first_block_no(sb, group); group -= flex_gd->groups[0].group; - count2 = sb->s_blocksize * 8 - (block - start); + count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start); if (count2 > count) count2 = count; -- cgit v1.1 From 19ea80603715d473600cd993b9987bc97d042e02 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 Feb 2014 19:29:32 -0500 Subject: ext4: don't leave i_crtime.tv_sec uninitialized If the i_crtime field is not present in the inode, don't leave the field uninitialized. Fixes: ef7f38359 ("ext4: Add nanosecond timestamps") Reported-by: Vegard Nossum Tested-by: Vegard Nossum Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/ext4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ece5556..d3a534f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -771,6 +771,8 @@ do { \ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ (einode)->xtime.tv_sec = \ (signed)le32_to_cpu((raw_inode)->xtime); \ + else \ + (einode)->xtime.tv_sec = 0; \ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ ext4_decode_extra_time(&(einode)->xtime, \ raw_inode->xtime ## _extra); \ -- cgit v1.1 From 292f503cade2b1d966239ef56a851e6897d1ba92 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Feb 2014 21:42:56 -0500 Subject: NFSv4: Use the correct net namespace in nfs4_update_server We need to use the same net namespace that was used to resolve the hostname and sockaddr arguments. Fixes: 32e62b7c3ef09 (NFS: Add nfs4_update_server) Cc: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 3 ++- fs/nfs/nfs4client.c | 7 ++++--- fs/nfs/nfs4namespace.c | 12 ++++++------ 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fafddda..b46cf5a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -176,7 +176,8 @@ extern struct nfs_server *nfs4_create_server( extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen); + struct sockaddr *sap, size_t salen, + struct net *net); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 860ad26..0e46d3d 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1135,6 +1135,7 @@ static int nfs_probe_destination(struct nfs_server *server) * @hostname: new end-point's hostname * @sap: new end-point's socket address * @salen: size of "sap" + * @net: net namespace * * The nfs_server must be quiescent before this function is invoked. * Either its session is drained (NFSv4.1+), or its transport is @@ -1143,13 +1144,13 @@ static int nfs_probe_destination(struct nfs_server *server) * Returns zero on success, or a negative errno value. */ int nfs4_update_server(struct nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen) + struct sockaddr *sap, size_t salen, struct net *net) { struct nfs_client *clp = server->nfs_client; struct rpc_clnt *clnt = server->client; struct xprt_create xargs = { .ident = clp->cl_proto, - .net = &init_net, + .net = net, .dstaddr = sap, .addrlen = salen, .servername = hostname, @@ -1189,7 +1190,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, error = nfs4_set_client(server, hostname, sap, salen, buf, clp->cl_rpcclient->cl_auth->au_flavor, clp->cl_proto, clnt->cl_timeout, - clp->cl_minorversion, clp->cl_net); + clp->cl_minorversion, net); nfs_put_client(clp); if (error != 0) { nfs_server_insert_lists(server); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 4e7f05d..3d5dbf8 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -121,9 +121,8 @@ static int nfs4_validate_fspath(struct dentry *dentry, } static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct nfs_server *server) + struct sockaddr *sa, size_t salen, struct net *net) { - struct net *net = rpc_net_ns(server->client); ssize_t ret; ret = rpc_pton(net, string, len, sa, salen); @@ -223,6 +222,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, const struct nfs4_fs_location *location) { const size_t addr_bufsize = sizeof(struct sockaddr_storage); + struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client); struct vfsmount *mnt = ERR_PTR(-ENOENT); char *mnt_path; unsigned int maxbuflen; @@ -248,8 +248,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, continue; mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize, - NFS_SB(mountdata->sb)); + mountdata->addr, addr_bufsize, net); if (mountdata->addrlen == 0) continue; @@ -419,6 +418,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, const struct nfs4_fs_location *location) { const size_t addr_bufsize = sizeof(struct sockaddr_storage); + struct net *net = rpc_net_ns(server->client); struct sockaddr *sap; unsigned int s; size_t salen; @@ -440,7 +440,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, continue; salen = nfs_parse_server_name(buf->data, buf->len, - sap, addr_bufsize, server); + sap, addr_bufsize, net); if (salen == 0) continue; rpc_set_port(sap, NFS_PORT); @@ -450,7 +450,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, if (hostname == NULL) break; - error = nfs4_update_server(server, hostname, sap, salen); + error = nfs4_update_server(server, hostname, sap, salen, net); kfree(hostname); if (error == 0) break; -- cgit v1.1 From fbc0b970ddfab4b35dad27ebaae712af680bdc7e Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Feb 2014 13:01:19 +0800 Subject: ceph: properly handle XATTR_CREATE and XATTR_REPLACE return -EEXIST if XATTR_CREATE is set and xattr alread exists. return -ENODATA if XATTR_REPLACE is set but xattr does not exist. Signed-off-by: Yan, Zheng --- fs/ceph/xattr.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 898b656..28f9793 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -319,8 +319,7 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode, static int __set_xattr(struct ceph_inode_info *ci, const char *name, int name_len, const char *val, int val_len, - int dirty, - int should_free_name, int should_free_val, + int flags, int update_xattr, struct ceph_inode_xattr **newxattr) { struct rb_node **p; @@ -349,12 +348,25 @@ static int __set_xattr(struct ceph_inode_info *ci, xattr = NULL; } + if (update_xattr) { + int err = 0; + if (xattr && (flags & XATTR_CREATE)) + err = -EEXIST; + else if (!xattr && (flags & XATTR_REPLACE)) + err = -ENODATA; + if (err) { + kfree(name); + kfree(val); + return err; + } + } + if (!xattr) { new = 1; xattr = *newxattr; xattr->name = name; xattr->name_len = name_len; - xattr->should_free_name = should_free_name; + xattr->should_free_name = update_xattr; ci->i_xattrs.count++; dout("__set_xattr count=%d\n", ci->i_xattrs.count); @@ -364,7 +376,7 @@ static int __set_xattr(struct ceph_inode_info *ci, if (xattr->should_free_val) kfree((void *)xattr->val); - if (should_free_name) { + if (update_xattr) { kfree((void *)name); name = xattr->name; } @@ -379,8 +391,8 @@ static int __set_xattr(struct ceph_inode_info *ci, xattr->val = ""; xattr->val_len = val_len; - xattr->dirty = dirty; - xattr->should_free_val = (val && should_free_val); + xattr->dirty = update_xattr; + xattr->should_free_val = (val && update_xattr); if (new) { rb_link_node(&xattr->node, parent, p); @@ -588,7 +600,7 @@ start: p += len; err = __set_xattr(ci, name, namelen, val, len, - 0, 0, 0, &xattrs[numattr]); + 0, 0, &xattrs[numattr]); if (err < 0) goto bad; @@ -892,7 +904,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, struct ceph_inode_info *ci = ceph_inode(inode); int issued; int err; - int dirty; + int dirty = 0; int name_len = strlen(name); int val_len = size; char *newname = NULL; @@ -954,11 +966,13 @@ retry: } err = __set_xattr(ci, newname, name_len, newval, - val_len, 1, 1, 1, &xattr); + val_len, flags, 1, &xattr); - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); - ci->i_xattrs.dirty = true; - inode->i_ctime = CURRENT_TIME; + if (!err) { + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + ci->i_xattrs.dirty = true; + inode->i_ctime = CURRENT_TIME; + } spin_unlock(&ci->i_ceph_lock); if (dirty) -- cgit v1.1 From bcdfeb2eb4e42b811950b9cd226109291051732a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Feb 2014 13:04:19 +0800 Subject: ceph: remove xattr when null value is given to setxattr() For the setxattr request, introduce a new flag CEPH_XATTR_REMOVE to distinguish null value case from the zero-length value case. Signed-off-by: Yan, Zheng --- fs/ceph/xattr.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 28f9793..231c02b 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -12,6 +12,9 @@ #define XATTR_CEPH_PREFIX "ceph." #define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1) +static int __remove_xattr(struct ceph_inode_info *ci, + struct ceph_inode_xattr *xattr); + /* * List of handlers for synthetic system.* attributes. Other * attributes are handled directly. @@ -359,6 +362,12 @@ static int __set_xattr(struct ceph_inode_info *ci, kfree(val); return err; } + if (update_xattr < 0) { + if (xattr) + __remove_xattr(ci, xattr); + kfree(name); + return 0; + } } if (!xattr) { @@ -862,6 +871,9 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, dout("setxattr value=%.*s\n", (int)size, value); + if (!value) + flags |= CEPH_XATTR_REMOVE; + /* do request */ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR, USE_AUTH_MDS); @@ -965,8 +977,8 @@ retry: goto retry; } - err = __set_xattr(ci, newname, name_len, newval, - val_len, flags, 1, &xattr); + err = __set_xattr(ci, newname, name_len, newval, val_len, + flags, value ? 1 : -1, &xattr); if (!err) { dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); -- cgit v1.1 From 524186ace6c4dcc83975b858622a66888b018fd0 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Feb 2014 13:23:09 +0800 Subject: ceph: fix ceph_removexattr() Signed-off-by: Yan, Zheng --- fs/ceph/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 231c02b..a55ec37 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -463,7 +463,7 @@ static int __remove_xattr(struct ceph_inode_info *ci, struct ceph_inode_xattr *xattr) { if (!xattr) - return -EOPNOTSUPP; + return -ENODATA; rb_erase(&xattr->node, &ci->i_xattrs.index); -- cgit v1.1 From 7a92d64760541e66bf5d1131f029b82773ce3922 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Feb 2014 13:08:51 +0800 Subject: ceph: fix ceph_set_acl() If acl is equivalent to file mode permission bits, ceph_set_acl() needs to remove any existing acl xattr. Use __ceph_setxattr() to handle both setting and removing acl xattr cases, it doesn't return -ENODATA when there is no acl xattr. Signed-off-by: Yan, Zheng --- fs/ceph/acl.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 4c2d452..accc9f2 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -160,11 +160,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) goto out_dput; } - if (value) - ret = __ceph_setxattr(dentry, name, value, size, 0); - else - ret = __ceph_removexattr(dentry, name); - + ret = __ceph_setxattr(dentry, name, value, size, 0); if (ret) { if (new_mode != old_mode) { newattrs.ia_mode = old_mode; -- cgit v1.1 From b20a95a0dd47c56c5d20e1c9e260293d0b87abe0 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Feb 2014 12:55:05 +0800 Subject: ceph: add missing init_acl() for mkdir() and atomic_open() Signed-off-by: Yan, Zheng --- fs/ceph/dir.c | 13 ++++++++----- fs/ceph/file.c | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6da4df8..2e3b30d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -695,9 +695,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, ceph_mdsc_put_request(req); if (!err) - err = ceph_init_acl(dentry, dentry->d_inode, dir); - - if (err) + ceph_init_acl(dentry, dentry->d_inode, dir); + else d_drop(dentry); return err; } @@ -735,7 +734,9 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, if (!err && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); - if (err) + if (!err) + ceph_init_acl(dentry, dentry->d_inode, dir); + else d_drop(dentry); return err; } @@ -776,7 +777,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); out: - if (err < 0) + if (!err) + ceph_init_acl(dentry, dentry->d_inode, dir); + else d_drop(dentry); return err; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index dfd2ce3..09c7afe 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -286,6 +286,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, } else { dout("atomic_open finish_open on dn %p\n", dn); if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { + ceph_init_acl(dentry, dentry->d_inode, dir); *opened |= FILE_CREATED; } err = finish_open(file, dentry, ceph_open, opened); -- cgit v1.1 From c969d9bf91e1868d823351993216cd64dfab6a4c Mon Sep 17 00:00:00 2001 From: Guangliang Zhao Date: Sun, 16 Feb 2014 08:35:52 -0800 Subject: ceph: make ceph_forget_all_cached_acls() static inline Signed-off-by: Guangliang Zhao Reviewed-by: Alex Elder Signed-off-by: Sage Weil --- fs/ceph/acl.c | 5 ----- fs/ceph/super.h | 7 ++++++- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index accc9f2..21887d6 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -54,11 +54,6 @@ static inline struct posix_acl *ceph_get_cached_acl(struct inode *inode, return acl; } -void ceph_forget_all_cached_acls(struct inode *inode) -{ - forget_all_cached_acls(inode); -} - struct posix_acl *ceph_get_acl(struct inode *inode, int type) { int size; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 19793b5..d8801a9 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -743,7 +744,11 @@ extern const struct xattr_handler *ceph_xattr_handlers[]; struct posix_acl *ceph_get_acl(struct inode *, int); int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type); int ceph_init_acl(struct dentry *, struct inode *, struct inode *); -void ceph_forget_all_cached_acls(struct inode *inode); + +static inline void ceph_forget_all_cached_acls(struct inode *inode) +{ + forget_all_cached_acls(inode); +} #else -- cgit v1.1 From 45195e42c78ea91135108207dbcaf75e5556a309 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 16 Feb 2014 10:05:29 -0800 Subject: ceph: add acl, noacl options for cephfs mount Make the 'acl' option dependent on having ACL support compiled in. Make the 'noacl' option work even without it so that one can always ask it to be off and not error out on mount when it is not supported. Signed-off-by: Guangliang Zhao Signed-off-by: Sage Weil --- fs/ceph/super.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 2df963f..10a4ccb 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -144,7 +144,11 @@ enum { Opt_ino32, Opt_noino32, Opt_fscache, - Opt_nofscache + Opt_nofscache, +#ifdef CONFIG_CEPH_FS_POSIX_ACL + Opt_acl, +#endif + Opt_noacl }; static match_table_t fsopt_tokens = { @@ -172,6 +176,10 @@ static match_table_t fsopt_tokens = { {Opt_noino32, "noino32"}, {Opt_fscache, "fsc"}, {Opt_nofscache, "nofsc"}, +#ifdef CONFIG_CEPH_FS_POSIX_ACL + {Opt_acl, "acl"}, +#endif + {Opt_noacl, "noacl"}, {-1, NULL} }; @@ -271,6 +279,14 @@ static int parse_fsopt_token(char *c, void *private) case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; break; +#ifdef CONFIG_CEPH_FS_POSIX_ACL + case Opt_acl: + fsopt->sb_flags |= MS_POSIXACL; + break; +#endif + case Opt_noacl: + fsopt->sb_flags &= ~MS_POSIXACL; + break; default: BUG_ON(token); } @@ -438,6 +454,13 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) else seq_puts(m, ",nofsc"); +#ifdef CONFIG_CEPH_FS_POSIX_ACL + if (fsopt->sb_flags & MS_POSIXACL) + seq_puts(m, ",acl"); + else + seq_puts(m, ",noacl"); +#endif + if (fsopt->wsize) seq_printf(m, ",wsize=%d", fsopt->wsize); if (fsopt->rsize != CEPH_RSIZE_DEFAULT) @@ -819,9 +842,6 @@ static int ceph_set_super(struct super_block *s, void *data) s->s_flags = fsc->mount_options->sb_flags; s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ -#ifdef CONFIG_CEPH_FS_POSIX_ACL - s->s_flags |= MS_POSIXACL; -#endif s->s_xattr = ceph_xattr_handlers; s->s_fs_info = fsc; @@ -911,6 +931,10 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, struct ceph_options *opt = NULL; dout("ceph_mount\n"); + +#ifdef CONFIG_CEPH_FS_POSIX_ACL + flags |= MS_POSIXACL; +#endif err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); if (err < 0) { res = ERR_PTR(err); -- cgit v1.1 From 4d5f5df673ee673851986b5a492a9752fbb39dc5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 13 Feb 2014 19:40:26 +0800 Subject: ceph: fix __dcache_readdir() If directory is fragmented, readdir() read its dirfrags one by one. After reading all dirfrags, the corresponding dentries are sorted in (frag_t, off) order in the dcache. If dentries of a directory are all cached, __dcache_readdir() can use the cached dentries to satisfy readdir syscall. But when checking if a given dentry is after the position of readdir, __dcache_readdir() compares numerical value of frag_t directly. This is wrong, it should use ceph_frag_compare(). Signed-off-by: Yan, Zheng --- fs/ceph/dir.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 2e3b30d..45eda6d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -100,6 +100,14 @@ static unsigned fpos_off(loff_t p) return p & 0xffffffff; } +static int fpos_cmp(loff_t l, loff_t r) +{ + int v = ceph_frag_compare(fpos_frag(l), fpos_frag(r)); + if (v) + return v; + return (int)(fpos_off(l) - fpos_off(r)); +} + /* * When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on @@ -156,7 +164,7 @@ more: if (!d_unhashed(dentry) && dentry->d_inode && ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && - ctx->pos <= di->offset) + fpos_cmp(ctx->pos, di->offset) <= 0) break; dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry, dentry->d_name.len, dentry->d_name.name, di->offset, -- cgit v1.1 From 416e2abd925d0f41dc877d1fe01489d79bdecf4c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 17 Feb 2014 16:21:24 -0500 Subject: reiserfs: fix utterly brain-damaged indentation. This has been this way for years, and every time I stumble across it I lose my lunch. After coming across it for the nth time in the Coverity results, I had to overcome the bystander effect and do something about it. This ignores the 79 column limit in favor of making it look like C instead of gibberish. The correct thing to do here would be to lose some of the indentation by breaking this function up into several smaller ones. I might do that at some point if I have the stomach to look at this again. (Also some of those overlong ternary operations would likely be more readable as regular if's) Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- fs/reiserfs/do_balan.c | 895 +++++++++++-------------------------------------- 1 file changed, 195 insertions(+), 700 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 2b7882b..9a3c68c 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c @@ -324,23 +324,17 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h switch (flag) { case M_INSERT: /* insert item into L[0] */ - if (item_pos == tb->lnum[0] - 1 - && tb->lbytes != -1) { + if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { /* part of new item falls into L[0] */ int new_item_len; int version; - ret_val = - leaf_shift_left(tb, tb->lnum[0] - 1, - -1); + ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1); /* Calculate item length to insert to S[0] */ - new_item_len = - ih_item_len(ih) - tb->lbytes; + new_item_len = ih_item_len(ih) - tb->lbytes; /* Calculate and check item length to insert to L[0] */ - put_ih_item_len(ih, - ih_item_len(ih) - - new_item_len); + put_ih_item_len(ih, ih_item_len(ih) - new_item_len); RFALSE(ih_item_len(ih) <= 0, "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", @@ -349,30 +343,18 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h /* Insert new item into L[0] */ buffer_info_init_left(tb, &bi); leaf_insert_into_buf(&bi, - n + item_pos - - ret_val, ih, body, - zeros_num > - ih_item_len(ih) ? - ih_item_len(ih) : - zeros_num); + n + item_pos - ret_val, ih, body, + zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num); version = ih_version(ih); /* Calculate key component, item length and body to insert into S[0] */ - set_le_ih_k_offset(ih, - le_ih_k_offset(ih) + - (tb-> - lbytes << - (is_indirect_le_ih - (ih) ? tb->tb_sb-> - s_blocksize_bits - - UNFM_P_SHIFT : - 0))); + set_le_ih_k_offset(ih, le_ih_k_offset(ih) + + (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0))); put_ih_item_len(ih, new_item_len); if (tb->lbytes > zeros_num) { - body += - (tb->lbytes - zeros_num); + body += (tb->lbytes - zeros_num); zeros_num = 0; } else zeros_num -= tb->lbytes; @@ -383,15 +365,10 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h } else { /* new item in whole falls into L[0] */ /* Shift lnum[0]-1 items to L[0] */ - ret_val = - leaf_shift_left(tb, tb->lnum[0] - 1, - tb->lbytes); + ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes); /* Insert new item into L[0] */ buffer_info_init_left(tb, &bi); - leaf_insert_into_buf(&bi, - n + item_pos - - ret_val, ih, body, - zeros_num); + leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num); tb->insert_size[0] = 0; zeros_num = 0; } @@ -399,264 +376,117 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h case M_PASTE: /* append item in L[0] */ - if (item_pos == tb->lnum[0] - 1 - && tb->lbytes != -1) { + if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { /* we must shift the part of the appended item */ - if (is_direntry_le_ih - (B_N_PITEM_HEAD(tbS0, item_pos))) { + if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { RFALSE(zeros_num, "PAP-12090: invalid parameter in case of a directory"); /* directory item */ if (tb->lbytes > pos_in_item) { /* new directory entry falls into L[0] */ - struct item_head - *pasted; - int l_pos_in_item = - pos_in_item; + struct item_head *pasted; + int l_pos_in_item = pos_in_item; /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ - ret_val = - leaf_shift_left(tb, - tb-> - lnum - [0], - tb-> - lbytes - - - 1); - if (ret_val - && !item_pos) { - pasted = - B_N_PITEM_HEAD - (tb->L[0], - B_NR_ITEMS - (tb-> - L[0]) - - 1); - l_pos_in_item += - I_ENTRY_COUNT - (pasted) - - (tb-> - lbytes - - 1); + ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1); + if (ret_val && !item_pos) { + pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1); + l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1); } /* Append given directory entry to directory item */ buffer_info_init_left(tb, &bi); - leaf_paste_in_buffer - (&bi, - n + item_pos - - ret_val, - l_pos_in_item, - tb->insert_size[0], - body, zeros_num); + leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num); /* previous string prepared space for pasting new entry, following string pastes this entry */ /* when we have merge directory item, pos_in_item has been changed too */ /* paste new directory entry. 1 is entry number */ - leaf_paste_entries(&bi, - n + - item_pos - - - ret_val, - l_pos_in_item, - 1, - (struct - reiserfs_de_head - *) - body, - body - + - DEH_SIZE, - tb-> - insert_size - [0] - ); + leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item, + 1, (struct reiserfs_de_head *) body, + body + DEH_SIZE, tb->insert_size[0]); tb->insert_size[0] = 0; } else { /* new directory item doesn't fall into L[0] */ /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ - leaf_shift_left(tb, - tb-> - lnum[0], - tb-> - lbytes); + leaf_shift_left(tb, tb->lnum[0], tb->lbytes); } /* Calculate new position to append in item body */ pos_in_item -= tb->lbytes; } else { /* regular object */ - RFALSE(tb->lbytes <= 0, - "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", - tb->lbytes); - RFALSE(pos_in_item != - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos)), + RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes); + RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)), "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos)), - pos_in_item); + ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item); if (tb->lbytes >= pos_in_item) { /* appended item will be in L[0] in whole */ int l_n; /* this bytes number must be appended to the last item of L[h] */ - l_n = - tb->lbytes - - pos_in_item; + l_n = tb->lbytes - pos_in_item; /* Calculate new insert_size[0] */ - tb->insert_size[0] -= - l_n; + tb->insert_size[0] -= l_n; - RFALSE(tb-> - insert_size[0] <= - 0, + RFALSE(tb->insert_size[0] <= 0, "PAP-12105: there is nothing to paste into L[0]. insert_size=%d", - tb-> - insert_size[0]); - ret_val = - leaf_shift_left(tb, - tb-> - lnum - [0], - ih_item_len - (B_N_PITEM_HEAD - (tbS0, - item_pos))); + tb->insert_size[0]); + ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len + (B_N_PITEM_HEAD(tbS0, item_pos))); /* Append to body of item in L[0] */ buffer_info_init_left(tb, &bi); leaf_paste_in_buffer - (&bi, - n + item_pos - - ret_val, - ih_item_len - (B_N_PITEM_HEAD - (tb->L[0], - n + item_pos - - ret_val)), l_n, - body, - zeros_num > - l_n ? l_n : - zeros_num); + (&bi, n + item_pos - ret_val, ih_item_len + (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)), + l_n, body, + zeros_num > l_n ? l_n : zeros_num); /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */ { int version; - int temp_l = - l_n; - - RFALSE - (ih_item_len - (B_N_PITEM_HEAD - (tbS0, - 0)), + int temp_l = l_n; + + RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)), "PAP-12106: item length must be 0"); - RFALSE - (comp_short_le_keys - (B_N_PKEY - (tbS0, 0), - B_N_PKEY - (tb->L[0], - n + - item_pos - - - ret_val)), + RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY + (tb->L[0], n + item_pos - ret_val)), "PAP-12107: items must be of the same file"); if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) { - temp_l = - l_n - << - (tb-> - tb_sb-> - s_blocksize_bits - - - UNFM_P_SHIFT); + temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT); } /* update key of first item in S0 */ - version = - ih_version - (B_N_PITEM_HEAD - (tbS0, 0)); - set_le_key_k_offset - (version, - B_N_PKEY - (tbS0, 0), - le_key_k_offset - (version, - B_N_PKEY - (tbS0, - 0)) + - temp_l); + version = ih_version(B_N_PITEM_HEAD(tbS0, 0)); + set_le_key_k_offset(version, B_N_PKEY(tbS0, 0), + le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l); /* update left delimiting key */ - set_le_key_k_offset - (version, - B_N_PDELIM_KEY - (tb-> - CFL[0], - tb-> - lkey[0]), - le_key_k_offset - (version, - B_N_PDELIM_KEY - (tb-> - CFL[0], - tb-> - lkey[0])) - + temp_l); + set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), + le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l); } /* Calculate new body, position in item and insert_size[0] */ if (l_n > zeros_num) { - body += - (l_n - - zeros_num); + body += (l_n - zeros_num); zeros_num = 0; } else - zeros_num -= - l_n; + zeros_num -= l_n; pos_in_item = 0; - RFALSE - (comp_short_le_keys - (B_N_PKEY(tbS0, 0), - B_N_PKEY(tb->L[0], - B_NR_ITEMS - (tb-> - L[0]) - - 1)) - || - !op_is_left_mergeable - (B_N_PKEY(tbS0, 0), - tbS0->b_size) - || - !op_is_left_mergeable - (B_N_PDELIM_KEY - (tb->CFL[0], - tb->lkey[0]), - tbS0->b_size), + RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1)) + || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size) + || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size), "PAP-12120: item must be merge-able with left neighboring item"); } else { /* only part of the appended item will be in L[0] */ /* Calculate position in item for append in S[0] */ - pos_in_item -= - tb->lbytes; + pos_in_item -= tb->lbytes; - RFALSE(pos_in_item <= 0, - "PAP-12125: no place for paste. pos_in_item=%d", - pos_in_item); + RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item); /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ - leaf_shift_left(tb, - tb-> - lnum[0], - tb-> - lbytes); + leaf_shift_left(tb, tb->lnum[0], tb->lbytes); } } } else { /* appended item will be in L[0] in whole */ @@ -665,52 +495,30 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */ /* then increment pos_in_item by the size of the last item in L[0] */ - pasted = - B_N_PITEM_HEAD(tb->L[0], - n - 1); + pasted = B_N_PITEM_HEAD(tb->L[0], n - 1); if (is_direntry_le_ih(pasted)) - pos_in_item += - ih_entry_count - (pasted); + pos_in_item += ih_entry_count(pasted); else - pos_in_item += - ih_item_len(pasted); + pos_in_item += ih_item_len(pasted); } /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ - ret_val = - leaf_shift_left(tb, tb->lnum[0], - tb->lbytes); + ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes); /* Append to body of item in L[0] */ buffer_info_init_left(tb, &bi); - leaf_paste_in_buffer(&bi, - n + item_pos - - ret_val, + leaf_paste_in_buffer(&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0], body, zeros_num); /* if appended item is directory, paste entry */ - pasted = - B_N_PITEM_HEAD(tb->L[0], - n + item_pos - - ret_val); + pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val); if (is_direntry_le_ih(pasted)) - leaf_paste_entries(&bi, - n + - item_pos - - ret_val, - pos_in_item, - 1, - (struct - reiserfs_de_head - *)body, - body + - DEH_SIZE, - tb-> - insert_size - [0] - ); + leaf_paste_entries(&bi, n + item_pos - ret_val, + pos_in_item, 1, + (struct reiserfs_de_head *) body, + body + DEH_SIZE, + tb->insert_size[0]); /* if appended item is indirect item, put unformatted node into un list */ if (is_indirect_le_ih(pasted)) set_ih_free_space(pasted, 0); @@ -722,13 +530,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h reiserfs_panic(tb->tb_sb, "PAP-12130", "lnum > 0: unexpected mode: " " %s(%d)", - (flag == - M_DELETE) ? "DELETE" : ((flag == - M_CUT) - ? "CUT" - : - "UNKNOWN"), - flag); + (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); } } else { /* new item doesn't fall into L[0] */ @@ -748,14 +550,12 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h case M_INSERT: /* insert item */ if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */ if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */ - loff_t old_key_comp, old_len, - r_zeros_number; + loff_t old_key_comp, old_len, r_zeros_number; const char *r_body; int version; loff_t offset; - leaf_shift_right(tb, tb->rnum[0] - 1, - -1); + leaf_shift_right(tb, tb->rnum[0] - 1, -1); version = ih_version(ih); /* Remember key component and item length */ @@ -763,29 +563,17 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h old_len = ih_item_len(ih); /* Calculate key component and item length to insert into R[0] */ - offset = - le_ih_k_offset(ih) + - ((old_len - - tb-> - rbytes) << (is_indirect_le_ih(ih) - ? tb->tb_sb-> - s_blocksize_bits - - UNFM_P_SHIFT : 0)); + offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0)); set_le_ih_k_offset(ih, offset); put_ih_item_len(ih, tb->rbytes); /* Insert part of the item into R[0] */ buffer_info_init_right(tb, &bi); if ((old_len - tb->rbytes) > zeros_num) { r_zeros_number = 0; - r_body = - body + (old_len - - tb->rbytes) - - zeros_num; + r_body = body + (old_len - tb->rbytes) - zeros_num; } else { r_body = body; - r_zeros_number = - zeros_num - (old_len - - tb->rbytes); + r_zeros_number = zeros_num - (old_len - tb->rbytes); zeros_num -= r_zeros_number; } @@ -798,25 +586,18 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h /* Calculate key component and item length to insert into S[0] */ set_le_ih_k_offset(ih, old_key_comp); - put_ih_item_len(ih, - old_len - tb->rbytes); + put_ih_item_len(ih, old_len - tb->rbytes); tb->insert_size[0] -= tb->rbytes; } else { /* whole new item falls into R[0] */ /* Shift rnum[0]-1 items to R[0] */ - ret_val = - leaf_shift_right(tb, - tb->rnum[0] - 1, - tb->rbytes); + ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes); /* Insert new item into R[0] */ buffer_info_init_right(tb, &bi); - leaf_insert_into_buf(&bi, - item_pos - n + - tb->rnum[0] - 1, - ih, body, - zeros_num); + leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1, + ih, body, zeros_num); if (item_pos - n + tb->rnum[0] - 1 == 0) { replace_key(tb, tb->CFR[0], @@ -841,200 +622,97 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h RFALSE(zeros_num, "PAP-12145: invalid parameter in case of a directory"); - entry_count = - I_ENTRY_COUNT(B_N_PITEM_HEAD - (tbS0, - item_pos)); + entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD + (tbS0, item_pos)); if (entry_count - tb->rbytes < pos_in_item) /* new directory entry falls into R[0] */ { int paste_entry_position; - RFALSE(tb->rbytes - 1 >= - entry_count - || !tb-> - insert_size[0], + RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0], "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", - tb->rbytes, - entry_count); + tb->rbytes, entry_count); /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */ - leaf_shift_right(tb, - tb-> - rnum - [0], - tb-> - rbytes - - 1); + leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1); /* Paste given directory entry to directory item */ - paste_entry_position = - pos_in_item - - entry_count + - tb->rbytes - 1; + paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1; buffer_info_init_right(tb, &bi); - leaf_paste_in_buffer - (&bi, 0, - paste_entry_position, - tb->insert_size[0], - body, zeros_num); + leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num); /* paste entry */ - leaf_paste_entries(&bi, - 0, - paste_entry_position, - 1, - (struct - reiserfs_de_head - *) - body, - body - + - DEH_SIZE, - tb-> - insert_size - [0] - ); - - if (paste_entry_position - == 0) { + leaf_paste_entries(&bi, 0, paste_entry_position, 1, + (struct reiserfs_de_head *) body, + body + DEH_SIZE, tb->insert_size[0]); + + if (paste_entry_position == 0) { /* change delimiting keys */ - replace_key(tb, - tb-> - CFR - [0], - tb-> - rkey - [0], - tb-> - R - [0], - 0); + replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0); } tb->insert_size[0] = 0; pos_in_item++; } else { /* new directory entry doesn't fall into R[0] */ - leaf_shift_right(tb, - tb-> - rnum - [0], - tb-> - rbytes); + leaf_shift_right(tb, tb->rnum[0], tb->rbytes); } } else { /* regular object */ - int n_shift, n_rem, - r_zeros_number; + int n_shift, n_rem, r_zeros_number; const char *r_body; /* Calculate number of bytes which must be shifted from appended item */ - if ((n_shift = - tb->rbytes - - tb->insert_size[0]) < 0) + if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0) n_shift = 0; - RFALSE(pos_in_item != - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos)), + RFALSE(pos_in_item != ih_item_len + (B_N_PITEM_HEAD(tbS0, item_pos)), "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d", - pos_in_item, - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos))); - - leaf_shift_right(tb, - tb->rnum[0], - n_shift); + pos_in_item, ih_item_len + (B_N_PITEM_HEAD(tbS0, item_pos))); + + leaf_shift_right(tb, tb->rnum[0], n_shift); /* Calculate number of bytes which must remain in body after appending to R[0] */ - if ((n_rem = - tb->insert_size[0] - - tb->rbytes) < 0) + if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0) n_rem = 0; { int version; - unsigned long temp_rem = - n_rem; - - version = - ih_version - (B_N_PITEM_HEAD - (tb->R[0], 0)); - if (is_indirect_le_key - (version, - B_N_PKEY(tb->R[0], - 0))) { - temp_rem = - n_rem << - (tb->tb_sb-> - s_blocksize_bits - - - UNFM_P_SHIFT); + unsigned long temp_rem = n_rem; + + version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0)); + if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) { + temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT); } - set_le_key_k_offset - (version, - B_N_PKEY(tb->R[0], - 0), - le_key_k_offset - (version, - B_N_PKEY(tb->R[0], - 0)) + - temp_rem); - set_le_key_k_offset - (version, - B_N_PDELIM_KEY(tb-> - CFR - [0], - tb-> - rkey - [0]), - le_key_k_offset - (version, - B_N_PDELIM_KEY - (tb->CFR[0], - tb->rkey[0])) + - temp_rem); + set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0), + le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem); + set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]), + le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem); } /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ - do_balance_mark_internal_dirty - (tb, tb->CFR[0], 0); + do_balance_mark_internal_dirty(tb, tb->CFR[0], 0); /* Append part of body into R[0] */ buffer_info_init_right(tb, &bi); if (n_rem > zeros_num) { r_zeros_number = 0; - r_body = - body + n_rem - - zeros_num; + r_body = body + n_rem - zeros_num; } else { r_body = body; - r_zeros_number = - zeros_num - n_rem; - zeros_num -= - r_zeros_number; + r_zeros_number = zeros_num - n_rem; + zeros_num -= r_zeros_number; } - leaf_paste_in_buffer(&bi, 0, - n_shift, - tb-> - insert_size - [0] - - n_rem, - r_body, - r_zeros_number); - - if (is_indirect_le_ih - (B_N_PITEM_HEAD - (tb->R[0], 0))) { + leaf_paste_in_buffer(&bi, 0, n_shift, + tb->insert_size[0] - n_rem, + r_body, r_zeros_number); + + if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) { #if 0 RFALSE(n_rem, "PAP-12160: paste more than one unformatted node pointer"); #endif - set_ih_free_space - (B_N_PITEM_HEAD - (tb->R[0], 0), 0); + set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0); } tb->insert_size[0] = n_rem; if (!n_rem) @@ -1044,58 +722,28 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h struct item_head *pasted; - ret_val = - leaf_shift_right(tb, tb->rnum[0], - tb->rbytes); + ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes); /* append item in R[0] */ if (pos_in_item >= 0) { buffer_info_init_right(tb, &bi); - leaf_paste_in_buffer(&bi, - item_pos - - n + - tb-> - rnum[0], - pos_in_item, - tb-> - insert_size - [0], body, - zeros_num); + leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item, + tb->insert_size[0], body, zeros_num); } /* paste new entry, if item is directory item */ - pasted = - B_N_PITEM_HEAD(tb->R[0], - item_pos - n + - tb->rnum[0]); - if (is_direntry_le_ih(pasted) - && pos_in_item >= 0) { - leaf_paste_entries(&bi, - item_pos - - n + - tb->rnum[0], - pos_in_item, - 1, - (struct - reiserfs_de_head - *)body, - body + - DEH_SIZE, - tb-> - insert_size - [0] - ); + pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]); + if (is_direntry_le_ih(pasted) && pos_in_item >= 0) { + leaf_paste_entries(&bi, item_pos - n + tb->rnum[0], + pos_in_item, 1, + (struct reiserfs_de_head *) body, + body + DEH_SIZE, tb->insert_size[0]); if (!pos_in_item) { - RFALSE(item_pos - n + - tb->rnum[0], + RFALSE(item_pos - n + tb->rnum[0], "PAP-12165: directory item must be first item of node when pasting is in 0th position"); /* update delimiting keys */ - replace_key(tb, - tb->CFR[0], - tb->rkey[0], - tb->R[0], - 0); + replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); } } @@ -1111,22 +759,16 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h default: /* cases d and t */ reiserfs_panic(tb->tb_sb, "PAP-12175", "rnum > 0: unexpected mode: %s(%d)", - (flag == - M_DELETE) ? "DELETE" : ((flag == - M_CUT) ? "CUT" - : "UNKNOWN"), - flag); + (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); } } /* tb->rnum[0] > 0 */ RFALSE(tb->blknum[0] > 3, - "PAP-12180: blknum can not be %d. It must be <= 3", - tb->blknum[0]); + "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]); RFALSE(tb->blknum[0] < 0, - "PAP-12185: blknum can not be %d. It must be >= 0", - tb->blknum[0]); + "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]); /* if while adding to a node we discover that it is possible to split it in two, and merge the left part into the left neighbor and the @@ -1177,8 +819,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ - int old_key_comp, old_len, - r_zeros_number; + int old_key_comp, old_len, r_zeros_number; const char *r_body; int version; @@ -1192,15 +833,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h old_len = ih_item_len(ih); /* Calculate key component and item length to insert into S_new[i] */ - set_le_ih_k_offset(ih, - le_ih_k_offset(ih) + - ((old_len - - sbytes[i]) << - (is_indirect_le_ih - (ih) ? tb->tb_sb-> - s_blocksize_bits - - UNFM_P_SHIFT : - 0))); + set_le_ih_k_offset(ih, le_ih_k_offset(ih) + + ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0))); put_ih_item_len(ih, sbytes[i]); @@ -1209,39 +843,29 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h if ((old_len - sbytes[i]) > zeros_num) { r_zeros_number = 0; - r_body = - body + (old_len - - sbytes[i]) - - zeros_num; + r_body = body + (old_len - sbytes[i]) - zeros_num; } else { r_body = body; - r_zeros_number = - zeros_num - (old_len - - sbytes[i]); + r_zeros_number = zeros_num - (old_len - sbytes[i]); zeros_num -= r_zeros_number; } - leaf_insert_into_buf(&bi, 0, ih, r_body, - r_zeros_number); + leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number); /* Calculate key component and item length to insert into S[i] */ set_le_ih_k_offset(ih, old_key_comp); - put_ih_item_len(ih, - old_len - sbytes[i]); + put_ih_item_len(ih, old_len - sbytes[i]); tb->insert_size[0] -= sbytes[i]; } else { /* whole new item falls into S_new[i] */ /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, - snum[i] - 1, sbytes[i], - S_new[i]); + snum[i] - 1, sbytes[i], S_new[i]); /* Insert new item into S_new[i] */ buffer_info_init_bh(tb, &bi, S_new[i]); - leaf_insert_into_buf(&bi, - item_pos - n + - snum[i] - 1, ih, - body, zeros_num); + leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1, + ih, body, zeros_num); zeros_num = tb->insert_size[0] = 0; } @@ -1268,150 +892,73 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h int entry_count; - entry_count = - ih_entry_count(aux_ih); + entry_count = ih_entry_count(aux_ih); - if (entry_count - sbytes[i] < - pos_in_item - && pos_in_item <= - entry_count) { + if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) { /* new directory entry falls into S_new[i] */ - RFALSE(!tb-> - insert_size[0], - "PAP-12215: insert_size is already 0"); - RFALSE(sbytes[i] - 1 >= - entry_count, + RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0"); + RFALSE(sbytes[i] - 1 >= entry_count, "PAP-12220: there are no so much entries (%d), only %d", - sbytes[i] - 1, - entry_count); + sbytes[i] - 1, entry_count); /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ - leaf_move_items - (LEAF_FROM_S_TO_SNEW, - tb, snum[i], - sbytes[i] - 1, - S_new[i]); + leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]); /* Paste given directory entry to directory item */ buffer_info_init_bh(tb, &bi, S_new[i]); - leaf_paste_in_buffer - (&bi, 0, - pos_in_item - - entry_count + - sbytes[i] - 1, - tb->insert_size[0], - body, zeros_num); + leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, + tb->insert_size[0], body, zeros_num); /* paste new directory entry */ - leaf_paste_entries(&bi, - 0, - pos_in_item - - - entry_count - + - sbytes - [i] - - 1, 1, - (struct - reiserfs_de_head - *) - body, - body - + - DEH_SIZE, - tb-> - insert_size - [0] - ); + leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1, + (struct reiserfs_de_head *) body, + body + DEH_SIZE, tb->insert_size[0]); tb->insert_size[0] = 0; pos_in_item++; } else { /* new directory entry doesn't fall into S_new[i] */ - leaf_move_items - (LEAF_FROM_S_TO_SNEW, - tb, snum[i], - sbytes[i], - S_new[i]); + leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]); } } else { /* regular object */ - int n_shift, n_rem, - r_zeros_number; + int n_shift, n_rem, r_zeros_number; const char *r_body; - RFALSE(pos_in_item != - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos)) - || tb->insert_size[0] <= - 0, + RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0, "PAP-12225: item too short or insert_size <= 0"); /* Calculate number of bytes which must be shifted from appended item */ - n_shift = - sbytes[i] - - tb->insert_size[0]; + n_shift = sbytes[i] - tb->insert_size[0]; if (n_shift < 0) n_shift = 0; - leaf_move_items - (LEAF_FROM_S_TO_SNEW, tb, - snum[i], n_shift, - S_new[i]); + leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]); /* Calculate number of bytes which must remain in body after append to S_new[i] */ - n_rem = - tb->insert_size[0] - - sbytes[i]; + n_rem = tb->insert_size[0] - sbytes[i]; if (n_rem < 0) n_rem = 0; /* Append part of body into S_new[0] */ buffer_info_init_bh(tb, &bi, S_new[i]); if (n_rem > zeros_num) { r_zeros_number = 0; - r_body = - body + n_rem - - zeros_num; + r_body = body + n_rem - zeros_num; } else { r_body = body; - r_zeros_number = - zeros_num - n_rem; - zeros_num -= - r_zeros_number; + r_zeros_number = zeros_num - n_rem; + zeros_num -= r_zeros_number; } - leaf_paste_in_buffer(&bi, 0, - n_shift, - tb-> - insert_size - [0] - - n_rem, - r_body, - r_zeros_number); + leaf_paste_in_buffer(&bi, 0, n_shift, + tb->insert_size[0] - n_rem, + r_body, r_zeros_number); { struct item_head *tmp; - tmp = - B_N_PITEM_HEAD(S_new - [i], - 0); + tmp = B_N_PITEM_HEAD(S_new[i], 0); if (is_indirect_le_ih (tmp)) { - set_ih_free_space - (tmp, 0); - set_le_ih_k_offset - (tmp, - le_ih_k_offset - (tmp) + - (n_rem << - (tb-> - tb_sb-> - s_blocksize_bits - - - UNFM_P_SHIFT))); + set_ih_free_space(tmp, 0); + set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT))); } else { - set_le_ih_k_offset - (tmp, - le_ih_k_offset - (tmp) + - n_rem); + set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem); } } @@ -1426,8 +973,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h struct item_head *pasted; #ifdef CONFIG_REISERFS_CHECK - struct item_head *ih_check = - B_N_PITEM_HEAD(tbS0, item_pos); + struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos); if (!is_direntry_le_ih(ih_check) && (pos_in_item != ih_item_len(ih_check) @@ -1439,8 +985,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h "to ih_item_len"); #endif /* CONFIG_REISERFS_CHECK */ - leaf_mi = - leaf_move_items(LEAF_FROM_S_TO_SNEW, + leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); @@ -1452,30 +997,19 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h /* paste into item */ buffer_info_init_bh(tb, &bi, S_new[i]); leaf_paste_in_buffer(&bi, - item_pos - n + - snum[i], + item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num); - pasted = - B_N_PITEM_HEAD(S_new[i], - item_pos - n + - snum[i]); + pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]); if (is_direntry_le_ih(pasted)) { leaf_paste_entries(&bi, - item_pos - - n + snum[i], - pos_in_item, - 1, - (struct - reiserfs_de_head - *)body, - body + - DEH_SIZE, - tb-> - insert_size - [0] + item_pos - n + snum[i], + pos_in_item, 1, + (struct reiserfs_de_head *)body, + body + DEH_SIZE, + tb->insert_size[0] ); } @@ -1495,11 +1029,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h default: /* cases d and t */ reiserfs_panic(tb->tb_sb, "PAP-12245", "blknum > 2: unexpected mode: %s(%d)", - (flag == - M_DELETE) ? "DELETE" : ((flag == - M_CUT) ? "CUT" - : "UNKNOWN"), - flag); + (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); } memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE); @@ -1524,9 +1054,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h /* If we insert the first key change the delimiting key */ if (item_pos == 0) { if (tb->CFL[0]) /* can be 0 in reiserfsck */ - replace_key(tb, tb->CFL[0], tb->lkey[0], - tbS0, 0); - + replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); } break; @@ -1536,53 +1064,27 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h pasted = B_N_PITEM_HEAD(tbS0, item_pos); /* when directory, may be new entry already pasted */ if (is_direntry_le_ih(pasted)) { - if (pos_in_item >= 0 && - pos_in_item <= - ih_entry_count(pasted)) { + if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) { RFALSE(!tb->insert_size[0], "PAP-12260: insert_size is 0 already"); /* prepare space */ buffer_info_init_tbS0(tb, &bi); - leaf_paste_in_buffer(&bi, - item_pos, - pos_in_item, - tb-> - insert_size - [0], body, + leaf_paste_in_buffer(&bi, item_pos, pos_in_item, + tb->insert_size[0], body, zeros_num); /* paste entry */ - leaf_paste_entries(&bi, - item_pos, - pos_in_item, - 1, - (struct - reiserfs_de_head - *)body, - body + - DEH_SIZE, - tb-> - insert_size - [0] - ); + leaf_paste_entries(&bi, item_pos, pos_in_item, 1, + (struct reiserfs_de_head *)body, + body + DEH_SIZE, + tb->insert_size[0]); if (!item_pos && !pos_in_item) { - RFALSE(!tb->CFL[0] - || !tb->L[0], + RFALSE(!tb->CFL[0] || !tb->L[0], "PAP-12270: CFL[0]/L[0] must be specified"); - if (tb->CFL[0]) { - replace_key(tb, - tb-> - CFL - [0], - tb-> - lkey - [0], - tbS0, - 0); - - } + if (tb->CFL[0]) + replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); } tb->insert_size[0] = 0; } @@ -1593,13 +1095,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h "PAP-12275: insert size must not be %d", tb->insert_size[0]); buffer_info_init_tbS0(tb, &bi); - leaf_paste_in_buffer(&bi, - item_pos, - pos_in_item, - tb-> - insert_size - [0], body, - zeros_num); + leaf_paste_in_buffer(&bi, item_pos, pos_in_item, + tb->insert_size[0], body, zeros_num); if (is_indirect_le_ih(pasted)) { #if 0 @@ -1611,8 +1108,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h tb-> insert_size[0]); #endif - set_ih_free_space - (pasted, 0); + set_ih_free_space(pasted, 0); } tb->insert_size[0] = 0; } @@ -1620,8 +1116,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h else { if (tb->insert_size[0]) { print_cur_tb("12285"); - reiserfs_panic(tb-> - tb_sb, + reiserfs_panic(tb->tb_sb, "PAP-12285", "insert_size " "must be 0 " -- cgit v1.1 From 7026f1929e18921fd67bf478f475a8fdfdff16ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 17 Feb 2014 15:01:47 +0000 Subject: FS-Cache: Handle removal of unadded object to the fscache_object_list rb tree When FS-Cache allocates an object, the following sequence of events can occur: -->fscache_alloc_object() -->cachefiles_alloc_object() [via cache->ops->alloc_object] <--[returns new object] -->fscache_attach_object() <--[failed] -->cachefiles_put_object() [via cache->ops->put_object] -->fscache_object_destroy() -->fscache_objlist_remove() -->rb_erase() to remove the object from fscache_object_list. resulting in a crash in the rbtree code. The problem is that the object is only added to fscache_object_list on the success path of fscache_attach_object() where it calls fscache_objlist_add(). So if fscache_attach_object() fails, the object won't have been added to the objlist rbtree. We do, however, unconditionally try to remove the object from the tree. Thanks to NeilBrown for finding this and suggesting this solution. Reported-by: NeilBrown Signed-off-by: David Howells Tested-by: (a customer of) NeilBrown Signed-off-by: Linus Torvalds --- fs/fscache/object-list.c | 5 +++++ fs/fscache/object.c | 3 +++ 2 files changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index e1959ef..b5ebc2d 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -50,6 +50,8 @@ void fscache_objlist_add(struct fscache_object *obj) struct fscache_object *xobj; struct rb_node **p = &fscache_object_list.rb_node, *parent = NULL; + ASSERT(RB_EMPTY_NODE(&obj->objlist_link)); + write_lock(&fscache_object_list_lock); while (*p) { @@ -75,6 +77,9 @@ void fscache_objlist_add(struct fscache_object *obj) */ void fscache_objlist_remove(struct fscache_object *obj) { + if (RB_EMPTY_NODE(&obj->objlist_link)) + return; + write_lock(&fscache_object_list_lock); BUG_ON(RB_EMPTY_ROOT(&fscache_object_list)); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 53d35c5..d3b4539 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -314,6 +314,9 @@ void fscache_object_init(struct fscache_object *object, object->cache = cache; object->cookie = cookie; object->parent = NULL; +#ifdef CONFIG_FSCACHE_OBJECT_LIST + RB_CLEAR_NODE(&object->objlist_link); +#endif object->oob_event_mask = 0; for (t = object->oob_table; t->events; t++) -- cgit v1.1 From 92e3b40537707001d17bbad800d150ab04e53bf4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Feb 2014 20:33:01 -0500 Subject: jbd2: fix use after free in jbd2_journal_start_reserved() If start_this_handle() fails then it leads to a use after free of "handle". Signed-off-by: Dan Carpenter Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/jbd2/transaction.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8360674..60bb365 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -514,11 +514,13 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, * similarly constrained call sites */ ret = start_this_handle(journal, handle, GFP_NOFS); - if (ret < 0) + if (ret < 0) { jbd2_journal_free_reserved(handle); + return ret; + } handle->h_type = type; handle->h_line_no = line_no; - return ret; + return 0; } EXPORT_SYMBOL(jbd2_journal_start_reserved); -- cgit v1.1 From 45a22f4c11fef4ecd5c61c0a299cd3f23d77be8e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 17 Feb 2014 13:09:50 +0100 Subject: inotify: Fix reporting of cookies for inotify events My rework of handling of notification events (namely commit 7053aee26a35 "fsnotify: do not share events between notification groups") broke sending of cookies with inotify events. We didn't propagate the value passed to fsnotify() properly and passed 4 uninitialized bytes to userspace instead (so it is also an information leak). Sadly I didn't notice this during my testing because inotify cookies aren't used very much and LTP inotify tests ignore them. Fix the problem by passing the cookie value properly. Fixes: 7053aee26a3548ebaba046ae2e52396ccf56ac6c Reported-by: Vegard Nossum Signed-off-by: Jan Kara --- fs/notify/dnotify/dnotify.c | 2 +- fs/notify/fanotify/fanotify.c | 2 +- fs/notify/fsnotify.c | 2 +- fs/notify/inotify/inotify.h | 2 +- fs/notify/inotify/inotify_fsnotify.c | 3 ++- fs/notify/inotify/inotify_user.c | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 0b9ff43..abc8cbc 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -86,7 +86,7 @@ static int dnotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 0e792f5..205dc21 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -147,7 +147,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *fanotify_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { int ret = 0; struct fanotify_event_info *event; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 1d4e1ea..9d3e9c5 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -179,7 +179,7 @@ static int send_to_group(struct inode *to_tell, return group->ops->handle_event(group, to_tell, inode_mark, vfsmount_mark, mask, data, data_is, - file_name); + file_name, cookie); } /* diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 485eef3..ed855ef 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,6 +27,6 @@ extern int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name); + const unsigned char *file_name, u32 cookie); extern const struct fsnotify_ops inotify_fsnotify_ops; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index d5ee563..43ab1e1 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -67,7 +67,7 @@ int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; @@ -103,6 +103,7 @@ int inotify_handle_event(struct fsnotify_group *group, fsn_event = &event->fse; fsnotify_init_event(fsn_event, inode, mask); event->wd = i_mark->wd; + event->sync_cookie = cookie; event->name_len = len; if (len) strcpy(event->name, file_name); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 497395c..6528b5a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -495,7 +495,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, /* Queue ignore event for the watch */ inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, - NULL, FSNOTIFY_EVENT_NONE, NULL); + NULL, FSNOTIFY_EVENT_NONE, NULL, 0); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ -- cgit v1.1 From 7a01e707a324a4585949ca3df6c7f7485d8783f2 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 19 Feb 2014 15:33:05 +1100 Subject: xfs: xfs_sb_read_verify() doesn't flag bad crcs on primary sb My earlier commit 10e6e65 deserves a layer or two of brown paper bags. The logic in that commit means that a CRC failure on the primary superblock will *never* result in an error return. Hopefully this fixes it, so that we always return the error if it's a primary superblock, otherwise only if the filesystem has CRCs enabled. Signed-off-by: Eric Sandeen Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_sb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index b7c9aea..5071ccb 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -614,7 +614,7 @@ xfs_sb_read_verify( if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), offsetof(struct xfs_sb, sb_crc))) { /* Only fail bad secondaries on a known V5 filesystem */ - if (bp->b_bn != XFS_SB_DADDR && + if (bp->b_bn == XFS_SB_DADDR || xfs_sb_version_hascrc(&mp->m_sb)) { error = EFSCORRUPTED; goto out_error; -- cgit v1.1 From daba5427dad6b260256053f914de2c0b79f7a79f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 19 Feb 2014 15:39:16 +1100 Subject: xfs: skip verification on initial "guess" superblock read When xfs_readsb() does the very first read of the superblock, it makes a guess at the length of the buffer, based on the sector size of the underlying storage. This may or may not match the filesystem sector size in sb_sectsize, so we can't i.e. do a CRC check on it; it might be too short. In fact, mounting a filesystem with sb_sectsize larger than the device sector size will cause a mount failure if CRCs are enabled, because we are checksumming a length which exceeds the buffer passed to it. So always read twice; the first time we read with NULL buffer ops to skip verification; then set the proper read length, hook up the proper verifier, and give it another go. Once we are sure that we've got the right buffer length, we can also use bp->b_length in the xfs_sb_read_verify, rather than the less-trusted on-disk sectorsize for secondary superblocks. Before this we ran the risk of passing junk to the crc32c routines, which didn't always handle extreme values. Signed-off-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_mount.c | 24 ++++++++++++++++-------- fs/xfs/xfs_sb.c | 3 +-- 2 files changed, 17 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 02df7b4..f96c056 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -282,22 +282,29 @@ xfs_readsb( struct xfs_sb *sbp = &mp->m_sb; int error; int loud = !(flags & XFS_MFSI_QUIET); + const struct xfs_buf_ops *buf_ops; ASSERT(mp->m_sb_bp == NULL); ASSERT(mp->m_ddev_targp != NULL); /* + * For the initial read, we must guess at the sector + * size based on the block device. It's enough to + * get the sb_sectsize out of the superblock and + * then reread with the proper length. + * We don't verify it yet, because it may not be complete. + */ + sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); + buf_ops = NULL; + + /* * Allocate a (locked) buffer to hold the superblock. * This will be kept around at all times to optimize * access to the superblock. */ - sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); - reread: bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), 0, - loud ? &xfs_sb_buf_ops - : &xfs_sb_quiet_buf_ops); + BTOBB(sector_size), 0, buf_ops); if (!bp) { if (loud) xfs_warn(mp, "SB buffer read failed"); @@ -328,12 +335,13 @@ reread: } /* - * If device sector size is smaller than the superblock size, - * re-read the superblock so the buffer is correctly sized. + * Re-read the superblock so the buffer is correctly sized, + * and properly verified. */ - if (sector_size < sbp->sb_sectsize) { + if (buf_ops == NULL) { xfs_buf_relse(bp); sector_size = sbp->sb_sectsize; + buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops; goto reread; } diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index 5071ccb..359b19a 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -611,7 +611,7 @@ xfs_sb_read_verify( XFS_SB_VERSION_5) || dsb->sb_crc != 0)) { - if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), + if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), offsetof(struct xfs_sb, sb_crc))) { /* Only fail bad secondaries on a known V5 filesystem */ if (bp->b_bn == XFS_SB_DADDR || @@ -644,7 +644,6 @@ xfs_sb_quiet_read_verify( { struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); - if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* XFS filesystem, verify noisily! */ xfs_sb_read_verify(bp); -- cgit v1.1 From 5ef11eb0700f806c4671ba33e5befa784a2f70ef Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 19 Feb 2014 15:39:35 +1100 Subject: xfs: limit superblock corruption errors to actual corruption Today, if xfs_sb_read_verify xfs_sb_verify xfs_mount_validate_sb detects superblock corruption, it'll be extremely noisy, dumping 2 stacks, 2 hexdumps, etc. This is because we call XFS_CORRUPTION_ERROR in xfs_mount_validate_sb as well as in xfs_sb_read_verify. Also, *any* errors in xfs_mount_validate_sb which are not corruption per se; things like too-big-blocksize, bad version, bad magic, v1 dirs, rw-incompat etc - things which do not return EFSCORRUPTED - will still do the whole XFS_CORRUPTION_ERROR spew when xfs_sb_read_verify sees any error at all. And it suggests to the user that they should run xfs_repair, even if the root cause of the mount failure is a simple incompatibility. I'll submit that the probably-not-corrupted errors don't warrant this much noise, so this patch removes the warning for anything other than EFSCORRUPTED returns, and replaces the lower-level XFS_CORRUPTION_ERROR with an xfs_notice(). Signed-off-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_sb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index 359b19a..1e11679 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -295,8 +295,7 @@ xfs_mount_validate_sb( sbp->sb_dblocks == 0 || sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { - XFS_CORRUPTION_ERROR("SB sanity check failed", - XFS_ERRLEVEL_LOW, mp, sbp); + xfs_notice(mp, "SB sanity check failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -625,7 +624,7 @@ xfs_sb_read_verify( out_error: if (error) { - if (error != EWRONGFS) + if (error == EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, error); -- cgit v1.1 From 146d70caaa1b87f64597743429d7da4b8073d0c9 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 18 Feb 2014 10:36:05 -0500 Subject: NFS fix error return in nfs4_select_rw_stateid Do not return an error when nfs4_copy_delegation_stateid succeeds. Signed-off-by: Andy Adamson Link: http://lkml.kernel.org/r/1392737765-41942-1-git-send-email-andros@netapp.com Fixes: ef1820f9be27b (NFSv4: Don't try to recover NFSv4 locks when...) Cc: NeilBrown Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e5be725..e1a4721 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1015,8 +1015,11 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, if (ret == -EIO) /* A lost lock - don't even consider delegations */ goto out; - if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) + /* returns true if delegation stateid found and copied */ + if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) { + ret = 0; goto out; + } if (ret != -ENOENT) /* nfs4_copy_delegation_stateid() didn't over-write * dst, so it still has the lock stateid which we now -- cgit v1.1 From 09ebb17ab476b6ac1cc07b53d07e88f4d31ee4d3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 18 Feb 2014 12:00:21 +0100 Subject: udf: Fix data corruption on file type conversion UDF has two types of files - files with data stored in inode (ICB in UDF terminology) and files with data stored in external data blocks. We convert file from in-inode format to external format in udf_file_aio_write() when we find out data won't fit into inode any longer. However the following race between two O_APPEND writes can happen: CPU1 CPU2 udf_file_aio_write() udf_file_aio_write() down_write(&iinfo->i_data_sem); checks that i_size + count1 fits within inode => no need to convert up_write(&iinfo->i_data_sem); down_write(&iinfo->i_data_sem); checks that i_size + count2 fits within inode => no need to convert up_write(&iinfo->i_data_sem); generic_file_aio_write() - extends file by count1 bytes generic_file_aio_write() - extends file by count2 bytes Clearly if count1 + count2 doesn't fit into the inode, we overwrite kernel buffers beyond inode, possibly corrupting the filesystem as well. Fix the problem by acquiring i_mutex before checking whether write fits into the inode and using __generic_file_aio_write() afterwards which puts check and write into one critical section. Reported-by: Al Viro Signed-off-by: Jan Kara --- fs/udf/file.c | 14 ++++++++++++-- fs/udf/inode.c | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/udf/file.c b/fs/udf/file.c index c02a27a..1037637 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -144,6 +144,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, size_t count = iocb->ki_nbytes; struct udf_inode_info *iinfo = UDF_I(inode); + mutex_lock(&inode->i_mutex); down_write(&iinfo->i_data_sem); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { if (file->f_flags & O_APPEND) @@ -156,6 +157,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, pos + count)) { err = udf_expand_file_adinicb(inode); if (err) { + mutex_unlock(&inode->i_mutex); udf_debug("udf_expand_adinicb: err=%d\n", err); return err; } @@ -169,9 +171,17 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } else up_write(&iinfo->i_data_sem); - retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); - if (retval > 0) + retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + mutex_unlock(&inode->i_mutex); + + if (retval > 0) { + ssize_t err; + mark_inode_dirty(inode); + err = generic_write_sync(file, iocb->ki_pos - retval, retval); + if (err < 0) + retval = err; + } return retval; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 062b792..982ce05 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -265,6 +265,7 @@ int udf_expand_file_adinicb(struct inode *inode) .nr_to_write = 1, }; + WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); if (!iinfo->i_lenAlloc) { if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; -- cgit v1.1 From 1362f4ea20fa63688ba6026e586d9746ff13a846 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 20 Feb 2014 17:02:27 +0100 Subject: quota: Fix race between dqput() and dquot_scan_active() Currently last dqput() can race with dquot_scan_active() causing it to call callback for an already deactivated dquot. The race is as follows: CPU1 CPU2 dqput() spin_lock(&dq_list_lock); if (atomic_read(&dquot->dq_count) > 1) { - not taken if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); ->release_dquot(dquot); if (atomic_read(&dquot->dq_count) > 1) - not taken dquot_scan_active() spin_lock(&dq_list_lock); if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) - not taken atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); - proceeds to release dquot ret = fn(dquot, priv); - called for inactive dquot Fix the problem by making sure possible ->release_dquot() is finished by the time we call the callback and new calls to it will notice reference dquot_scan_active() has taken and bail out. CC: stable@vger.kernel.org # >= 2.6.29 Signed-off-by: Jan Kara --- fs/quota/dquot.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 831d49a..cfc8dcc 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -581,9 +581,17 @@ int dquot_scan_active(struct super_block *sb, dqstats_inc(DQST_LOOKUPS); dqput(old_dquot); old_dquot = dquot; - ret = fn(dquot, priv); - if (ret < 0) - goto out; + /* + * ->release_dquot() can be racing with us. Our reference + * protects us from new calls to it so just wait for any + * outstanding call and recheck the DQ_ACTIVE_B after that. + */ + wait_on_dquot(dquot); + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { + ret = fn(dquot, priv); + if (ret < 0) + goto out; + } spin_lock(&dq_list_lock); /* We are safe to continue now because our dquot could not * be moved out of the inuse list while we hold the reference */ -- cgit v1.1 From 0dc83bd30b0bf5410c0933cfbbf8853248eff0a9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 11:19:04 +0100 Subject: Revert "writeback: do not sync data dirtied after sync start" This reverts commit c4a391b53a72d2df4ee97f96f78c1d5971b47489. Dave Chinner has reported the commit may cause some inodes to be left out from sync(2). This is because we can call redirty_tail() for some inode (which sets i_dirtied_when to current time) after sync(2) has started or similarly requeue_inode() can set i_dirtied_when to current time if writeback had to skip some pages. The real problem is in the functions clobbering i_dirtied_when but fixing that isn't trivial so revert is a safer choice for now. CC: stable@vger.kernel.org # >= 3.13 Signed-off-by: Jan Kara --- fs/fs-writeback.c | 33 +++++++++++---------------------- fs/sync.c | 15 ++++++--------- fs/xfs/xfs_super.c | 2 +- 3 files changed, 18 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e0259a1..d754e3c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -40,18 +40,13 @@ struct wb_writeback_work { long nr_pages; struct super_block *sb; - /* - * Write only inodes dirtied before this time. Don't forget to set - * older_than_this_is_set when you set this. - */ - unsigned long older_than_this; + unsigned long *older_than_this; enum writeback_sync_modes sync_mode; unsigned int tagged_writepages:1; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ - unsigned int older_than_this_is_set:1; enum wb_reason reason; /* why was writeback initiated? */ struct list_head list; /* pending work list */ @@ -252,10 +247,10 @@ static int move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; int moved = 0; - WARN_ON_ONCE(!work->older_than_this_is_set); while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); - if (inode_dirtied_after(inode, work->older_than_this)) + if (work->older_than_this && + inode_dirtied_after(inode, *work->older_than_this)) break; list_move(&inode->i_wb_list, &tmp); moved++; @@ -742,8 +737,6 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = reason, - .older_than_this = jiffies, - .older_than_this_is_set = 1, }; spin_lock(&wb->list_lock); @@ -802,13 +795,12 @@ static long wb_writeback(struct bdi_writeback *wb, { unsigned long wb_start = jiffies; long nr_pages = work->nr_pages; + unsigned long oldest_jif; struct inode *inode; long progress; - if (!work->older_than_this_is_set) { - work->older_than_this = jiffies; - work->older_than_this_is_set = 1; - } + oldest_jif = jiffies; + work->older_than_this = &oldest_jif; spin_lock(&wb->list_lock); for (;;) { @@ -842,10 +834,10 @@ static long wb_writeback(struct bdi_writeback *wb, * safe. */ if (work->for_kupdate) { - work->older_than_this = jiffies - + oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) - work->older_than_this = jiffies; + oldest_jif = jiffies; trace_writeback_start(wb->bdi, work); if (list_empty(&wb->b_io)) @@ -1357,21 +1349,18 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages - * @sb: the superblock - * @older_than_this: timestamp + * @sb: the superblock * * This function writes and waits on any dirty inode belonging to this - * superblock that has been dirtied before given timestamp. + * super_block. */ -void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this) +void sync_inodes_sb(struct super_block *sb) { DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, - .older_than_this = older_than_this, - .older_than_this_is_set = 1, .range_cyclic = 0, .done = &done, .reason = WB_REASON_SYNC, diff --git a/fs/sync.c b/fs/sync.c index e8ba024..b28d1dd 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -27,11 +27,10 @@ * wait == 1 case since in that case write_inode() functions do * sync_dirty_buffer() and thus effectively write one block at a time. */ -static int __sync_filesystem(struct super_block *sb, int wait, - unsigned long start) +static int __sync_filesystem(struct super_block *sb, int wait) { if (wait) - sync_inodes_sb(sb, start); + sync_inodes_sb(sb); else writeback_inodes_sb(sb, WB_REASON_SYNC); @@ -48,7 +47,6 @@ static int __sync_filesystem(struct super_block *sb, int wait, int sync_filesystem(struct super_block *sb) { int ret; - unsigned long start = jiffies; /* * We need to be protected against the filesystem going from @@ -62,17 +60,17 @@ int sync_filesystem(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - ret = __sync_filesystem(sb, 0, start); + ret = __sync_filesystem(sb, 0); if (ret < 0) return ret; - return __sync_filesystem(sb, 1, start); + return __sync_filesystem(sb, 1); } EXPORT_SYMBOL_GPL(sync_filesystem); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY)) - sync_inodes_sb(sb, *((unsigned long *)arg)); + sync_inodes_sb(sb); } static void sync_fs_one_sb(struct super_block *sb, void *arg) @@ -104,10 +102,9 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) SYSCALL_DEFINE0(sync) { int nowait = 0, wait = 1; - unsigned long start = jiffies; wakeup_flusher_threads(0, WB_REASON_SYNC); - iterate_supers(sync_inodes_one_sb, &start); + iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &wait); iterate_bdevs(fdatawrite_one_bdev, NULL); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f317488..d971f49 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -913,7 +913,7 @@ xfs_flush_inodes( struct super_block *sb = mp->m_super; if (down_read_trylock(&sb->s_umount)) { - sync_inodes_sb(sb, jiffies); + sync_inodes_sb(sb); up_read(&sb->s_umount); } } -- cgit v1.1 From 2513190a926f093dbdc301c68e6ade0bcf293f9a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 19:02:34 +0100 Subject: fsnotify: Fix detection whether overflow event is queued Currently we didn't initialize event's list head when we removed it from the event list. Thus a detection whether overflow event is already queued wasn't working. Fix it by always initializing the list head when deleting event from a list. Signed-off-by: Jan Kara --- fs/notify/notification.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 18b3c44..6bec2f4 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -132,7 +132,11 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group event = list_first_entry(&group->notification_list, struct fsnotify_event, list); - list_del(&event->list); + /* + * We need to init list head for the case of overflow event so that + * check in fsnotify_add_notify_events() works + */ + list_del_init(&event->list); group->q_len--; return event; -- cgit v1.1 From 482ef06c5e946aae360f247dc69471ec031e09d2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 19:07:54 +0100 Subject: fanotify: Handle overflow in case of permission events If the event queue overflows when we are handling permission event, we will never get response from userspace. So we must avoid waiting for it. Change fsnotify_add_notify_event() to return whether overflow has happened so that we can detect it in fanotify_handle_event() and act accordingly. Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 6 ++++-- fs/notify/notification.c | 14 ++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 205dc21..dc638f7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -192,10 +192,12 @@ static int fanotify_handle_event(struct fsnotify_group *group, ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); if (ret) { - BUG_ON(mask & FAN_ALL_PERM_EVENTS); + /* Permission events shouldn't be merged */ + BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); - ret = 0; + + return 0; } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 6bec2f4..6a4ba17 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -80,7 +80,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group, /* * Add an event to the group notification queue. The group can later pull this * event off the queue to deal with. The function returns 0 if the event was - * added to the queue, 1 if the event was merged with some other queued event. + * added to the queue, 1 if the event was merged with some other queued event, + * 2 if the queue of events has overflown. */ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, @@ -95,10 +96,14 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, mutex_lock(&group->notification_mutex); if (group->q_len >= group->max_events) { + ret = 2; /* Queue overflow event only if it isn't already queued */ - if (list_empty(&group->overflow_event.list)) - event = &group->overflow_event; - ret = 1; + if (!list_empty(&group->overflow_event.list)) { + mutex_unlock(&group->notification_mutex); + return ret; + } + event = &group->overflow_event; + goto queue; } if (!list_empty(list) && merge) { @@ -109,6 +114,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, } } +queue: group->q_len++; list_add_tail(&event->list, list); mutex_unlock(&group->notification_mutex); -- cgit v1.1 From ff57cd5863cf3014c1c5ed62ce2715294f065b17 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 19:14:11 +0100 Subject: fsnotify: Allocate overflow events with proper type Commit 7053aee26a35 "fsnotify: do not share events between notification groups" used overflow event statically allocated in a group with the size of the generic notification event. This causes problems because some code looks at type specific parts of event structure and gets confused by a random data it sees there and causes crashes. Fix the problem by allocating overflow event with type corresponding to the group type so code cannot get confused. Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 13 +++++++++++++ fs/notify/group.c | 8 +++++++- fs/notify/inotify/inotify_user.c | 12 ++++++++++++ fs/notify/notification.c | 4 ++-- 4 files changed, 34 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b6175fa..287a22c 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -698,6 +698,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) struct fsnotify_group *group; int f_flags, fd; struct user_struct *user; + struct fanotify_event_info *oevent; pr_debug("%s: flags=%d event_f_flags=%d\n", __func__, flags, event_f_flags); @@ -730,8 +731,20 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.user = user; atomic_inc(&user->fanotify_listeners); + oevent = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); + if (unlikely(!oevent)) { + fd = -ENOMEM; + goto out_destroy_group; + } + group->overflow_event = &oevent->fse; + fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + oevent->tgid = get_pid(task_tgid(current)); + oevent->path.mnt = NULL; + oevent->path.dentry = NULL; + group->fanotify_data.f_flags = event_f_flags; #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + oevent->response = 0; mutex_init(&group->fanotify_data.access_mutex); init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); diff --git a/fs/notify/group.c b/fs/notify/group.c index ee674fe..ad19959 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -55,6 +55,13 @@ void fsnotify_destroy_group(struct fsnotify_group *group) /* clear the notification queue of all events */ fsnotify_flush_notify(group); + /* + * Destroy overflow event (we cannot use fsnotify_destroy_event() as + * that deliberately ignores overflow events. + */ + if (group->overflow_event) + group->ops->free_event(group->overflow_event); + fsnotify_put_group(group); } @@ -99,7 +106,6 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) INIT_LIST_HEAD(&group->marks_list); group->ops = ops; - fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW); return group; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 6528b5a..78a2ca3 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -633,11 +633,23 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod static struct fsnotify_group *inotify_new_group(unsigned int max_events) { struct fsnotify_group *group; + struct inotify_event_info *oevent; group = fsnotify_alloc_group(&inotify_fsnotify_ops); if (IS_ERR(group)) return group; + oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL); + if (unlikely(!oevent)) { + fsnotify_destroy_group(group); + return ERR_PTR(-ENOMEM); + } + group->overflow_event = &oevent->fse; + fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + oevent->wd = -1; + oevent->sync_cookie = 0; + oevent->name_len = 0; + group->max_events = max_events; spin_lock_init(&group->inotify_data.idr_lock); diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 6a4ba17..1e58402 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -98,11 +98,11 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, if (group->q_len >= group->max_events) { ret = 2; /* Queue overflow event only if it isn't already queued */ - if (!list_empty(&group->overflow_event.list)) { + if (!list_empty(&group->overflow_event->list)) { mutex_unlock(&group->notification_mutex); return ret; } - event = &group->overflow_event; + event = group->overflow_event; goto queue; } -- cgit v1.1 From fed95bab8d29b928fcf6225be72d37ded452e8a2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 25 Feb 2014 19:28:44 +0800 Subject: sysfs: fix namespace refcnt leak As mount() and kill_sb() is not a one-to-one match, we shoudn't get ns refcnt unconditionally in sysfs_mount(), and instead we should get the refcnt only when kernfs_mount() allocated a new superblock. v2: - Changed the name of the new argument, suggested by Tejun. - Made the argument optional, suggested by Tejun. v3: - Make the new argument as second-to-last arg, suggested by Tejun. Signed-off-by: Li Zefan Acked-by: Tejun Heo --- fs/kernfs/mount.c | 8 +++++++- fs/sysfs/mount.c | 5 +++-- include/linux/kernfs.h | 9 +++++---- 3 files changed, 15 insertions(+), 7 deletions(-) Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 8 +++++++- fs/sysfs/mount.c | 5 +++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 0d6ce89..0f4152d 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -94,6 +94,7 @@ const void *kernfs_super_ns(struct super_block *sb) * @fs_type: file_system_type of the fs being mounted * @flags: mount flags specified for the mount * @root: kernfs_root of the hierarchy being mounted + * @new_sb_created: tell the caller if we allocated a new superblock * @ns: optional namespace tag of the mount * * This is to be called from each kernfs user's file_system_type->mount() @@ -104,7 +105,8 @@ const void *kernfs_super_ns(struct super_block *sb) * The return value can be passed to the vfs layer verbatim. */ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, const void *ns) + struct kernfs_root *root, bool *new_sb_created, + const void *ns) { struct super_block *sb; struct kernfs_super_info *info; @@ -122,6 +124,10 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, kfree(info); if (IS_ERR(sb)) return ERR_CAST(sb); + + if (new_sb_created) + *new_sb_created = !sb->s_root; + if (!sb->s_root) { error = kernfs_fill_super(sb); if (error) { diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 6211230..3eaf5c6 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -27,6 +27,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, { struct dentry *root; void *ns; + bool new_sb; if (!(flags & MS_KERNMOUNT)) { if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) @@ -37,8 +38,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, } ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); - root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns); - if (IS_ERR(root)) + root = kernfs_mount_ns(fs_type, flags, sysfs_root, &new_sb, ns); + if (IS_ERR(root) || !new_sb) kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); return root; } -- cgit v1.1