summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c6
-rw-r--r--fs/adfs/dir.c2
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/adfs/inode.c3
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/affs/file.c4
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/server.c5
-rw-r--r--fs/afs/write.c3
-rw-r--r--fs/aio.c6
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/attr.c50
-rw-r--r--fs/bad_inode.c3
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/binfmt_elf_fdpic.c26
-rw-r--r--fs/binfmt_flat.c25
-rw-r--r--fs/block_dev.c88
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/file.c3
-rw-r--r--fs/buffer.c123
-rw-r--r--fs/ceph/auth.c7
-rw-r--r--fs/ceph/auth.h6
-rw-r--r--fs/ceph/auth_none.c8
-rw-r--r--fs/ceph/auth_x.c12
-rw-r--r--fs/ceph/caps.c97
-rw-r--r--fs/ceph/ceph_fs.h21
-rw-r--r--fs/ceph/dir.c7
-rw-r--r--fs/ceph/export.c2
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/ceph/mds_client.c49
-rw-r--r--fs/ceph/mds_client.h6
-rw-r--r--fs/ceph/messenger.c6
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/mon_client.c7
-rw-r--r--fs/ceph/osd_client.c7
-rw-r--r--fs/ceph/osdmap.c2
-rw-r--r--fs/ceph/super.c16
-rw-r--r--fs/ceph/super.h3
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/file.c5
-rw-r--r--fs/coda/coda_int.h3
-rw-r--r--fs/coda/file.c4
-rw-r--r--fs/compat.c2
-rw-r--r--fs/configfs/inode.c14
-rw-r--r--fs/debugfs/file.c21
-rw-r--r--fs/direct-io.c61
-rw-r--r--fs/ecryptfs/file.c2
-rw-r--r--fs/ecryptfs/inode.c4
-rw-r--r--fs/exofs/file.c7
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/file.c7
-rw-r--r--fs/ext2/inode.c153
-rw-r--r--fs/ext2/super.c20
-rw-r--r--fs/ext3/dir.c2
-rw-r--r--fs/ext3/fsync.c4
-rw-r--r--fs/ext3/super.c38
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/fsync.c8
-rw-r--r--fs/ext4/inode.c40
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/ext4/super.c37
-rw-r--r--fs/fat/fat.h6
-rw-r--r--fs/fat/file.c40
-rw-r--r--fs/fat/inode.c35
-rw-r--r--fs/fcntl.c7
-rw-r--r--fs/file_table.c21
-rw-r--r--fs/fs-writeback.c64
-rw-r--r--fs/fscache/page.c36
-rw-r--r--fs/fuse/dev.c527
-rw-r--r--fs/fuse/dir.c5
-rw-r--r--fs/fuse/file.c48
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/ops_inode.c5
-rw-r--r--fs/hostfs/hostfs_kern.c4
-rw-r--r--fs/hpfs/file.c4
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/jffs2/acl.c3
-rw-r--r--fs/jffs2/dir.c127
-rw-r--r--fs/jffs2/file.c4
-rw-r--r--fs/jffs2/fs.c11
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jfs/file.c4
-rw-r--r--fs/jfs/jfs_inode.h2
-rw-r--r--fs/jfs/super.c16
-rw-r--r--fs/libfs.c111
-rw-r--r--fs/logfs/file.c4
-rw-r--r--fs/logfs/logfs.h2
-rw-r--r--fs/minix/dir.c11
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/minix/itree_v2.c27
-rw-r--r--fs/namei.c2
-rw-r--r--fs/ncpfs/file.c2
-rw-r--r--fs/nfs/dir.c6
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/btree.h2
-rw-r--r--fs/nilfs2/file.c4
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/segbuf.h2
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/super.c8
-rw-r--r--fs/ntfs/dir.c5
-rw-r--r--fs/ntfs/file.c9
-rw-r--r--fs/ocfs2/file.c15
-rw-r--r--fs/ocfs2/super.c50
-rw-r--r--fs/omfs/file.c2
-rw-r--r--fs/pipe.c102
-rw-r--r--fs/qnx4/dir.c2
-rw-r--r--fs/quota/dquot.c190
-rw-r--r--fs/quota/quota.c4
-rw-r--r--fs/ramfs/file-mmu.c3
-rw-r--r--fs/ramfs/file-nommu.c9
-rw-r--r--fs/reiserfs/dir.c8
-rw-r--r--fs/reiserfs/file.c5
-rw-r--r--fs/reiserfs/super.c48
-rw-r--r--fs/smbfs/file.c3
-rw-r--r--fs/smbfs/inode.c2
-rw-r--r--fs/splice.c2
-rw-r--r--fs/super.c17
-rw-r--r--fs/sync.c10
-rw-r--r--fs/sysfs/inode.c8
-rw-r--r--fs/sysv/dir.c2
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/sysv/inode.c1
-rw-r--r--fs/ubifs/file.c17
-rw-r--r--fs/ubifs/ubifs.h4
-rw-r--r--fs/udf/balloc.c43
-rw-r--r--fs/udf/dir.c2
-rw-r--r--fs/udf/file.c28
-rw-r--r--fs/udf/ialloc.c21
-rw-r--r--fs/udf/inode.c5
-rw-r--r--fs/udf/namei.c20
-rw-r--r--fs/udf/super.c13
-rw-r--r--fs/udf/udfdecl.h1
-rw-r--r--fs/ufs/balloc.c24
-rw-r--r--fs/ufs/dir.c2
-rw-r--r--fs/ufs/file.c5
-rw-r--r--fs/ufs/ialloc.c13
-rw-r--r--fs/ufs/inode.c4
-rw-r--r--fs/ufs/namei.c16
-rw-r--r--fs/ufs/super.c110
-rw-r--r--fs/ufs/truncate.c20
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h356
-rw-r--r--fs/xfs/quota/xfs_qm.c4
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_iget.c29
-rw-r--r--fs/xfs/xfs_inode.c144
-rw-r--r--fs/xfs/xfs_log_recover.c11
-rw-r--r--fs/xfs/xfs_mount.c68
-rw-r--r--fs/xfs/xfs_rtalloc.c4
-rw-r--r--fs/xfs/xfs_rtalloc.h11
-rw-r--r--fs/xfs/xfs_trans.c446
-rw-r--r--fs/xfs/xfs_trans.h411
-rw-r--r--fs/xfs/xfs_vnodeops.c2
166 files changed, 2607 insertions, 2043 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 25b300e..2bedc6c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -257,15 +257,13 @@ v9fs_file_write(struct file *filp, const char __user * data,
return total;
}
-static int v9fs_file_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int v9fs_file_fsync(struct file *filp, int datasync)
{
struct p9_fid *fid;
struct p9_wstat wstat;
int retval;
- P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp,
- dentry, datasync);
+ P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
v9fs_blank_wstat(&wstat);
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 23aa52f..f4287e4 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -197,7 +197,7 @@ const struct file_operations adfs_dir_operations = {
.read = generic_read_dir,
.llseek = generic_file_llseek,
.readdir = adfs_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
static int
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 005ea34..a36da53 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -26,7 +26,7 @@ const struct file_operations adfs_file_operations = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.splice_read = generic_file_splice_read,
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 0f5e309..6f850b0 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -322,8 +322,9 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
if (error)
goto out;
+ /* XXX: this is missing some actual on-disk truncation.. */
if (ia_valid & ATTR_SIZE)
- error = vmtruncate(inode, attr->ia_size);
+ error = simple_setsize(inode, attr->ia_size);
if (error)
goto out;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 861dae6..f05b615 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -183,7 +183,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent
void affs_free_prealloc(struct inode *inode);
extern void affs_truncate(struct inode *);
-int affs_file_fsync(struct file *, struct dentry *, int);
+int affs_file_fsync(struct file *, int);
/* dir.c */
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 184e55c..322710c 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -916,9 +916,9 @@ affs_truncate(struct inode *inode)
affs_free_prealloc(inode);
}
-int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int affs_file_fsync(struct file *filp, int datasync)
{
- struct inode * inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
int ret, err;
ret = write_inode_now(inode, 0);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 807f284..5f679b77 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -740,7 +740,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
extern int afs_writeback_all(struct afs_vnode *);
-extern int afs_fsync(struct file *, struct dentry *, int);
+extern int afs_fsync(struct file *, int);
/*****************************************************************************/
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f490995..9fdc7fe 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -91,9 +91,10 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
memcpy(&server->addr, addr, sizeof(struct in_addr));
server->addr.s_addr = addr->s_addr;
+ _leave(" = %p{%d}", server, atomic_read(&server->usage));
+ } else {
+ _leave(" = NULL [nomem]");
}
-
- _leave(" = %p{%d}", server, atomic_read(&server->usage));
return server;
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3bed54a..3dab9e9 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -701,8 +701,9 @@ int afs_writeback_all(struct afs_vnode *vnode)
* - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*/
-int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int afs_fsync(struct file *file, int datasync)
{
+ struct dentry *dentry = file->f_path.dentry;
struct afs_writeback *wb, *xwb;
struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
int ret;
diff --git a/fs/aio.c b/fs/aio.c
index 48fdeeb..1ccf25c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -527,7 +527,7 @@ static void aio_fput_routine(struct work_struct *data)
/* Complete the fput(s) */
if (req->ki_filp != NULL)
- __fput(req->ki_filp);
+ fput(req->ki_filp);
/* Link the iocb into the context's free list */
spin_lock_irq(&ctx->ctx_lock);
@@ -560,11 +560,11 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
/*
* Try to optimize the aio and eventfd file* puts, by avoiding to
- * schedule work in case it is not __fput() time. In normal cases,
+ * schedule work in case it is not final fput() time. In normal cases,
* we would not be holding the last reference to the file*, so
* this function will be executed w/out any aio kthread wakeup.
*/
- if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
+ if (unlikely(!fput_atomic(req->ki_filp))) {
get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 9bd4b38..e4b75d6 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -205,7 +205,7 @@ static struct inode *anon_inode_mkinode(void)
* that it already _is_ on the dirty list.
*/
inode->i_state = I_DIRTY;
- inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
+ inode->i_mode = S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE;
diff --git a/fs/attr.c b/fs/attr.c
index 0815e93..b4fa3b0 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -67,14 +67,14 @@ EXPORT_SYMBOL(inode_change_ok);
* @offset: the new size to assign to the inode
* @Returns: 0 on success, -ve errno on failure
*
+ * inode_newsize_ok must be called with i_mutex held.
+ *
* inode_newsize_ok will check filesystem limits and ulimits to check that the
* new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
* when necessary. Caller must not proceed with inode size change if failure is
* returned. @inode must be a file (not directory), with appropriate
* permissions to allow truncate (inode_newsize_ok does NOT check these
* conditions).
- *
- * inode_newsize_ok must be called with i_mutex held.
*/
int inode_newsize_ok(const struct inode *inode, loff_t offset)
{
@@ -104,17 +104,25 @@ out_big:
}
EXPORT_SYMBOL(inode_newsize_ok);
-int inode_setattr(struct inode * inode, struct iattr * attr)
+/**
+ * generic_setattr - copy simple metadata updates into the generic inode
+ * @inode: the inode to be updated
+ * @attr: the new attributes
+ *
+ * generic_setattr must be called with i_mutex held.
+ *
+ * generic_setattr updates the inode's metadata with that specified
+ * in attr. Noticably missing is inode size update, which is more complex
+ * as it requires pagecache updates. See simple_setsize.
+ *
+ * The inode is not marked as dirty after this operation. The rationale is
+ * that for "simple" filesystems, the struct inode is the inode storage.
+ * The caller is free to mark the inode dirty afterwards if needed.
+ */
+void generic_setattr(struct inode *inode, const struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
- if (ia_valid & ATTR_SIZE &&
- attr->ia_size != i_size_read(inode)) {
- int error = vmtruncate(inode, attr->ia_size);
- if (error)
- return error;
- }
-
if (ia_valid & ATTR_UID)
inode->i_uid = attr->ia_uid;
if (ia_valid & ATTR_GID)
@@ -135,6 +143,28 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
mode &= ~S_ISGID;
inode->i_mode = mode;
}
+}
+EXPORT_SYMBOL(generic_setattr);
+
+/*
+ * note this function is deprecated, the new truncate sequence should be
+ * used instead -- see eg. simple_setsize, generic_setattr.
+ */
+int inode_setattr(struct inode *inode, const struct iattr *attr)
+{
+ unsigned int ia_valid = attr->ia_valid;
+
+ if (ia_valid & ATTR_SIZE &&
+ attr->ia_size != i_size_read(inode)) {
+ int error;
+
+ error = vmtruncate(inode, attr->ia_size);
+ if (error)
+ return error;
+ }
+
+ generic_setattr(inode, attr);
+
mark_inode_dirty(inode);
return 0;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index a05287a..52e59bf 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -93,8 +93,7 @@ static int bad_file_release(struct inode *inode, struct file *filp)
return -EIO;
}
-static int bad_file_fsync(struct file *file, struct dentry *dentry,
- int datasync)
+static int bad_file_fsync(struct file *file, int datasync)
{
return -EIO;
}
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 8f73841..d967e05 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -78,7 +78,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
const struct file_operations bfs_dir_operations = {
.read = generic_read_dir,
.readdir = bfs_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.llseek = generic_file_llseek,
};
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c5f9a0..63039ed 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -990,10 +990,9 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
/* clear any space allocated but not loaded */
if (phdr->p_filesz < phdr->p_memsz) {
- ret = clear_user((void *) (seg->addr + phdr->p_filesz),
- phdr->p_memsz - phdr->p_filesz);
- if (ret)
- return ret;
+ if (clear_user((void *) (seg->addr + phdr->p_filesz),
+ phdr->p_memsz - phdr->p_filesz))
+ return -EFAULT;
}
if (mm) {
@@ -1027,7 +1026,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
struct elf32_fdpic_loadseg *seg;
struct elf32_phdr *phdr;
unsigned long load_addr, delta_vaddr;
- int loop, dvset, ret;
+ int loop, dvset;
load_addr = params->load_addr;
delta_vaddr = 0;
@@ -1127,9 +1126,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
* PT_LOAD */
if (prot & PROT_WRITE && disp > 0) {
kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
- ret = clear_user((void __user *) maddr, disp);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr, disp))
+ return -EFAULT;
maddr += disp;
}
@@ -1164,19 +1162,17 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
if (prot & PROT_WRITE && excess1 > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess1);
- ret = clear_user((void __user *) maddr + phdr->p_filesz,
- excess1);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr + phdr->p_filesz,
+ excess1))
+ return -EFAULT;
}
#else
if (excess > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess);
- ret = clear_user((void *) maddr + phdr->p_filesz, excess);
- if (ret)
- return ret;
+ if (clear_user((void *) maddr + phdr->p_filesz, excess))
+ return -EFAULT;
}
#endif
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 49566c1..b6ab27c 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -56,15 +56,22 @@
#endif
/*
- * User data (stack, data section and bss) needs to be aligned
- * for the same reasons as SLAB memory is, and to the same amount.
- * Avoid duplicating architecture specific code by using the same
- * macro as with SLAB allocation:
+ * User data (data section and bss) needs to be aligned.
+ * We pick 0x20 here because it is the max value elf2flt has always
+ * used in producing FLAT files, and because it seems to be large
+ * enough to make all the gcc alignment related tests happy.
+ */
+#define FLAT_DATA_ALIGN (0x20)
+
+/*
+ * User data (stack) also needs to be aligned.
+ * Here we can be a bit looser than the data sections since this
+ * needs to only meet arch ABI requirements.
*/
#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN)
+#define FLAT_STACK_ALIGN (ARCH_SLAB_MINALIGN)
#else
-#define FLAT_DATA_ALIGN (sizeof(void *))
+#define FLAT_STACK_ALIGN (sizeof(void *))
#endif
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
@@ -129,7 +136,7 @@ static unsigned long create_flat_tables(
sp = (unsigned long *)p;
sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
- sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
+ sp = (unsigned long *) ((unsigned long)sp & -FLAT_STACK_ALIGN);
argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
envp = argv + (argc + 1);
@@ -589,7 +596,7 @@ static int load_flat_file(struct linux_binprm * bprm,
if (IS_ERR_VALUE(result)) {
printk("Unable to read data+bss, errno %d\n", (int)-result);
do_munmap(current->mm, textpos, text_len);
- do_munmap(current->mm, realdatastart, data_len + extra);
+ do_munmap(current->mm, realdatastart, len);
ret = result;
goto err;
}
@@ -876,7 +883,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
- stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */
+ stack_len += FLAT_STACK_ALIGN - 1; /* reserve for upcoming alignment */
res = load_flat_file(bprm, &libinfo, 0, &stack_len);
if (IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 26e5f50..99d6af8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -172,8 +172,9 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
- iov, offset, nr_segs, blkdev_get_blocks, NULL);
+ return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode,
+ I_BDEV(inode), iov, offset, nr_segs,
+ blkdev_get_blocks, NULL);
}
int __sync_blockdev(struct block_device *bdev, int wait)
@@ -309,8 +310,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata)
{
*pagep = NULL;
- return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- blkdev_get_block);
+ return block_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, blkdev_get_block);
}
static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -358,12 +359,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
return retval;
}
-/*
- * Filp is never NULL; the only case when ->fsync() is called with
- * NULL first argument is nfsd_sync_dir() and that's not a directory.
- */
-
-int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int blkdev_fsync(struct file *filp, int datasync)
{
struct inode *bd_inode = filp->f_mapping->host;
struct block_device *bdev = I_BDEV(bd_inode);
@@ -710,8 +706,13 @@ retry:
* @bdev is about to be opened exclusively. Check @bdev can be opened
* exclusively and mark that an exclusive open is in progress. Each
* successful call to this function must be matched with a call to
- * either bd_claim() or bd_abort_claiming(). If this function
- * succeeds, the matching bd_claim() is guaranteed to succeed.
+ * either bd_finish_claiming() or bd_abort_claiming() (which do not
+ * fail).
+ *
+ * This function is used to gain exclusive access to the block device
+ * without actually causing other exclusive open attempts to fail. It
+ * should be used when the open sequence itself requires exclusive
+ * access but may subsequently fail.
*
* CONTEXT:
* Might sleep.
@@ -738,6 +739,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
return ERR_PTR(-ENXIO);
whole = bdget_disk(disk, 0);
+ module_put(disk->fops->owner);
put_disk(disk);
if (!whole)
return ERR_PTR(-ENOMEM);
@@ -786,15 +788,46 @@ static void bd_abort_claiming(struct block_device *whole, void *holder)
__bd_abort_claiming(whole, holder); /* releases bdev_lock */
}
+/* increment holders when we have a legitimate claim. requires bdev_lock */
+static void __bd_claim(struct block_device *bdev, struct block_device *whole,
+ void *holder)
+{
+ /* note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder will
+ * be set to bd_claim before being set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+}
+
+/**
+ * bd_finish_claiming - finish claiming a block device
+ * @bdev: block device of interest (passed to bd_start_claiming())
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Finish a claiming block started by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_finish_claiming(struct block_device *bdev,
+ struct block_device *whole, void *holder)
+{
+ spin_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ __bd_claim(bdev, whole, holder);
+ __bd_abort_claiming(whole, holder); /* not actually an abort */
+}
+
/**
* bd_claim - claim a block device
* @bdev: block device to claim
* @holder: holder trying to claim @bdev
*
- * Try to claim @bdev which must have been opened successfully. This
- * function may be called with or without preceding
- * blk_start_claiming(). In the former case, this function is always
- * successful and terminates the claiming block.
+ * Try to claim @bdev which must have been opened successfully.
*
* CONTEXT:
* Might sleep.
@@ -810,23 +843,10 @@ int bd_claim(struct block_device *bdev, void *holder)
might_sleep();
spin_lock(&bdev_lock);
-
res = bd_prepare_to_claim(bdev, whole, holder);
- if (res == 0) {
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- }
-
- if (whole->bd_claiming)
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
- else
- spin_unlock(&bdev_lock);
+ if (res == 0)
+ __bd_claim(bdev, whole, holder);
+ spin_unlock(&bdev_lock);
return res;
}
@@ -1480,7 +1500,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if (whole) {
if (res == 0)
- BUG_ON(bd_claim(bdev, filp) != 0);
+ bd_finish_claiming(bdev, whole, filp);
else
bd_abort_claiming(whole, filp);
}
@@ -1716,7 +1736,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h
if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
goto out_blkdev_put;
- BUG_ON(bd_claim(bdev, holder) != 0);
+ bd_finish_claiming(bdev, whole, holder);
return bdev;
out_blkdev_put:
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e9bf864..29c2009 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2434,7 +2434,7 @@ void btrfs_update_iflags(struct inode *inode);
void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
/* file.c */
-int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
+int btrfs_sync_file(struct file *file, int datasync);
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 79437c5..787b50a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1101,8 +1101,9 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
* important optimization for directories because holding the mutex prevents
* new operations on the dir while we write to disk.
*/
-int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
+int btrfs_sync_file(struct file *file, int datasync)
{
+ struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index e8aa708..d54812b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1949,14 +1949,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
}
/*
- * block_write_begin takes care of the basic task of block allocation and
- * bringing partial write blocks uptodate first.
- *
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
+ * Filesystems implementing the new truncate sequence should use the
+ * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * The filesystem needs to handle block truncation upon failure.
*/
-int block_write_begin(struct file *file, struct address_space *mapping,
+int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block)
@@ -1992,20 +1989,50 @@ int block_write_begin(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
*pagep = NULL;
-
- /*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- */
- if (pos + len > inode->i_size)
- vmtruncate(inode, inode->i_size);
}
}
out:
return status;
}
+EXPORT_SYMBOL(block_write_begin_newtrunc);
+
+/*
+ * block_write_begin takes care of the basic task of block allocation and
+ * bringing partial write blocks uptodate first.
+ *
+ * If *pagep is not NULL, then block_write_begin uses the locked page
+ * at *pagep rather than allocating its own. In this case, the page will
+ * not be unlocked or deallocated on failure.
+ */
+int block_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata,
+ get_block_t *get_block)
+{
+ int ret;
+
+ ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, get_block);
+
+ /*
+ * prepare_write() may have instantiated a few blocks
+ * outside i_size. Trim these off again. Don't need
+ * i_size_read because we hold i_mutex.
+ *
+ * Filesystems which pass down their own page also cannot
+ * call into vmtruncate here because it would lead to lock
+ * inversion problems (*pagep is locked). This is a further
+ * example of where the old truncate sequence is inadequate.
+ */
+ if (unlikely(ret) && *pagep == NULL) {
+ loff_t isize = mapping->host->i_size;
+ if (pos + len > isize)
+ vmtruncate(mapping->host, isize);
+ }
+
+ return ret;
+}
EXPORT_SYMBOL(block_write_begin);
int block_write_end(struct file *file, struct address_space *mapping,
@@ -2324,7 +2351,7 @@ out:
* For moronic filesystems that do not allow holes in file.
* We may have to extend the file.
*/
-int cont_write_begin(struct file *file, struct address_space *mapping,
+int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block, loff_t *bytes)
@@ -2345,11 +2372,30 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
}
*pagep = NULL;
- err = block_write_begin(file, mapping, pos, len,
+ err = block_write_begin_newtrunc(file, mapping, pos, len,
flags, pagep, fsdata, get_block);
out:
return err;
}
+EXPORT_SYMBOL(cont_write_begin_newtrunc);
+
+int cont_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata,
+ get_block_t *get_block, loff_t *bytes)
+{
+ int ret;
+
+ ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, get_block, bytes);
+ if (unlikely(ret)) {
+ loff_t isize = mapping->host->i_size;
+ if (pos + len > isize)
+ vmtruncate(mapping->host, isize);
+ }
+
+ return ret;
+}
EXPORT_SYMBOL(cont_write_begin);
int block_prepare_write(struct page *page, unsigned from, unsigned to,
@@ -2381,7 +2427,7 @@ EXPORT_SYMBOL(block_commit_write);
*
* We are not allowed to take the i_mutex here so we have to play games to
* protect against truncate races as the page could now be beyond EOF. Because
- * vmtruncate() writes the inode size before removing pages, once we have the
+ * truncate writes the inode size before removing pages, once we have the
* page lock we can determine safely if the page is beyond EOF. If it is not
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
@@ -2464,10 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
}
/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
+ * Filesystems implementing the new truncate sequence should use the
+ * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * The filesystem needs to handle block truncation upon failure.
*/
-int nobh_write_begin(struct file *file, struct address_space *mapping,
+int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block)
@@ -2500,8 +2547,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
*pagep = NULL;
- return block_write_begin(file, mapping, pos, len, flags, pagep,
- fsdata, get_block);
+ return block_write_begin_newtrunc(file, mapping, pos, len,
+ flags, pagep, fsdata, get_block);
}
if (PageMappedToDisk(page))
@@ -2605,8 +2652,34 @@ out_release:
page_cache_release(page);
*pagep = NULL;
- if (pos + len > inode->i_size)
- vmtruncate(inode, inode->i_size);
+ return ret;
+}
+EXPORT_SYMBOL(nobh_write_begin_newtrunc);
+
+/*
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
+ */
+int nobh_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata,
+ get_block_t *get_block)
+{
+ int ret;
+
+ ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, get_block);
+
+ /*
+ * prepare_write() may have instantiated a few blocks
+ * outside i_size. Trim these off again. Don't need
+ * i_size_read because we hold i_mutex.
+ */
+ if (unlikely(ret)) {
+ loff_t isize = mapping->host->i_size;
+ if (pos + len > isize)
+ vmtruncate(mapping->host, isize);
+ }
return ret;
}
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index 9f46de2..89490bea 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -1,7 +1,6 @@
#include "ceph_debug.h"
#include <linux/module.h>
-#include <linux/slab.h>
#include <linux/err.h>
#include <linux/slab.h>
@@ -217,8 +216,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
if (ac->protocol != protocol) {
ret = ceph_auth_init_protocol(ac, protocol);
if (ret) {
- pr_err("error %d on auth method %s init\n",
- ret, ac->ops->name);
+ pr_err("error %d on auth protocol %d init\n",
+ ret, protocol);
goto out;
}
}
@@ -247,7 +246,7 @@ int ceph_build_auth(struct ceph_auth_client *ac,
if (!ac->protocol)
return ceph_auth_build_hello(ac, msg_buf, msg_len);
BUG_ON(!ac->ops);
- if (!ac->ops->is_authenticated(ac))
+ if (ac->ops->should_authenticate(ac))
return ceph_build_auth_request(ac, msg_buf, msg_len);
return 0;
}
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h
index 4429a70..d38a2fb 100644
--- a/fs/ceph/auth.h
+++ b/fs/ceph/auth.h
@@ -24,6 +24,12 @@ struct ceph_auth_client_ops {
int (*is_authenticated)(struct ceph_auth_client *ac);
/*
+ * true if we should (re)authenticate, e.g., when our tickets
+ * are getting old and crusty.
+ */
+ int (*should_authenticate)(struct ceph_auth_client *ac);
+
+ /*
* build requests and process replies during monitor
* handshake. if handle_reply returns -EAGAIN, we build
* another request.
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c
index 24407c1..ad1dc21 100644
--- a/fs/ceph/auth_none.c
+++ b/fs/ceph/auth_none.c
@@ -31,6 +31,13 @@ static int is_authenticated(struct ceph_auth_client *ac)
return !xi->starting;
}
+static int should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ return xi->starting;
+}
+
/*
* the generic auth code decode the global_id, and we carry no actual
* authenticate state, so nothing happens here.
@@ -98,6 +105,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.reset = reset,
.destroy = destroy,
.is_authenticated = is_authenticated,
+ .should_authenticate = should_authenticate,
.handle_reply = handle_reply,
.create_authorizer = ceph_auth_none_create_authorizer,
.destroy_authorizer = ceph_auth_none_destroy_authorizer,
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 7b20623..83d4d27 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -27,6 +27,17 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
return (ac->want_keys & xi->have_keys) == ac->want_keys;
}
+static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+ int need;
+
+ ceph_x_validate_tickets(ac, &need);
+ dout("ceph_x_should_authenticate want=%d need=%d have=%d\n",
+ ac->want_keys, need, xi->have_keys);
+ return need != 0;
+}
+
static int ceph_x_encrypt_buflen(int ilen)
{
return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
@@ -620,6 +631,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
static const struct ceph_auth_client_ops ceph_x_ops = {
.name = "x",
.is_authenticated = ceph_x_is_authenticated,
+ .should_authenticate = ceph_x_should_authenticate,
.build_request = ceph_x_build_request,
.handle_reply = ceph_x_handle_reply,
.create_authorizer = ceph_x_create_authorizer,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0dd0b81..619b616 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -981,6 +981,46 @@ static int send_cap_msg(struct ceph_mds_session *session,
return 0;
}
+static void __queue_cap_release(struct ceph_mds_session *session,
+ u64 ino, u64 cap_id, u32 migrate_seq,
+ u32 issue_seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_cap_release *head;
+ struct ceph_mds_cap_item *item;
+
+ spin_lock(&session->s_cap_lock);
+ BUG_ON(!session->s_num_cap_releases);
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+
+ dout(" adding %llx release to mds%d msg %p (%d left)\n",
+ ino, session->s_mds, msg, session->s_num_cap_releases);
+
+ BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+ head = msg->front.iov_base;
+ head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
+ item = msg->front.iov_base + msg->front.iov_len;
+ item->ino = cpu_to_le64(ino);
+ item->cap_id = cpu_to_le64(cap_id);
+ item->migrate_seq = cpu_to_le32(migrate_seq);
+ item->seq = cpu_to_le32(issue_seq);
+
+ session->s_num_cap_releases--;
+
+ msg->front.iov_len += sizeof(*item);
+ if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+ dout(" release msg %p full\n", msg);
+ list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+ } else {
+ dout(" release msg %p at %d/%d (%d)\n", msg,
+ (int)le32_to_cpu(head->num),
+ (int)CEPH_CAPS_PER_RELEASE,
+ (int)msg->front.iov_len);
+ }
+ spin_unlock(&session->s_cap_lock);
+}
+
/*
* Queue cap releases when an inode is dropped from our cache. Since
* inode is about to be destroyed, there is no need for i_lock.
@@ -994,41 +1034,9 @@ void ceph_queue_caps_release(struct inode *inode)
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
struct ceph_mds_session *session = cap->session;
- struct ceph_msg *msg;
- struct ceph_mds_cap_release *head;
- struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
- BUG_ON(!session->s_num_cap_releases);
- msg = list_first_entry(&session->s_cap_releases,
- struct ceph_msg, list_head);
-
- dout(" adding %p release to mds%d msg %p (%d left)\n",
- inode, session->s_mds, msg, session->s_num_cap_releases);
-
- BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
- head = msg->front.iov_base;
- head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
- item = msg->front.iov_base + msg->front.iov_len;
- item->ino = cpu_to_le64(ceph_ino(inode));
- item->cap_id = cpu_to_le64(cap->cap_id);
- item->migrate_seq = cpu_to_le32(cap->mseq);
- item->seq = cpu_to_le32(cap->issue_seq);
-
- session->s_num_cap_releases--;
-
- msg->front.iov_len += sizeof(*item);
- if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
- dout(" release msg %p full\n", msg);
- list_move_tail(&msg->list_head,
- &session->s_cap_releases_done);
- } else {
- dout(" release msg %p at %d/%d (%d)\n", msg,
- (int)le32_to_cpu(head->num),
- (int)CEPH_CAPS_PER_RELEASE,
- (int)msg->front.iov_len);
- }
- spin_unlock(&session->s_cap_lock);
+ __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+ cap->mseq, cap->issue_seq);
p = rb_next(p);
__ceph_remove_cap(cap);
}
@@ -1776,9 +1784,9 @@ out:
spin_unlock(&ci->i_unsafe_lock);
}
-int ceph_fsync(struct file *file, struct dentry *dentry, int datasync)
+int ceph_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
unsigned flush_tid;
int ret;
@@ -2655,7 +2663,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_mds_caps *h;
int mds = session->s_mds;
int op;
- u32 seq;
+ u32 seq, mseq;
struct ceph_vino vino;
u64 cap_id;
u64 size, max_size;
@@ -2675,6 +2683,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap = CEPH_NOSNAP;
cap_id = le64_to_cpu(h->cap_id);
seq = le32_to_cpu(h->seq);
+ mseq = le32_to_cpu(h->migrate_seq);
size = le64_to_cpu(h->size);
max_size = le64_to_cpu(h->max_size);
@@ -2689,6 +2698,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap, inode);
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
+
+ if (op == CEPH_CAP_OP_IMPORT)
+ __queue_cap_release(session, vino.ino, cap_id,
+ mseq, seq);
+
+ /*
+ * send any full release message to try to move things
+ * along for the mds (who clearly thinks we still have this
+ * cap).
+ */
+ ceph_add_cap_releases(mdsc, session, -1);
+ ceph_send_cap_releases(mdsc, session);
goto done;
}
@@ -2714,7 +2735,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
spin_lock(&inode->i_lock);
cap = __get_cap_for_mds(ceph_inode(inode), mds);
if (!cap) {
- dout("no cap on %p ino %llx.%llx from mds%d, releasing\n",
+ dout(" no cap on %p ino %llx.%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode), mds);
spin_unlock(&inode->i_lock);
goto done;
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h
index 3b9eeed..2fa992e 100644
--- a/fs/ceph/ceph_fs.h
+++ b/fs/ceph/ceph_fs.h
@@ -265,16 +265,17 @@ extern const char *ceph_mds_state_name(int s);
* - they also define the lock ordering by the MDS
* - a few of these are internal to the mds
*/
-#define CEPH_LOCK_DN 1
-#define CEPH_LOCK_ISNAP 2
-#define CEPH_LOCK_IVERSION 4 /* mds internal */
-#define CEPH_LOCK_IFILE 8 /* mds internal */
-#define CEPH_LOCK_IAUTH 32
-#define CEPH_LOCK_ILINK 64
-#define CEPH_LOCK_IDFT 128 /* dir frag tree */
-#define CEPH_LOCK_INEST 256 /* mds internal */
-#define CEPH_LOCK_IXATTR 512
-#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */
+#define CEPH_LOCK_DVERSION 1
+#define CEPH_LOCK_DN 2
+#define CEPH_LOCK_ISNAP 16
+#define CEPH_LOCK_IVERSION 32 /* mds internal */
+#define CEPH_LOCK_IFILE 64
+#define CEPH_LOCK_IAUTH 128
+#define CEPH_LOCK_ILINK 256
+#define CEPH_LOCK_IDFT 512 /* dir frag tree */
+#define CEPH_LOCK_INEST 1024 /* mds internal */
+#define CEPH_LOCK_IXATTR 2048
+#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
/* client_session ops */
enum {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 4fd3090..f857193 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -587,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
/* we only need inode linkage */
@@ -1107,10 +1107,9 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
* an fsync() on a dir will wait for any uncommitted directory
* operations to commit.
*/
-static int ceph_dir_fsync(struct file *file, struct dentry *dentry,
- int datasync)
+static int ceph_dir_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_path.dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct list_head *head = &ci->i_unsafe_dirops;
struct ceph_mds_request *req;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 1744764..4480cb1 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -133,7 +133,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
USE_ANY_MDS);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_ino1 = vino;
req->r_ino2.ino = cfh->parent_ino;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6512b67..6251a15 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -230,7 +230,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
/* do the open */
req = prepare_open_request(dir->i_sb, flags, mode);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
if (flags & O_CREAT) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a81b8b6..ab47f46 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -69,7 +69,7 @@ struct inode *ceph_get_snapdir(struct inode *parent)
BUG_ON(!S_ISDIR(parent->i_mode));
if (IS_ERR(inode))
- return ERR_PTR(PTR_ERR(inode));
+ return inode;
inode->i_mode = parent->i_mode;
inode->i_uid = parent->i_uid;
inode->i_gid = parent->i_gid;
@@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
spin_lock(&dcache_lock);
spin_lock(&dn->d_lock);
- list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
+ list_move(&dn->d_u.d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
dn->d_u.d_child.prev, dn->d_u.d_child.next);
spin_unlock(&dn->d_lock);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 885aa57..1766947 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
*
* Called under s_mutex.
*/
-static int add_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session,
- int extra)
+int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra)
{
struct ceph_msg *msg;
struct ceph_mds_cap_release *head;
@@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
/*
* called under s_mutex
*/
-static void send_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
+void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
{
struct ceph_msg *msg;
@@ -1768,12 +1768,12 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
mutex_unlock(&mdsc->mutex);
dout("do_request waiting\n");
if (req->r_timeout) {
- err = (long)wait_for_completion_interruptible_timeout(
+ err = (long)wait_for_completion_killable_timeout(
&req->r_completion, req->r_timeout);
if (err == 0)
err = -EIO;
} else {
- err = wait_for_completion_interruptible(&req->r_completion);
+ err = wait_for_completion_killable(&req->r_completion);
}
dout("do_request waited, got %d\n", err);
mutex_lock(&mdsc->mutex);
@@ -1980,7 +1980,7 @@ out_err:
}
mutex_unlock(&mdsc->mutex);
- add_cap_releases(mdsc, req->r_session, -1);
+ ceph_add_cap_releases(mdsc, req->r_session, -1);
mutex_unlock(&session->s_mutex);
/* kick calling process */
@@ -2014,16 +2014,21 @@ static void handle_forward(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
req = __lookup_request(mdsc, tid);
if (!req) {
- dout("forward %llu to mds%d - req dne\n", tid, next_mds);
+ dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
goto out; /* dup reply? */
}
- if (fwd_seq <= req->r_num_fwd) {
- dout("forward %llu to mds%d - old seq %d <= %d\n",
+ if (req->r_aborted) {
+ dout("forward tid %llu aborted, unregistering\n", tid);
+ __unregister_request(mdsc, req);
+ } else if (fwd_seq <= req->r_num_fwd) {
+ dout("forward tid %llu to mds%d - old seq %d <= %d\n",
tid, next_mds, req->r_num_fwd, fwd_seq);
} else {
/* resend. forward race not possible; mds would drop */
- dout("forward %llu to mds%d (we resend)\n", tid, next_mds);
+ dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
+ BUG_ON(req->r_err);
+ BUG_ON(req->r_got_result);
req->r_num_fwd = fwd_seq;
req->r_resend_mds = next_mds;
put_request_session(req);
@@ -2428,6 +2433,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_dentry_info *di;
int mds = session->s_mds;
struct ceph_mds_lease *h = msg->front.iov_base;
+ u32 seq;
struct ceph_vino vino;
int mask;
struct qstr dname;
@@ -2441,6 +2447,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
mask = le16_to_cpu(h->mask);
+ seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
if (dname.len != get_unaligned_le32(h+1))
@@ -2451,8 +2458,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
/* lookup inode */
inode = ceph_find_inode(sb, vino);
- dout("handle_lease '%s', mask %d, ino %llx %p\n",
- ceph_lease_op_name(h->action), mask, vino.ino, inode);
+ dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), mask, vino.ino, inode,
+ dname.len, dname.name);
if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
@@ -2477,7 +2485,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
switch (h->action) {
case CEPH_MDS_LEASE_REVOKE:
if (di && di->lease_session == session) {
- h->seq = cpu_to_le32(di->lease_seq);
+ if (ceph_seq_cmp(di->lease_seq, seq) > 0)
+ h->seq = cpu_to_le32(di->lease_seq);
__ceph_mdsc_drop_dentry_lease(dentry);
}
release = 1;
@@ -2491,7 +2500,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
unsigned long duration =
le32_to_cpu(h->duration_ms) * HZ / 1000;
- di->lease_seq = le32_to_cpu(h->seq);
+ di->lease_seq = seq;
dentry->d_time = di->lease_renew_from + duration;
di->lease_renew_after = di->lease_renew_from +
(duration >> 1);
@@ -2541,7 +2550,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
return;
lease = msg->front.iov_base;
lease->action = action;
- lease->mask = cpu_to_le16(CEPH_LOCK_DN);
+ lease->mask = cpu_to_le16(1);
lease->ino = cpu_to_le64(ceph_vino(inode).ino);
lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap);
lease->seq = cpu_to_le32(seq);
@@ -2571,7 +2580,7 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
BUG_ON(inode == NULL);
BUG_ON(dentry == NULL);
- BUG_ON(mask != CEPH_LOCK_DN);
+ BUG_ON(mask == 0);
/* is dentry lease valid? */
spin_lock(&dentry->d_lock);
@@ -2681,10 +2690,10 @@ static void delayed_work(struct work_struct *work)
send_renew_caps(mdsc, s);
else
ceph_con_keepalive(&s->s_con);
- add_cap_releases(mdsc, s, -1);
+ ceph_add_cap_releases(mdsc, s, -1);
if (s->s_state == CEPH_MDS_SESSION_OPEN ||
s->s_state == CEPH_MDS_SESSION_HUNG)
- send_cap_releases(mdsc, s);
+ ceph_send_cap_releases(mdsc, s);
mutex_unlock(&s->s_mutex);
ceph_put_mds_session(s);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d9936c4..b292fa4 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -322,6 +322,12 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
kref_put(&req->r_kref, ceph_mdsc_release_request);
}
+extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra);
+extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 60b7483..64b8b1f 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -120,6 +120,12 @@ void ceph_msgr_exit(void)
destroy_workqueue(ceph_msgr_wq);
}
+void ceph_msgr_flush()
+{
+ flush_workqueue(ceph_msgr_wq);
+}
+
+
/*
* socket callback functions
*/
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index 00a9430..76fbc95 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -213,6 +213,7 @@ extern int ceph_parse_ips(const char *c, const char *end,
extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
+extern void ceph_msgr_flush(void);
extern struct ceph_messenger *ceph_messenger_create(
struct ceph_entity_addr *myaddr);
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index f6510a4..07a5399 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref)
ceph_msg_put(req->reply);
if (req->request)
ceph_msg_put(req->request);
+
+ kfree(req);
}
static void put_generic_request(struct ceph_mon_generic_request *req)
@@ -704,8 +706,11 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
int ret;
+ int was_auth = 0;
mutex_lock(&monc->mutex);
+ if (monc->auth->ops)
+ was_auth = monc->auth->ops->is_authenticated(monc->auth);
monc->pending_auth = 0;
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
msg->front.iov_len,
@@ -716,7 +721,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
wake_up(&monc->client->auth_wq);
} else if (ret > 0) {
__send_prepared_auth_request(monc, ret);
- } else if (monc->auth->ops->is_authenticated(monc->auth)) {
+ } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index afa7bb3..d25b4ad 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -361,8 +361,13 @@ static void put_osd(struct ceph_osd *osd)
{
dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
atomic_read(&osd->o_ref) - 1);
- if (atomic_dec_and_test(&osd->o_ref))
+ if (atomic_dec_and_test(&osd->o_ref)) {
+ struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
+
+ if (osd->o_authorizer)
+ ac->ops->destroy_authorizer(ac, osd->o_authorizer);
kfree(osd);
+ }
}
/*
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index cfdd8f4..ddc656f 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -706,7 +706,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
len, *p, end);
newcrush = crush_decode(*p, min(*p+len, end));
if (IS_ERR(newcrush))
- return ERR_PTR(PTR_ERR(newcrush));
+ return ERR_CAST(newcrush);
}
/* new flags? */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 7c663d9..fa87f51 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
- buf->f_namelen = PATH_MAX;
+ buf->f_namelen = NAME_MAX;
buf->f_frsize = PAGE_CACHE_SIZE;
/* leave fsid little-endian, regardless of host endianness */
@@ -669,9 +669,17 @@ static void ceph_destroy_client(struct ceph_client *client)
/* unmount */
ceph_mdsc_stop(&client->mdsc);
- ceph_monc_stop(&client->monc);
ceph_osdc_stop(&client->osdc);
+ /*
+ * make sure mds and osd connections close out before destroying
+ * the auth module, which is needed to free those connections'
+ * ceph_authorizers.
+ */
+ ceph_msgr_flush();
+
+ ceph_monc_stop(&client->monc);
+
ceph_adjust_min_caps(-client->min_caps);
ceph_debugfs_client_cleanup(client);
@@ -738,7 +746,7 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
dout("open_root_inode opening '%s'\n", path);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_path1 = kstrdup(path, GFP_NOFS);
req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP;
@@ -918,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data)
/*
* construct our own bdi so we can control readahead, etc.
*/
-static atomic_long_t bdi_seq = ATOMIC_INIT(0);
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
{
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3725c9e..10a4a40 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,7 +10,6 @@
#include <linux/fs.h>
#include <linux/mempool.h>
#include <linux/pagemap.h>
-#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/slab.h>
@@ -811,7 +810,7 @@ extern void ceph_put_cap(struct ceph_cap *cap);
extern void ceph_queue_caps_release(struct inode *inode);
extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
-extern int ceph_fsync(struct file *file, struct dentry *dentry, int datasync);
+extern int ceph_fsync(struct file *file, int datasync);
extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
extern int ceph_get_cap_mds(struct inode *inode);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 0242ff9..a7eb65c 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -84,7 +84,7 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
size_t write_size, loff_t *poffset);
extern int cifs_lock(struct file *, int, struct file_lock *);
-extern int cifs_fsync(struct file *, struct dentry *, int);
+extern int cifs_fsync(struct file *, int);
extern int cifs_flush(struct file *, fl_owner_t id);
extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
extern const struct file_operations cifs_dir_ops;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a83541e..75541af 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1676,7 +1676,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
return rc;
}
-int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int cifs_fsync(struct file *file, int datasync)
{
int xid;
int rc = 0;
@@ -1688,7 +1688,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
xid = GetXid();
cFYI(1, "Sync file - name: %s datasync: 0x%x",
- dentry->d_name.name, datasync);
+ file->f_path.dentry->d_name.name, datasync);
rc = filemap_write_and_wait(inode->i_mapping);
if (rc == 0) {
@@ -1952,6 +1952,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
bytes_read -= PAGE_CACHE_SIZE;
continue;
}
+ page_cache_release(page);
target = kmap_atomic(page, KM_USER0);
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h
index d99860a..6b443ff 100644
--- a/fs/coda/coda_int.h
+++ b/fs/coda/coda_int.h
@@ -11,8 +11,7 @@ extern int coda_fake_statfs;
void coda_destroy_inodecache(void);
int coda_init_inodecache(void);
-int coda_fsync(struct file *coda_file, struct dentry *coda_dentry,
- int datasync);
+int coda_fsync(struct file *coda_file, int datasync);
void coda_sysctl_init(void);
void coda_sysctl_clean(void);
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 7196077..ad3cd2a 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -202,10 +202,10 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
return 0;
}
-int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
+int coda_fsync(struct file *coda_file, int datasync)
{
struct file *host_file;
- struct inode *coda_inode = coda_dentry->d_inode;
+ struct inode *coda_inode = coda_file->f_path.dentry->d_inode;
struct coda_file_info *cfi;
int err = 0;
diff --git a/fs/compat.c b/fs/compat.c
index f0b391c..6490d21 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -626,7 +626,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
tot_len += len;
if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
goto out;
- if (!access_ok(vrfy_dir(type), buf, len)) {
+ if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index c8af2d9..cf78d44 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -73,15 +73,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
return -EINVAL;
sd_iattr = sd->s_iattr;
-
- error = inode_change_ok(inode, iattr);
- if (error)
- return error;
-
- error = inode_setattr(inode, iattr);
- if (error)
- return error;
-
if (!sd_iattr) {
/* setting attributes for the first time, allocate now */
sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
@@ -94,9 +85,12 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
sd->s_iattr = sd_iattr;
}
-
/* attributes were changed atleast once in past */
+ error = simple_setattr(dentry, iattr);
+ if (error)
+ return error;
+
if (ia_valid & ATTR_UID)
sd_iattr->ia_uid = iattr->ia_uid;
if (ia_valid & ATTR_GID)
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 4d74fc7..0210898 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -277,8 +277,10 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"
DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n");
DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n");
+
/*
- * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value
+ * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value
*
* These functions are exactly the same as the above functions (but use a hex
* output for the decimal challenged). For details look at the above unsigned
@@ -357,6 +359,23 @@ struct dentry *debugfs_create_x32(const char *name, mode_t mode,
}
EXPORT_SYMBOL_GPL(debugfs_create_x32);
+/**
+ * debugfs_create_x64 - create a debugfs file that is used to read and write an unsigned 64-bit value
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is %NULL, then the
+ * file will be created in the root of the debugfs filesystem.
+ * @value: a pointer to the variable that the file should read to and write
+ * from.
+ */
+struct dentry *debugfs_create_x64(const char *name, mode_t mode,
+ struct dentry *parent, u64 *value)
+{
+ return debugfs_create_file(name, mode, parent, value, &fops_x64);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_x64);
+
static int debugfs_size_t_set(void *data, u64 val)
{
diff --git a/fs/direct-io.c b/fs/direct-io.c
index da111aa..7600aac 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1134,27 +1134,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
return ret;
}
-/*
- * This is a library function for use by filesystem drivers.
- *
- * The locking rules are governed by the flags parameter:
- * - if the flags value contains DIO_LOCKING we use a fancy locking
- * scheme for dumb filesystems.
- * For writes this function is called under i_mutex and returns with
- * i_mutex held, for reads, i_mutex is not held on entry, but it is
- * taken and dropped again before returning.
- * For reads and writes i_alloc_sem is taken in shared mode and released
- * on I/O completion (which may happen asynchronously after returning to
- * the caller).
- *
- * - if the flags value does NOT contain DIO_LOCKING we don't use any
- * internal locking but rather rely on the filesystem to synchronize
- * direct I/O reads/writes versus each other and truncate.
- * For reads and writes both i_mutex and i_alloc_sem are not held on
- * entry and are never taken.
- */
ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
dio_submit_t submit_io, int flags)
@@ -1247,9 +1228,46 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
nr_segs, blkbits, get_block, end_io,
submit_io, dio);
+out:
+ return retval;
+}
+EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
+
+/*
+ * This is a library function for use by filesystem drivers.
+ *
+ * The locking rules are governed by the flags parameter:
+ * - if the flags value contains DIO_LOCKING we use a fancy locking
+ * scheme for dumb filesystems.
+ * For writes this function is called under i_mutex and returns with
+ * i_mutex held, for reads, i_mutex is not held on entry, but it is
+ * taken and dropped again before returning.
+ * For reads and writes i_alloc_sem is taken in shared mode and released
+ * on I/O completion (which may happen asynchronously after returning to
+ * the caller).
+ *
+ * - if the flags value does NOT contain DIO_LOCKING we don't use any
+ * internal locking but rather rely on the filesystem to synchronize
+ * direct I/O reads/writes versus each other and truncate.
+ * For reads and writes both i_mutex and i_alloc_sem are not held on
+ * entry and are never taken.
+ */
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+ struct block_device *bdev, const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+ dio_submit_t submit_io, int flags)
+{
+ ssize_t retval;
+
+ retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
+ offset, nr_segs, get_block, end_io, submit_io, flags);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again for DIO_LOCKING.
+ * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
+ * their own manner. This is a further example of where the old
+ * truncate sequence is inadequate.
*
* NOTE: filesystems with their own locking have to handle this
* on their own.
@@ -1257,12 +1275,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
if (flags & DIO_LOCKING) {
if (unlikely((rw & WRITE) && retval < 0)) {
loff_t isize = i_size_read(inode);
+ loff_t end = offset + iov_length(iov, nr_segs);
+
if (end > isize)
vmtruncate(inode, isize);
}
}
-out:
return retval;
}
EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 3bdddbc..e8fcf4e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -274,7 +274,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
}
static int
-ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+ecryptfs_fsync(struct file *file, int datasync)
{
return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 65dee2f..31ef525 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -805,7 +805,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
- (ia->ia_size & ~PAGE_CACHE_MASK));
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
- rc = vmtruncate(inode, ia->ia_size);
+ rc = simple_setsize(inode, ia->ia_size);
if (rc)
goto out;
lower_ia->ia_size = ia->ia_size;
@@ -830,7 +830,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
goto out;
}
}
- vmtruncate(inode, ia->ia_size);
+ simple_setsize(inode, ia->ia_size);
rc = ecryptfs_write_inode_size_to_metadata(inode);
if (rc) {
printk(KERN_ERR "Problem with "
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 839b9dc..fef6899 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -40,12 +40,11 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
return 0;
}
-static int exofs_file_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int exofs_file_fsync(struct file *filp, int datasync)
{
int ret;
struct address_space *mapping = filp->f_mapping;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = mapping->host;
struct super_block *sb;
ret = filemap_write_and_wait(mapping);
@@ -66,7 +65,7 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry,
static int exofs_flush(struct file *file, fl_owner_t id)
{
- exofs_file_fsync(file, file->f_path.dentry, 1);
+ exofs_file_fsync(file, 1);
/* TODO: Flush the OSD target */
return 0;
}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 0b038e4..52b34f1 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -122,7 +122,6 @@ extern int ext2_write_inode (struct inode *, struct writeback_control *);
extern void ext2_delete_inode (struct inode *);
extern int ext2_sync_inode (struct inode *);
extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern void ext2_truncate (struct inode *);
extern int ext2_setattr (struct dentry *, struct iattr *);
extern void ext2_set_inode_flags(struct inode *inode);
extern void ext2_get_inode_flags(struct ext2_inode_info *);
@@ -155,7 +154,7 @@ extern void ext2_write_super (struct super_block *);
extern const struct file_operations ext2_dir_operations;
/* file.c */
-extern int ext2_fsync(struct file *file, struct dentry *dentry, int datasync);
+extern int ext2_fsync(struct file *file, int datasync);
extern const struct inode_operations ext2_file_inode_operations;
extern const struct file_operations ext2_file_operations;
extern const struct file_operations ext2_xip_file_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 5d198d0..49eec94 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -40,13 +40,13 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
return 0;
}
-int ext2_fsync(struct file *file, struct dentry *dentry, int datasync)
+int ext2_fsync(struct file *file, int datasync)
{
int ret;
- struct super_block *sb = dentry->d_inode->i_sb;
+ struct super_block *sb = file->f_mapping->host->i_sb;
struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
- ret = simple_fsync(file, dentry, datasync);
+ ret = generic_file_fsync(file, datasync);
if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
/* We don't really know where the IO error happened... */
ext2_error(sb, __func__,
@@ -95,7 +95,6 @@ const struct file_operations ext2_xip_file_operations = {
#endif
const struct inode_operations ext2_file_inode_operations = {
- .truncate = ext2_truncate,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 527c46d..3675088 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -54,6 +54,18 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode)
inode->i_blocks - ea_blocks == 0);
}
+static void ext2_truncate_blocks(struct inode *inode, loff_t offset);
+
+static void ext2_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ truncate_pagecache(inode, to, inode->i_size);
+ ext2_truncate_blocks(inode, inode->i_size);
+ }
+}
+
/*
* Called at the last iput() if i_nlink is zero.
*/
@@ -71,7 +83,7 @@ void ext2_delete_inode (struct inode * inode)
inode->i_size = 0;
if (inode->i_blocks)
- ext2_truncate (inode);
+ ext2_truncate_blocks(inode, 0);
ext2_free_inode (inode);
return;
@@ -757,8 +769,8 @@ int __ext2_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- ext2_get_block);
+ return block_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, ext2_get_block);
}
static int
@@ -766,8 +778,25 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ int ret;
+
*pagep = NULL;
- return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
+ ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+ if (ret < 0)
+ ext2_write_failed(mapping, pos + len);
+ return ret;
+}
+
+static int ext2_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ int ret;
+
+ ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+ if (ret < len)
+ ext2_write_failed(mapping, pos + len);
+ return ret;
}
static int
@@ -775,13 +804,18 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ int ret;
+
/*
* Dir-in-pagecache still uses ext2_write_begin. Would have to rework
* directory handling code to pass around offsets rather than struct
* pages in order to make this work easily.
*/
- return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- ext2_get_block);
+ ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep,
+ fsdata, ext2_get_block);
+ if (ret < 0)
+ ext2_write_failed(mapping, pos + len);
+ return ret;
}
static int ext2_nobh_writepage(struct page *page,
@@ -800,10 +834,15 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t offset, unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
-
- return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs, ext2_get_block, NULL);
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t ret;
+
+ ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev,
+ iov, offset, nr_segs, ext2_get_block, NULL);
+ if (ret < 0 && (rw & WRITE))
+ ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
+ return ret;
}
static int
@@ -818,7 +857,7 @@ const struct address_space_operations ext2_aops = {
.writepage = ext2_writepage,
.sync_page = block_sync_page,
.write_begin = ext2_write_begin,
- .write_end = generic_write_end,
+ .write_end = ext2_write_end,
.bmap = ext2_bmap,
.direct_IO = ext2_direct_IO,
.writepages = ext2_writepages,
@@ -1027,7 +1066,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de
ext2_free_data(inode, p, q);
}
-void ext2_truncate(struct inode *inode)
+static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
__le32 *i_data = EXT2_I(inode)->i_data;
struct ext2_inode_info *ei = EXT2_I(inode);
@@ -1039,27 +1078,8 @@ void ext2_truncate(struct inode *inode)
int n;
long iblock;
unsigned blocksize;
-
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return;
- if (ext2_inode_is_fast_symlink(inode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
-
blocksize = inode->i_sb->s_blocksize;
- iblock = (inode->i_size + blocksize-1)
- >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
-
- if (mapping_is_xip(inode->i_mapping))
- xip_truncate_page(inode->i_mapping, inode->i_size);
- else if (test_opt(inode->i_sb, NOBH))
- nobh_truncate_page(inode->i_mapping,
- inode->i_size, ext2_get_block);
- else
- block_truncate_page(inode->i_mapping,
- inode->i_size, ext2_get_block);
+ iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
n = ext2_block_to_path(inode, iblock, offsets, NULL);
if (n == 0)
@@ -1127,6 +1147,62 @@ do_indirects:
ext2_discard_reservation(inode);
mutex_unlock(&ei->truncate_mutex);
+}
+
+static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
+{
+ /*
+ * XXX: it seems like a bug here that we don't allow
+ * IS_APPEND inode to have blocks-past-i_size trimmed off.
+ * review and fix this.
+ *
+ * Also would be nice to be able to handle IO errors and such,
+ * but that's probably too much to ask.
+ */
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
+ return;
+ if (ext2_inode_is_fast_symlink(inode))
+ return;
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return;
+ __ext2_truncate_blocks(inode, offset);
+}
+
+int ext2_setsize(struct inode *inode, loff_t newsize)
+{
+ loff_t oldsize;
+ int error;
+
+ error = inode_newsize_ok(inode, newsize);
+ if (error)
+ return error;
+
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
+ return -EINVAL;
+ if (ext2_inode_is_fast_symlink(inode))
+ return -EINVAL;
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ if (mapping_is_xip(inode->i_mapping))
+ error = xip_truncate_page(inode->i_mapping, newsize);
+ else if (test_opt(inode->i_sb, NOBH))
+ error = nobh_truncate_page(inode->i_mapping,
+ newsize, ext2_get_block);
+ else
+ error = block_truncate_page(inode->i_mapping,
+ newsize, ext2_get_block);
+ if (error)
+ return error;
+
+ oldsize = inode->i_size;
+ i_size_write(inode, newsize);
+ truncate_pagecache(inode, oldsize, newsize);
+
+ __ext2_truncate_blocks(inode, newsize);
+
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
if (inode_needs_sync(inode)) {
sync_mapping_buffers(inode->i_mapping);
@@ -1134,6 +1210,8 @@ do_indirects:
} else {
mark_inode_dirty(inode);
}
+
+ return 0;
}
static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
@@ -1474,8 +1552,15 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
if (error)
return error;
}
- error = inode_setattr(inode, iattr);
- if (!error && (iattr->ia_valid & ATTR_MODE))
+ if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
+ error = ext2_setsize(inode, iattr->ia_size);
+ if (error)
+ return error;
+ }
+ generic_setattr(inode, iattr);
+ if (iattr->ia_valid & ATTR_MODE)
error = ext2_acl_chmod(inode);
+ mark_inode_dirty(inode);
+
return error;
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 71e9eb1..7ff43f4 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -119,6 +119,8 @@ static void ext2_put_super (struct super_block * sb)
int i;
struct ext2_sb_info *sbi = EXT2_SB(sb);
+ dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
if (sb->s_dirt)
ext2_write_super(sb);
@@ -1063,6 +1065,12 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &ext2_sops;
sb->s_export_op = &ext2_export_ops;
sb->s_xattr = ext2_xattr_handlers;
+
+#ifdef CONFIG_QUOTA
+ sb->dq_op = &dquot_operations;
+ sb->s_qcop = &dquot_quotactl_ops;
+#endif
+
root = ext2_iget(sb, EXT2_ROOT_INO);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
@@ -1241,6 +1249,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
spin_unlock(&sbi->s_lock);
return 0;
}
+
/*
* OK, we are remounting a valid rw partition rdonly, so set
* the rdonly flag and then mark the partition as valid again.
@@ -1248,6 +1257,13 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
es->s_state = cpu_to_le16(sbi->s_mount_state);
es->s_mtime = cpu_to_le32(get_seconds());
spin_unlock(&sbi->s_lock);
+
+ err = dquot_suspend(sb, -1);
+ if (err < 0) {
+ spin_lock(&sbi->s_lock);
+ goto restore_opts;
+ }
+
ext2_sync_super(sb, es, 1);
} else {
__le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
@@ -1269,8 +1285,12 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
if (!ext2_setup_super (sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
spin_unlock(&sbi->s_lock);
+
ext2_write_super(sb);
+
+ dquot_resume(sb, -1);
}
+
return 0;
restore_opts:
sbi->s_mount_opt = old_opts.s_mount_opt;
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 373fa90..e2e72c3 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -297,7 +297,7 @@ static void free_rb_tree_fname(struct rb_root *root)
kfree (old);
}
if (!parent)
- root->rb_node = NULL;
+ *root = RB_ROOT;
else if (parent->rb_left == n)
parent->rb_left = NULL;
else if (parent->rb_right == n)
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index fcf7487..d7e9f74 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -43,9 +43,9 @@
* inode to disk.
*/
-int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
+int ext3_sync_file(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ext3_inode_info *ei = EXT3_I(inode);
journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
int ret, needs_barrier = 0;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0fc1293..6c953bb 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -410,6 +410,8 @@ static void ext3_put_super (struct super_block * sb)
struct ext3_super_block *es = sbi->s_es;
int i, err;
+ dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
lock_kernel();
ext3_xattr_put_super(sb);
@@ -748,7 +750,7 @@ static int ext3_release_dquot(struct dquot *dquot);
static int ext3_mark_dquot_dirty(struct dquot *dquot);
static int ext3_write_info(struct super_block *sb, int type);
static int ext3_quota_on(struct super_block *sb, int type, int format_id,
- char *path, int remount);
+ char *path);
static int ext3_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
@@ -767,12 +769,12 @@ static const struct dquot_operations ext3_quota_operations = {
static const struct quotactl_ops ext3_qctl_operations = {
.quota_on = ext3_quota_on,
- .quota_off = vfs_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk
+ .quota_off = dquot_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk
};
#endif
@@ -1527,7 +1529,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
/* Turn quotas off */
for (i = 0; i < MAXQUOTAS; i++) {
if (sb_dqopt(sb)->files[i])
- vfs_quota_off(sb, i, 0);
+ dquot_quota_off(sb, i);
}
#endif
sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -2551,6 +2553,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
ext3_fsblk_t n_blocks_count = 0;
unsigned long old_sb_flags;
struct ext3_mount_options old_opts;
+ int enable_quota = 0;
int err;
#ifdef CONFIG_QUOTA
int i;
@@ -2597,6 +2600,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
}
if (*flags & MS_RDONLY) {
+ err = dquot_suspend(sb, -1);
+ if (err < 0)
+ goto restore_opts;
+
/*
* First of all, the unconditional stuff we have to do
* to disable replay of the journal when we next remount
@@ -2651,6 +2658,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
goto restore_opts;
if (!ext3_setup_super (sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
+ enable_quota = 1;
}
}
#ifdef CONFIG_QUOTA
@@ -2662,6 +2670,9 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
#endif
unlock_super(sb);
unlock_kernel();
+
+ if (enable_quota)
+ dquot_resume(sb, -1);
return 0;
restore_opts:
sb->s_flags = old_sb_flags;
@@ -2851,24 +2862,21 @@ static int ext3_write_info(struct super_block *sb, int type)
*/
static int ext3_quota_on_mount(struct super_block *sb, int type)
{
- return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
- EXT3_SB(sb)->s_jquota_fmt, type);
+ return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
+ EXT3_SB(sb)->s_jquota_fmt, type);
}
/*
* Standard function to be called on quota_on
*/
static int ext3_quota_on(struct super_block *sb, int type, int format_id,
- char *name, int remount)
+ char *name)
{
int err;
struct path path;
if (!test_opt(sb, QUOTA))
return -EINVAL;
- /* When remounting, no checks are needed and in fact, name is NULL */
- if (remount)
- return vfs_quota_on(sb, type, format_id, name, remount);
err = kern_path(name, LOOKUP_FOLLOW, &path);
if (err)
@@ -2906,7 +2914,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
}
}
- err = vfs_quota_on_path(sb, type, format_id, &path);
+ err = dquot_quota_on_path(sb, type, format_id, &path);
path_put(&path);
return err;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 60bd310..19a4de5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1519,7 +1519,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
extern void ext4_htree_free_dir_info(struct dir_private_info *p);
/* fsync.c */
-extern int ext4_sync_file(struct file *, struct dentry *, int);
+extern int ext4_sync_file(struct file *, int);
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index b6a74f9..592adf2 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -71,9 +71,9 @@ static void ext4_sync_parent(struct inode *inode)
* i_mutex lock is held when entering and exiting this function
*/
-int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
+int ext4_sync_file(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ext4_inode_info *ei = EXT4_I(inode);
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
int ret;
@@ -81,7 +81,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
J_ASSERT(ext4_journal_current_handle() == NULL);
- trace_ext4_sync_file(file, dentry, datasync);
+ trace_ext4_sync_file(file, datasync);
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
@@ -91,7 +91,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
return ret;
if (!journal) {
- ret = simple_fsync(file, dentry, datasync);
+ ret = generic_file_fsync(file, datasync);
if (!ret && !list_empty(&inode->i_dentry))
ext4_sync_parent(inode);
return ret;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 19df61c..42272d6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4942,20 +4942,26 @@ void ext4_set_inode_flags(struct inode *inode)
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
void ext4_get_inode_flags(struct ext4_inode_info *ei)
{
- unsigned int flags = ei->vfs_inode.i_flags;
-
- ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
- EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
- if (flags & S_SYNC)
- ei->i_flags |= EXT4_SYNC_FL;
- if (flags & S_APPEND)
- ei->i_flags |= EXT4_APPEND_FL;
- if (flags & S_IMMUTABLE)
- ei->i_flags |= EXT4_IMMUTABLE_FL;
- if (flags & S_NOATIME)
- ei->i_flags |= EXT4_NOATIME_FL;
- if (flags & S_DIRSYNC)
- ei->i_flags |= EXT4_DIRSYNC_FL;
+ unsigned int vfs_fl;
+ unsigned long old_fl, new_fl;
+
+ do {
+ vfs_fl = ei->vfs_inode.i_flags;
+ old_fl = ei->i_flags;
+ new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
+ EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
+ EXT4_DIRSYNC_FL);
+ if (vfs_fl & S_SYNC)
+ new_fl |= EXT4_SYNC_FL;
+ if (vfs_fl & S_APPEND)
+ new_fl |= EXT4_APPEND_FL;
+ if (vfs_fl & S_IMMUTABLE)
+ new_fl |= EXT4_IMMUTABLE_FL;
+ if (vfs_fl & S_NOATIME)
+ new_fl |= EXT4_NOATIME_FL;
+ if (vfs_fl & S_DIRSYNC)
+ new_fl |= EXT4_DIRSYNC_FL;
+ } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5191,7 +5197,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
*/
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = 0;
- ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
return 0;
}
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
@@ -5204,9 +5210,9 @@ static int ext4_inode_blocks_set(handle_t *handle,
*/
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
- ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
} else {
- ei->i_flags |= EXT4_HUGE_FILE_FL;
+ ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
/* i_block is stored in file system block size */
i_blocks = i_blocks >> (inode->i_blkbits - 9);
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 3a6c92a..52abfa1 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -960,6 +960,9 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL;
}
+ if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
+ return -EPERM;
+
/* Ext4 move extent does not support swapfile */
if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
ext4_debug("ext4 move extent: The argument files should "
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 49d88c0..4e8983a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -646,6 +646,8 @@ static void ext4_put_super(struct super_block *sb)
struct ext4_super_block *es = sbi->s_es;
int i, err;
+ dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
flush_workqueue(sbi->dio_unwritten_wq);
destroy_workqueue(sbi->dio_unwritten_wq);
@@ -1062,7 +1064,7 @@ static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- char *path, int remount);
+ char *path);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
@@ -1084,12 +1086,12 @@ static const struct dquot_operations ext4_quota_operations = {
static const struct quotactl_ops ext4_qctl_operations = {
.quota_on = ext4_quota_on,
- .quota_off = vfs_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk
+ .quota_off = dquot_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk
};
#endif
@@ -2054,7 +2056,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
/* Turn quotas off */
for (i = 0; i < MAXQUOTAS; i++) {
if (sb_dqopt(sb)->files[i])
- vfs_quota_off(sb, i, 0);
+ dquot_quota_off(sb, i);
}
#endif
sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -3576,6 +3578,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
ext4_fsblk_t n_blocks_count = 0;
unsigned long old_sb_flags;
struct ext4_mount_options old_opts;
+ int enable_quota = 0;
ext4_group_t g;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
int err;
@@ -3633,6 +3636,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
if (*flags & MS_RDONLY) {
+ err = dquot_suspend(sb, -1);
+ if (err < 0)
+ goto restore_opts;
+
/*
* First of all, the unconditional stuff we have to do
* to disable replay of the journal when we next remount
@@ -3701,6 +3708,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
if (!ext4_setup_super(sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
+ enable_quota = 1;
}
}
ext4_setup_system_zone(sb);
@@ -3716,6 +3724,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
#endif
unlock_super(sb);
unlock_kernel();
+ if (enable_quota)
+ dquot_resume(sb, -1);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
@@ -3913,24 +3923,21 @@ static int ext4_write_info(struct super_block *sb, int type)
*/
static int ext4_quota_on_mount(struct super_block *sb, int type)
{
- return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
- EXT4_SB(sb)->s_jquota_fmt, type);
+ return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
+ EXT4_SB(sb)->s_jquota_fmt, type);
}
/*
* Standard function to be called on quota_on
*/
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- char *name, int remount)
+ char *name)
{
int err;
struct path path;
if (!test_opt(sb, QUOTA))
return -EINVAL;
- /* When remounting, no checks are needed and in fact, name is NULL */
- if (remount)
- return vfs_quota_on(sb, type, format_id, name, remount);
err = kern_path(name, LOOKUP_FOLLOW, &path);
if (err)
@@ -3969,7 +3976,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
}
}
- err = vfs_quota_on_path(sb, type, format_id, &path);
+ err = dquot_quota_on_path(sb, type, format_id, &path);
path_put(&path);
return err;
}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 53dba57..27ac257 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -306,11 +306,11 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
extern const struct file_operations fat_file_operations;
extern const struct inode_operations fat_file_inode_operations;
extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
-extern void fat_truncate(struct inode *inode);
+extern int fat_setsize(struct inode *inode, loff_t offset);
+extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
-extern int fat_file_fsync(struct file *file, struct dentry *dentry,
- int datasync);
+extern int fat_file_fsync(struct file *file, int datasync);
/* fat/inode.c */
extern void fat_attach(struct inode *inode, loff_t i_pos);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a14c2f6..990dfae 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -149,12 +149,12 @@ static int fat_file_release(struct inode *inode, struct file *filp)
return 0;
}
-int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int fat_file_fsync(struct file *filp, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
int res, err;
- res = simple_fsync(filp, dentry, datasync);
+ res = generic_file_fsync(filp, datasync);
err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
return res ? res : err;
@@ -283,7 +283,7 @@ static int fat_free(struct inode *inode, int skip)
return fat_free_clusters(inode, free_start);
}
-void fat_truncate(struct inode *inode)
+void fat_truncate_blocks(struct inode *inode, loff_t offset)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
const unsigned int cluster_size = sbi->cluster_size;
@@ -293,10 +293,10 @@ void fat_truncate(struct inode *inode)
* This protects against truncating a file bigger than it was then
* trying to write into the hole.
*/
- if (MSDOS_I(inode)->mmu_private > inode->i_size)
- MSDOS_I(inode)->mmu_private = inode->i_size;
+ if (MSDOS_I(inode)->mmu_private > offset)
+ MSDOS_I(inode)->mmu_private = offset;
- nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits;
+ nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits;
fat_free(inode, nr_clusters);
fat_flush_inodes(inode->i_sb, inode, NULL);
@@ -364,6 +364,18 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
return 0;
}
+int fat_setsize(struct inode *inode, loff_t offset)
+{
+ int error;
+
+ error = simple_setsize(inode, offset);
+ if (error)
+ return error;
+ fat_truncate_blocks(inode, offset);
+
+ return error;
+}
+
#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
/* valid file mode bits */
#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
@@ -378,7 +390,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
/*
* Expand the file. Since inode_setattr() updates ->i_size
* before calling the ->truncate(), but FAT needs to fill the
- * hole before it.
+ * hole before it. XXX: this is no longer true with new truncate
+ * sequence.
*/
if (attr->ia_valid & ATTR_SIZE) {
if (attr->ia_size > inode->i_size) {
@@ -427,15 +440,20 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_valid &= ~ATTR_MODE;
}
- if (attr->ia_valid)
- error = inode_setattr(inode, attr);
+ if (attr->ia_valid & ATTR_SIZE) {
+ error = fat_setsize(inode, attr->ia_size);
+ if (error)
+ goto out;
+ }
+
+ generic_setattr(inode, attr);
+ mark_inode_dirty(inode);
out:
return error;
}
EXPORT_SYMBOL_GPL(fat_setattr);
const struct inode_operations fat_file_inode_operations = {
- .truncate = fat_truncate,
.setattr = fat_setattr,
.getattr = fat_getattr,
};
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ed33904..7bf45ae 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -142,14 +142,29 @@ static int fat_readpages(struct file *file, struct address_space *mapping,
return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
}
+static void fat_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ truncate_pagecache(inode, to, inode->i_size);
+ fat_truncate_blocks(inode, inode->i_size);
+ }
+}
+
static int fat_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ int err;
+
*pagep = NULL;
- return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- fat_get_block,
+ err = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, fat_get_block,
&MSDOS_I(mapping->host)->mmu_private);
+ if (err < 0)
+ fat_write_failed(mapping, pos + len);
+ return err;
}
static int fat_write_end(struct file *file, struct address_space *mapping,
@@ -159,6 +174,8 @@ static int fat_write_end(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
int err;
err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+ if (err < len)
+ fat_write_failed(mapping, pos + len);
if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
@@ -172,7 +189,9 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
loff_t offset, unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t ret;
if (rw == WRITE) {
/*
@@ -193,8 +212,12 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
* FAT need to use the DIO_LOCKING for avoiding the race
* condition of fat_get_block() and ->truncate().
*/
- return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs, fat_get_block, NULL);
+ ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev,
+ iov, offset, nr_segs, fat_get_block, NULL);
+ if (ret < 0 && (rw & WRITE))
+ fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
+
+ return ret;
}
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
@@ -429,7 +452,7 @@ static void fat_delete_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
- fat_truncate(inode);
+ fat_truncate_blocks(inode, 0);
clear_inode(inode);
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f74d270..51e11bf 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,7 +274,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
ret = copy_from_user(&owner, owner_p, sizeof(owner));
if (ret)
- return ret;
+ return -EFAULT;
switch (owner.type) {
case F_OWNER_TID:
@@ -332,8 +332,11 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
}
read_unlock(&filp->f_owner.lock);
- if (!ret)
+ if (!ret) {
ret = copy_to_user(owner_p, &owner, sizeof(owner));
+ if (ret)
+ ret = -EFAULT;
+ }
return ret;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 32d12b7..5c7d10e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -194,14 +194,6 @@ struct file *alloc_file(struct path *path, fmode_t mode,
}
EXPORT_SYMBOL(alloc_file);
-void fput(struct file *file)
-{
- if (atomic_long_dec_and_test(&file->f_count))
- __fput(file);
-}
-
-EXPORT_SYMBOL(fput);
-
/**
* drop_file_write_access - give up ability to write to a file
* @file: the file to which we will stop writing
@@ -227,10 +219,9 @@ void drop_file_write_access(struct file *file)
}
EXPORT_SYMBOL_GPL(drop_file_write_access);
-/* __fput is called from task context when aio completion releases the last
- * last use of a struct file *. Do not use otherwise.
+/* the real guts of fput() - releasing the last reference to file
*/
-void __fput(struct file *file)
+static void __fput(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct vfsmount *mnt = file->f_path.mnt;
@@ -268,6 +259,14 @@ void __fput(struct file *file)
mntput(mnt);
}
+void fput(struct file *file)
+{
+ if (atomic_long_dec_and_test(&file->f_count))
+ __fput(file);
+}
+
+EXPORT_SYMBOL(fput);
+
struct file *fget(unsigned int fd)
{
struct file *file;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ea8592b..1d1088f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,7 +45,6 @@ struct wb_writeback_args {
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
- unsigned int sb_pinned:1;
};
/*
@@ -193,8 +192,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
}
static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_args *args,
- int wait)
+ struct wb_writeback_args *args)
{
struct bdi_work *work;
@@ -206,8 +204,6 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
if (work) {
bdi_work_init(work, args);
bdi_queue_work(bdi, work);
- if (wait)
- bdi_wait_on_work_clear(work);
} else {
struct bdi_writeback *wb = &bdi->wb;
@@ -234,11 +230,6 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
.sync_mode = WB_SYNC_ALL,
.nr_pages = LONG_MAX,
.range_cyclic = 0,
- /*
- * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
- * lets make it explicitly clear.
- */
- .sb_pinned = 1,
};
struct bdi_work work;
@@ -254,23 +245,21 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
* @bdi: the backing device to write from
* @sb: write inodes from this super_block
* @nr_pages: the number of pages to write
- * @sb_locked: caller already holds sb umount sem.
*
* Description:
* This does WB_SYNC_NONE opportunistic writeback. The IO is only
* started when this function returns, we make no guarentees on
- * completion. Caller specifies whether sb umount sem is held already or not.
+ * completion. Caller need not hold sb s_umount semaphore.
*
*/
void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
- long nr_pages, int sb_locked)
+ long nr_pages)
{
struct wb_writeback_args args = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
.nr_pages = nr_pages,
.range_cyclic = 1,
- .sb_pinned = sb_locked,
};
/*
@@ -282,7 +271,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
args.for_background = 1;
}
- bdi_alloc_queue_work(bdi, &args, sb_locked);
+ bdi_alloc_queue_work(bdi, &args);
}
/*
@@ -595,7 +584,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
/*
* Caller must already hold the ref for this
*/
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
+ if (wbc->sync_mode == WB_SYNC_ALL) {
WARN_ON(!rwsem_is_locked(&sb->s_umount));
return SB_NOT_PINNED;
}
@@ -769,7 +758,6 @@ static long wb_writeback(struct bdi_writeback *wb,
.for_kupdate = args->for_kupdate,
.for_background = args->for_background,
.range_cyclic = args->range_cyclic,
- .sb_pinned = args->sb_pinned,
};
unsigned long oldest_jif;
long wrote = 0;
@@ -912,7 +900,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
while ((work = get_next_work_item(bdi, wb)) != NULL) {
struct wb_writeback_args args = work->args;
- int post_clear;
/*
* Override sync mode, in case we must wait for completion
@@ -920,13 +907,11 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
if (force_wait)
work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
- post_clear = WB_SYNC_ALL || args.sb_pinned;
-
/*
* If this isn't a data integrity operation, just notify
* that we have seen this work and we are now starting it.
*/
- if (!post_clear)
+ if (args.sync_mode == WB_SYNC_NONE)
wb_clear_pending(wb, work);
wrote += wb_writeback(wb, &args);
@@ -935,7 +920,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
* This is a data integrity writeback, so only do the
* notification when we have completed the work.
*/
- if (post_clear)
+ if (args.sync_mode == WB_SYNC_ALL)
wb_clear_pending(wb, work);
}
@@ -1011,7 +996,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages)
if (!bdi_has_dirty_io(bdi))
continue;
- bdi_alloc_queue_work(bdi, &args, 0);
+ bdi_alloc_queue_work(bdi, &args);
}
rcu_read_unlock();
@@ -1220,18 +1205,6 @@ static void wait_sb_inodes(struct super_block *sb)
iput(old_inode);
}
-static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
-{
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
- long nr_to_write;
-
- nr_to_write = nr_dirty + nr_unstable +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
- bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
-}
-
/**
* writeback_inodes_sb - writeback dirty inodes from given super_block
* @sb: the superblock
@@ -1243,21 +1216,16 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
*/
void writeback_inodes_sb(struct super_block *sb)
{
- __writeback_inodes_sb(sb, 0);
-}
-EXPORT_SYMBOL(writeback_inodes_sb);
+ unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+ unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+ long nr_to_write;
-/**
- * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
- * @sb: the superblock
- *
- * Like writeback_inodes_sb(), except the caller already holds the
- * sb umount sem.
- */
-void writeback_inodes_sb_locked(struct super_block *sb)
-{
- __writeback_inodes_sb(sb, 1);
+ nr_to_write = nr_dirty + nr_unstable +
+ (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+ bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
}
+EXPORT_SYMBOL(writeback_inodes_sb);
/**
* writeback_inodes_sb_if_idle - start writeback if none underway
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 47aefd3..723b889 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -710,30 +710,26 @@ static void fscache_write_op(struct fscache_operation *_op)
goto superseded;
}
- if (page) {
- radix_tree_tag_set(&cookie->stores, page->index,
- FSCACHE_COOKIE_STORING_TAG);
- radix_tree_tag_clear(&cookie->stores, page->index,
- FSCACHE_COOKIE_PENDING_TAG);
- }
+ radix_tree_tag_set(&cookie->stores, page->index,
+ FSCACHE_COOKIE_STORING_TAG);
+ radix_tree_tag_clear(&cookie->stores, page->index,
+ FSCACHE_COOKIE_PENDING_TAG);
spin_unlock(&cookie->stores_lock);
spin_unlock(&object->lock);
- if (page) {
- fscache_set_op_state(&op->op, "Store");
- fscache_stat(&fscache_n_store_pages);
- fscache_stat(&fscache_n_cop_write_page);
- ret = object->cache->ops->write_page(op, page);
- fscache_stat_d(&fscache_n_cop_write_page);
- fscache_set_op_state(&op->op, "EndWrite");
- fscache_end_page_write(object, page);
- if (ret < 0) {
- fscache_set_op_state(&op->op, "Abort");
- fscache_abort_object(object);
- } else {
- fscache_enqueue_operation(&op->op);
- }
+ fscache_set_op_state(&op->op, "Store");
+ fscache_stat(&fscache_n_store_pages);
+ fscache_stat(&fscache_n_cop_write_page);
+ ret = object->cache->ops->write_page(op, page);
+ fscache_stat_d(&fscache_n_cop_write_page);
+ fscache_set_op_state(&op->op, "EndWrite");
+ fscache_end_page_write(object, page);
+ if (ret < 0) {
+ fscache_set_op_state(&op->op, "Abort");
+ fscache_abort_object(object);
+ } else {
+ fscache_enqueue_operation(&op->op);
}
_leave("");
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e53df5e..9424796 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -16,6 +16,9 @@
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/swap.h>
+#include <linux/splice.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
@@ -499,6 +502,9 @@ struct fuse_copy_state {
int write;
struct fuse_req *req;
const struct iovec *iov;
+ struct pipe_buffer *pipebufs;
+ struct pipe_buffer *currbuf;
+ struct pipe_inode_info *pipe;
unsigned long nr_segs;
unsigned long seglen;
unsigned long addr;
@@ -506,16 +512,16 @@ struct fuse_copy_state {
void *mapaddr;
void *buf;
unsigned len;
+ unsigned move_pages:1;
};
static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
- int write, struct fuse_req *req,
+ int write,
const struct iovec *iov, unsigned long nr_segs)
{
memset(cs, 0, sizeof(*cs));
cs->fc = fc;
cs->write = write;
- cs->req = req;
cs->iov = iov;
cs->nr_segs = nr_segs;
}
@@ -523,7 +529,18 @@ static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
- if (cs->mapaddr) {
+ if (cs->currbuf) {
+ struct pipe_buffer *buf = cs->currbuf;
+
+ if (!cs->write) {
+ buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
+ } else {
+ kunmap_atomic(cs->mapaddr, KM_USER0);
+ buf->len = PAGE_SIZE - cs->len;
+ }
+ cs->currbuf = NULL;
+ cs->mapaddr = NULL;
+ } else if (cs->mapaddr) {
kunmap_atomic(cs->mapaddr, KM_USER0);
if (cs->write) {
flush_dcache_page(cs->pg);
@@ -545,26 +562,61 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
unlock_request(cs->fc, cs->req);
fuse_copy_finish(cs);
- if (!cs->seglen) {
- BUG_ON(!cs->nr_segs);
- cs->seglen = cs->iov[0].iov_len;
- cs->addr = (unsigned long) cs->iov[0].iov_base;
- cs->iov++;
- cs->nr_segs--;
+ if (cs->pipebufs) {
+ struct pipe_buffer *buf = cs->pipebufs;
+
+ if (!cs->write) {
+ err = buf->ops->confirm(cs->pipe, buf);
+ if (err)
+ return err;
+
+ BUG_ON(!cs->nr_segs);
+ cs->currbuf = buf;
+ cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
+ cs->len = buf->len;
+ cs->buf = cs->mapaddr + buf->offset;
+ cs->pipebufs++;
+ cs->nr_segs--;
+ } else {
+ struct page *page;
+
+ if (cs->nr_segs == cs->pipe->buffers)
+ return -EIO;
+
+ page = alloc_page(GFP_HIGHUSER);
+ if (!page)
+ return -ENOMEM;
+
+ buf->page = page;
+ buf->offset = 0;
+ buf->len = 0;
+
+ cs->currbuf = buf;
+ cs->mapaddr = kmap_atomic(page, KM_USER0);
+ cs->buf = cs->mapaddr;
+ cs->len = PAGE_SIZE;
+ cs->pipebufs++;
+ cs->nr_segs++;
+ }
+ } else {
+ if (!cs->seglen) {
+ BUG_ON(!cs->nr_segs);
+ cs->seglen = cs->iov[0].iov_len;
+ cs->addr = (unsigned long) cs->iov[0].iov_base;
+ cs->iov++;
+ cs->nr_segs--;
+ }
+ err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
+ if (err < 0)
+ return err;
+ BUG_ON(err != 1);
+ offset = cs->addr % PAGE_SIZE;
+ cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
+ cs->buf = cs->mapaddr + offset;
+ cs->len = min(PAGE_SIZE - offset, cs->seglen);
+ cs->seglen -= cs->len;
+ cs->addr += cs->len;
}
- down_read(&current->mm->mmap_sem);
- err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
- &cs->pg, NULL);
- up_read(&current->mm->mmap_sem);
- if (err < 0)
- return err;
- BUG_ON(err != 1);
- offset = cs->addr % PAGE_SIZE;
- cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
- cs->buf = cs->mapaddr + offset;
- cs->len = min(PAGE_SIZE - offset, cs->seglen);
- cs->seglen -= cs->len;
- cs->addr += cs->len;
return lock_request(cs->fc, cs->req);
}
@@ -586,23 +638,178 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
return ncpy;
}
+static int fuse_check_page(struct page *page)
+{
+ if (page_mapcount(page) ||
+ page->mapping != NULL ||
+ page_count(page) != 1 ||
+ (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
+ ~(1 << PG_locked |
+ 1 << PG_referenced |
+ 1 << PG_uptodate |
+ 1 << PG_lru |
+ 1 << PG_active |
+ 1 << PG_reclaim))) {
+ printk(KERN_WARNING "fuse: trying to steal weird page\n");
+ printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
+ return 1;
+ }
+ return 0;
+}
+
+static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
+{
+ int err;
+ struct page *oldpage = *pagep;
+ struct page *newpage;
+ struct pipe_buffer *buf = cs->pipebufs;
+ struct address_space *mapping;
+ pgoff_t index;
+
+ unlock_request(cs->fc, cs->req);
+ fuse_copy_finish(cs);
+
+ err = buf->ops->confirm(cs->pipe, buf);
+ if (err)
+ return err;
+
+ BUG_ON(!cs->nr_segs);
+ cs->currbuf = buf;
+ cs->len = buf->len;
+ cs->pipebufs++;
+ cs->nr_segs--;
+
+ if (cs->len != PAGE_SIZE)
+ goto out_fallback;
+
+ if (buf->ops->steal(cs->pipe, buf) != 0)
+ goto out_fallback;
+
+ newpage = buf->page;
+
+ if (WARN_ON(!PageUptodate(newpage)))
+ return -EIO;
+
+ ClearPageMappedToDisk(newpage);
+
+ if (fuse_check_page(newpage) != 0)
+ goto out_fallback_unlock;
+
+ mapping = oldpage->mapping;
+ index = oldpage->index;
+
+ /*
+ * This is a new and locked page, it shouldn't be mapped or
+ * have any special flags on it
+ */
+ if (WARN_ON(page_mapped(oldpage)))
+ goto out_fallback_unlock;
+ if (WARN_ON(page_has_private(oldpage)))
+ goto out_fallback_unlock;
+ if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
+ goto out_fallback_unlock;
+ if (WARN_ON(PageMlocked(oldpage)))
+ goto out_fallback_unlock;
+
+ remove_from_page_cache(oldpage);
+ page_cache_release(oldpage);
+
+ err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL);
+ if (err) {
+ printk(KERN_WARNING "fuse_try_move_page: failed to add page");
+ goto out_fallback_unlock;
+ }
+ page_cache_get(newpage);
+
+ if (!(buf->flags & PIPE_BUF_FLAG_LRU))
+ lru_cache_add_file(newpage);
+
+ err = 0;
+ spin_lock(&cs->fc->lock);
+ if (cs->req->aborted)
+ err = -ENOENT;
+ else
+ *pagep = newpage;
+ spin_unlock(&cs->fc->lock);
+
+ if (err) {
+ unlock_page(newpage);
+ page_cache_release(newpage);
+ return err;
+ }
+
+ unlock_page(oldpage);
+ page_cache_release(oldpage);
+ cs->len = 0;
+
+ return 0;
+
+out_fallback_unlock:
+ unlock_page(newpage);
+out_fallback:
+ cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
+ cs->buf = cs->mapaddr + buf->offset;
+
+ err = lock_request(cs->fc, cs->req);
+ if (err)
+ return err;
+
+ return 1;
+}
+
+static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
+ unsigned offset, unsigned count)
+{
+ struct pipe_buffer *buf;
+
+ if (cs->nr_segs == cs->pipe->buffers)
+ return -EIO;
+
+ unlock_request(cs->fc, cs->req);
+ fuse_copy_finish(cs);
+
+ buf = cs->pipebufs;
+ page_cache_get(page);
+ buf->page = page;
+ buf->offset = offset;
+ buf->len = count;
+
+ cs->pipebufs++;
+ cs->nr_segs++;
+ cs->len = 0;
+
+ return 0;
+}
+
/*
* Copy a page in the request to/from the userspace buffer. Must be
* done atomically
*/
-static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
+static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
unsigned offset, unsigned count, int zeroing)
{
+ int err;
+ struct page *page = *pagep;
+
if (page && zeroing && count < PAGE_SIZE) {
void *mapaddr = kmap_atomic(page, KM_USER1);
memset(mapaddr, 0, PAGE_SIZE);
kunmap_atomic(mapaddr, KM_USER1);
}
while (count) {
- if (!cs->len) {
- int err = fuse_copy_fill(cs);
- if (err)
- return err;
+ if (cs->write && cs->pipebufs && page) {
+ return fuse_ref_page(cs, page, offset, count);
+ } else if (!cs->len) {
+ if (cs->move_pages && page &&
+ offset == 0 && count == PAGE_SIZE) {
+ err = fuse_try_move_page(cs, pagep);
+ if (err <= 0)
+ return err;
+ } else {
+ err = fuse_copy_fill(cs);
+ if (err)
+ return err;
+ }
}
if (page) {
void *mapaddr = kmap_atomic(page, KM_USER1);
@@ -627,8 +834,10 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
- struct page *page = req->pages[i];
- int err = fuse_copy_page(cs, page, offset, count, zeroing);
+ int err;
+
+ err = fuse_copy_page(cs, &req->pages[i], offset, count,
+ zeroing);
if (err)
return err;
@@ -705,11 +914,10 @@ __acquires(&fc->lock)
*
* Called with fc->lock held, releases it
*/
-static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
- const struct iovec *iov, unsigned long nr_segs)
+static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
+ size_t nbytes, struct fuse_req *req)
__releases(&fc->lock)
{
- struct fuse_copy_state cs;
struct fuse_in_header ih;
struct fuse_interrupt_in arg;
unsigned reqsize = sizeof(ih) + sizeof(arg);
@@ -725,14 +933,13 @@ __releases(&fc->lock)
arg.unique = req->in.h.unique;
spin_unlock(&fc->lock);
- if (iov_length(iov, nr_segs) < reqsize)
+ if (nbytes < reqsize)
return -EINVAL;
- fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
- err = fuse_copy_one(&cs, &ih, sizeof(ih));
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
if (!err)
- err = fuse_copy_one(&cs, &arg, sizeof(arg));
- fuse_copy_finish(&cs);
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+ fuse_copy_finish(cs);
return err ? err : reqsize;
}
@@ -746,18 +953,13 @@ __releases(&fc->lock)
* request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
-static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
+ struct fuse_copy_state *cs, size_t nbytes)
{
int err;
struct fuse_req *req;
struct fuse_in *in;
- struct fuse_copy_state cs;
unsigned reqsize;
- struct file *file = iocb->ki_filp;
- struct fuse_conn *fc = fuse_get_conn(file);
- if (!fc)
- return -EPERM;
restart:
spin_lock(&fc->lock);
@@ -777,7 +979,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
if (!list_empty(&fc->interrupts)) {
req = list_entry(fc->interrupts.next, struct fuse_req,
intr_entry);
- return fuse_read_interrupt(fc, req, iov, nr_segs);
+ return fuse_read_interrupt(fc, cs, nbytes, req);
}
req = list_entry(fc->pending.next, struct fuse_req, list);
@@ -787,7 +989,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
in = &req->in;
reqsize = in->h.len;
/* If request is too large, reply with an error and restart the read */
- if (iov_length(iov, nr_segs) < reqsize) {
+ if (nbytes < reqsize) {
req->out.h.error = -EIO;
/* SETXATTR is special, since it may contain too large data */
if (in->h.opcode == FUSE_SETXATTR)
@@ -796,12 +998,12 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
goto restart;
}
spin_unlock(&fc->lock);
- fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
- err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
+ cs->req = req;
+ err = fuse_copy_one(cs, &in->h, sizeof(in->h));
if (!err)
- err = fuse_copy_args(&cs, in->numargs, in->argpages,
+ err = fuse_copy_args(cs, in->numargs, in->argpages,
(struct fuse_arg *) in->args, 0);
- fuse_copy_finish(&cs);
+ fuse_copy_finish(cs);
spin_lock(&fc->lock);
req->locked = 0;
if (req->aborted) {
@@ -829,6 +1031,110 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
return err;
}
+static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct fuse_copy_state cs;
+ struct file *file = iocb->ki_filp;
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (!fc)
+ return -EPERM;
+
+ fuse_copy_init(&cs, fc, 1, iov, nr_segs);
+
+ return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
+}
+
+static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return 1;
+}
+
+static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = generic_pipe_buf_release,
+ .steal = fuse_dev_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
+static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ int ret;
+ int page_nr = 0;
+ int do_wakeup = 0;
+ struct pipe_buffer *bufs;
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc = fuse_get_conn(in);
+ if (!fc)
+ return -EPERM;
+
+ bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ if (!bufs)
+ return -ENOMEM;
+
+ fuse_copy_init(&cs, fc, 1, NULL, 0);
+ cs.pipebufs = bufs;
+ cs.pipe = pipe;
+ ret = fuse_dev_do_read(fc, in, &cs, len);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+ pipe_lock(pipe);
+
+ if (!pipe->readers) {
+ send_sig(SIGPIPE, current, 0);
+ if (!ret)
+ ret = -EPIPE;
+ goto out_unlock;
+ }
+
+ if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ while (page_nr < cs.nr_segs) {
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+ struct pipe_buffer *buf = pipe->bufs + newbuf;
+
+ buf->page = bufs[page_nr].page;
+ buf->offset = bufs[page_nr].offset;
+ buf->len = bufs[page_nr].len;
+ buf->ops = &fuse_dev_pipe_buf_ops;
+
+ pipe->nrbufs++;
+ page_nr++;
+ ret += buf->len;
+
+ if (pipe->inode)
+ do_wakeup = 1;
+ }
+
+out_unlock:
+ pipe_unlock(pipe);
+
+ if (do_wakeup) {
+ smp_mb();
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ }
+
+out:
+ for (; page_nr < cs.nr_segs; page_nr++)
+ page_cache_release(bufs[page_nr].page);
+
+ kfree(bufs);
+ return ret;
+}
+
static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
@@ -988,23 +1294,17 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
* it from the list and copy the rest of the buffer to the request.
* The request is finished by calling request_end()
*/
-static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
+ struct fuse_copy_state *cs, size_t nbytes)
{
int err;
- size_t nbytes = iov_length(iov, nr_segs);
struct fuse_req *req;
struct fuse_out_header oh;
- struct fuse_copy_state cs;
- struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
- if (!fc)
- return -EPERM;
- fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
if (nbytes < sizeof(struct fuse_out_header))
return -EINVAL;
- err = fuse_copy_one(&cs, &oh, sizeof(oh));
+ err = fuse_copy_one(cs, &oh, sizeof(oh));
if (err)
goto err_finish;
@@ -1017,7 +1317,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
* and error contains notification code.
*/
if (!oh.unique) {
- err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs);
+ err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
return err ? err : nbytes;
}
@@ -1036,7 +1336,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
if (req->aborted) {
spin_unlock(&fc->lock);
- fuse_copy_finish(&cs);
+ fuse_copy_finish(cs);
spin_lock(&fc->lock);
request_end(fc, req);
return -ENOENT;
@@ -1053,7 +1353,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
queue_interrupt(fc, req);
spin_unlock(&fc->lock);
- fuse_copy_finish(&cs);
+ fuse_copy_finish(cs);
return nbytes;
}
@@ -1061,11 +1361,13 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
list_move(&req->list, &fc->io);
req->out.h = oh;
req->locked = 1;
- cs.req = req;
+ cs->req = req;
+ if (!req->out.page_replace)
+ cs->move_pages = 0;
spin_unlock(&fc->lock);
- err = copy_out_args(&cs, &req->out, nbytes);
- fuse_copy_finish(&cs);
+ err = copy_out_args(cs, &req->out, nbytes);
+ fuse_copy_finish(cs);
spin_lock(&fc->lock);
req->locked = 0;
@@ -1081,10 +1383,101 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
err_unlock:
spin_unlock(&fc->lock);
err_finish:
- fuse_copy_finish(&cs);
+ fuse_copy_finish(cs);
return err;
}
+static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
+ if (!fc)
+ return -EPERM;
+
+ fuse_copy_init(&cs, fc, 0, iov, nr_segs);
+
+ return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
+}
+
+static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos,
+ size_t len, unsigned int flags)
+{
+ unsigned nbuf;
+ unsigned idx;
+ struct pipe_buffer *bufs;
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc;
+ size_t rem;
+ ssize_t ret;
+
+ fc = fuse_get_conn(out);
+ if (!fc)
+ return -EPERM;
+
+ bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ if (!bufs)
+ return -ENOMEM;
+
+ pipe_lock(pipe);
+ nbuf = 0;
+ rem = 0;
+ for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
+ rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
+
+ ret = -EINVAL;
+ if (rem < len) {
+ pipe_unlock(pipe);
+ goto out;
+ }
+
+ rem = len;
+ while (rem) {
+ struct pipe_buffer *ibuf;
+ struct pipe_buffer *obuf;
+
+ BUG_ON(nbuf >= pipe->buffers);
+ BUG_ON(!pipe->nrbufs);
+ ibuf = &pipe->bufs[pipe->curbuf];
+ obuf = &bufs[nbuf];
+
+ if (rem >= ibuf->len) {
+ *obuf = *ibuf;
+ ibuf->ops = NULL;
+ pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+ pipe->nrbufs--;
+ } else {
+ ibuf->ops->get(pipe, ibuf);
+ *obuf = *ibuf;
+ obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+ obuf->len = rem;
+ ibuf->offset += obuf->len;
+ ibuf->len -= obuf->len;
+ }
+ nbuf++;
+ rem -= obuf->len;
+ }
+ pipe_unlock(pipe);
+
+ fuse_copy_init(&cs, fc, 0, NULL, nbuf);
+ cs.pipebufs = bufs;
+ cs.pipe = pipe;
+
+ if (flags & SPLICE_F_MOVE)
+ cs.move_pages = 1;
+
+ ret = fuse_dev_do_write(fc, &cs, len);
+
+ for (idx = 0; idx < nbuf; idx++) {
+ struct pipe_buffer *buf = &bufs[idx];
+ buf->ops->release(pipe, buf);
+ }
+out:
+ kfree(bufs);
+ return ret;
+}
+
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
unsigned mask = POLLOUT | POLLWRNORM;
@@ -1226,8 +1619,10 @@ const struct file_operations fuse_dev_operations = {
.llseek = no_llseek,
.read = do_sync_read,
.aio_read = fuse_dev_read,
+ .splice_read = fuse_dev_splice_read,
.write = do_sync_write,
.aio_write = fuse_dev_write,
+ .splice_write = fuse_dev_splice_write,
.poll = fuse_dev_poll,
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4787ae6..3cdc5f7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1156,10 +1156,9 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
return 0;
}
-static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
+static int fuse_dir_fsync(struct file *file, int datasync)
{
- /* nfsd can call this with no file */
- return file ? fuse_fsync_common(file, de, datasync, 1) : 0;
+ return fuse_fsync_common(file, datasync, 1);
}
static bool update_mtime(unsigned ivalid)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a9f5e13..ada0ade 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -351,10 +351,9 @@ static void fuse_sync_writes(struct inode *inode)
fuse_release_nowrite(inode);
}
-int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
- int isdir)
+int fuse_fsync_common(struct file *file, int datasync, int isdir)
{
- struct inode *inode = de->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
struct fuse_req *req;
@@ -403,9 +402,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
return err;
}
-static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
+static int fuse_fsync(struct file *file, int datasync)
{
- return fuse_fsync_common(file, de, datasync, 0);
+ return fuse_fsync_common(file, datasync, 0);
}
void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
@@ -517,17 +516,26 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
int i;
size_t count = req->misc.read.in.size;
size_t num_read = req->out.args[0].size;
- struct inode *inode = req->pages[0]->mapping->host;
+ struct address_space *mapping = NULL;
- /*
- * Short read means EOF. If file size is larger, truncate it
- */
- if (!req->out.h.error && num_read < count) {
- loff_t pos = page_offset(req->pages[0]) + num_read;
- fuse_read_update_size(inode, pos, req->misc.read.attr_ver);
- }
+ for (i = 0; mapping == NULL && i < req->num_pages; i++)
+ mapping = req->pages[i]->mapping;
- fuse_invalidate_attr(inode); /* atime changed */
+ if (mapping) {
+ struct inode *inode = mapping->host;
+
+ /*
+ * Short read means EOF. If file size is larger, truncate it
+ */
+ if (!req->out.h.error && num_read < count) {
+ loff_t pos;
+
+ pos = page_offset(req->pages[0]) + num_read;
+ fuse_read_update_size(inode, pos,
+ req->misc.read.attr_ver);
+ }
+ fuse_invalidate_attr(inode); /* atime changed */
+ }
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
@@ -536,6 +544,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
else
SetPageError(page);
unlock_page(page);
+ page_cache_release(page);
}
if (req->ff)
fuse_file_put(req->ff);
@@ -550,6 +559,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file)
req->out.argpages = 1;
req->out.page_zeroing = 1;
+ req->out.page_replace = 1;
fuse_read_fill(req, file, pos, count, FUSE_READ);
req->misc.read.attr_ver = fuse_get_attr_version(fc);
if (fc->async_read) {
@@ -589,6 +599,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
return PTR_ERR(req);
}
}
+ page_cache_get(page);
req->pages[req->num_pages] = page;
req->num_pages++;
return 0;
@@ -994,10 +1005,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
- down_read(&current->mm->mmap_sem);
- npages = get_user_pages(current, current->mm, user_addr, npages, !write,
- 0, req->pages, NULL);
- up_read(&current->mm->mmap_sem);
+ npages = get_user_pages_fast(user_addr, npages, !write, req->pages);
if (npages < 0)
return npages;
@@ -1580,9 +1588,9 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
while (iov_iter_count(&ii)) {
struct page *page = pages[page_idx++];
size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
- void *kaddr, *map;
+ void *kaddr;
- kaddr = map = kmap(page);
+ kaddr = kmap(page);
while (todo) {
char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 01cc462..8f309f0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -177,6 +177,9 @@ struct fuse_out {
/** Zero partially or not copied pages */
unsigned page_zeroing:1;
+ /** Pages may be replaced with new ones */
+ unsigned page_replace:1;
+
/** Number or arguments */
unsigned numargs;
@@ -568,8 +571,7 @@ void fuse_release_common(struct file *file, int opcode);
/**
* Send FSYNC or FSYNCDIR request
*/
-int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
- int isdir);
+int fuse_fsync_common(struct file *file, int datasync, int isdir);
/**
* Notify poll wakeup
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index a739a0a..9f8b525 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -700,8 +700,14 @@ out:
return 0;
page_cache_release(page);
+
+ /*
+ * XXX(hch): the call below should probably be replaced with
+ * a call to the gfs2-specific truncate blocks helper to actually
+ * release disk blocks..
+ */
if (pos + len > ip->i_inode.i_size)
- vmtruncate(&ip->i_inode, ip->i_inode.i_size);
+ simple_setsize(&ip->i_inode, ip->i_inode.i_size);
out_endtrans:
gfs2_trans_end(sdp);
out_trans_fail:
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index b20bfcc..ed9a94f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -554,9 +554,9 @@ static int gfs2_close(struct inode *inode, struct file *file)
* Returns: errno
*/
-static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
+static int gfs2_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
int ret = 0;
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 4e64352..98cdd05 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1071,6 +1071,9 @@ int gfs2_permission(struct inode *inode, int mask)
return error;
}
+/*
+ * XXX: should be changed to have proper ordering by opencoding simple_setsize
+ */
static int setattr_size(struct inode *inode, struct iattr *attr)
{
struct gfs2_inode *ip = GFS2_I(inode);
@@ -1081,7 +1084,7 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
return error;
- error = vmtruncate(inode, attr->ia_size);
+ error = simple_setsize(inode, attr->ia_size);
gfs2_trans_end(sdp);
if (error)
return error;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 3a029d8..87ac189 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -411,9 +411,9 @@ int hostfs_file_open(struct inode *ino, struct file *file)
return 0;
}
-int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int hostfs_fsync(struct file *file, int datasync)
{
- return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync);
+ return fsync_file(HOSTFS_I(file->f_mapping->host)->fd, datasync);
}
static const struct file_operations hostfs_file_fops = {
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 3efabff..a9ae9bf 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -19,9 +19,9 @@ static int hpfs_file_release(struct inode *inode, struct file *file)
return 0;
}
-int hpfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
+int hpfs_file_fsync(struct file *file, int datasync)
{
- /*return file_fsync(file, dentry);*/
+ /*return file_fsync(file, datasync);*/
return 0; /* Don't fsync :-) */
}
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 97bf738..75f9d43 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -268,7 +268,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *,
/* file.c */
-int hpfs_file_fsync(struct file *, struct dentry *, int);
+int hpfs_file_fsync(struct file *, int);
extern const struct file_operations hpfs_file_ops;
extern const struct inode_operations hpfs_file_iops;
extern const struct address_space_operations hpfs_aops;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 2e4dfa8..826c3f9 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -587,7 +587,7 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
return err;
}
-static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+static int hppfs_fsync(struct file *file, int datasync)
{
return 0;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a0bbd3d..a4e9a7e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -688,7 +688,7 @@ static void init_once(void *foo)
const struct file_operations hugetlbfs_file_operations = {
.read = hugetlbfs_read,
.mmap = hugetlbfs_file_mmap,
- .fsync = simple_sync_file,
+ .fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
};
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index a33aab6..54a92fd 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -234,8 +234,9 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (inode->i_mode != mode) {
struct iattr attr;
- attr.ia_valid = ATTR_MODE;
+ attr.ia_valid = ATTR_MODE | ATTR_CTIME;
attr.ia_mode = mode;
+ attr.ia_ctime = CURRENT_TIME_SEC;
rc = jffs2_do_setattr(inode, &attr);
if (rc < 0)
return rc;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 7aa4417..166062a 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -222,15 +222,18 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
jffs2_free_raw_inode(ri);
- d_instantiate(dentry, inode);
D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages));
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
fail:
make_bad_inode(inode);
+ unlock_new_inode(inode);
iput(inode);
jffs2_free_raw_inode(ri);
return ret;
@@ -360,8 +363,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* We use f->target field to store the target path. */
@@ -370,8 +373,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
memcpy(f->target, target, targetlen + 1);
@@ -386,30 +389,24 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -437,8 +434,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -453,7 +450,14 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
@@ -519,8 +523,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -531,30 +535,24 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -582,8 +580,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -599,7 +597,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
@@ -693,8 +698,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -705,30 +710,24 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -759,8 +758,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -775,8 +774,14 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
-
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index e7291c1..8134970 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -26,9 +26,9 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page **pagep, void **fsdata);
static int jffs2_readpage (struct file *filp, struct page *pg);
-int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int jffs2_fsync(struct file *filp, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
/* Trigger GC to flush any pending writes for this inode */
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 86e0821..459d39d 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -169,13 +169,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- /* We have to do the vmtruncate() without f->sem held, since
+ /* We have to do the simple_setsize() without f->sem held, since
some pages may be locked and waiting for it in readpage().
We are protected from a simultaneous write() extending i_size
back past iattr->ia_size, because do_truncate() holds the
generic inode semaphore. */
if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) {
- vmtruncate(inode, iattr->ia_size);
+ simple_setsize(inode, iattr->ia_size);
inode->i_blocks = (inode->i_size + 511) >> 9;
}
@@ -465,7 +465,12 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
inode->i_blocks = 0;
inode->i_size = 0;
- insert_inode_hash(inode);
+ if (insert_inode_locked(inode) < 0) {
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(-EINVAL);
+ }
return inode;
}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 035a767..4791aac 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -158,7 +158,7 @@ extern const struct inode_operations jffs2_dir_inode_operations;
extern const struct file_operations jffs2_file_operations;
extern const struct inode_operations jffs2_file_inode_operations;
extern const struct address_space_operations jffs2_file_address_operations;
-int jffs2_fsync(struct file *, struct dentry *, int);
+int jffs2_fsync(struct file *, int);
int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
/* ioctl.c */
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 85d9ec6..127263c 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -27,9 +27,9 @@
#include "jfs_acl.h"
#include "jfs_debug.h"
-int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int jfs_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
int rc = 0;
if (!(inode->i_state & I_DIRTY) ||
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 9e6bda3..11042b1 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -21,7 +21,7 @@
struct fid;
extern struct inode *ialloc(struct inode *, umode_t);
-extern int jfs_fsync(struct file *, struct dentry *, int);
+extern int jfs_fsync(struct file *, int);
extern long jfs_ioctl(struct file *, unsigned int, unsigned long);
extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
extern struct inode *jfs_iget(struct super_block *, unsigned long);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b66832a..b38f96b 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -179,6 +179,8 @@ static void jfs_put_super(struct super_block *sb)
jfs_info("In jfs_put_super");
+ dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
lock_kernel();
rc = jfs_umount(sb);
@@ -396,10 +398,20 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
JFS_SBI(sb)->flag = flag;
ret = jfs_mount_rw(sb, 1);
+
+ /* mark the fs r/w for quota activity */
+ sb->s_flags &= ~MS_RDONLY;
+
unlock_kernel();
+ dquot_resume(sb, -1);
return ret;
}
if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
+ rc = dquot_suspend(sb, -1);
+ if (rc < 0) {
+ unlock_kernel();
+ return rc;
+ }
rc = jfs_umount_rw(sb);
JFS_SBI(sb)->flag = flag;
unlock_kernel();
@@ -469,6 +481,10 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
*/
sb->s_op = &jfs_super_operations;
sb->s_export_op = &jfs_export_operations;
+#ifdef CONFIG_QUOTA
+ sb->dq_op = &dquot_operations;
+ sb->s_qcop = &dquot_quotactl_ops;
+#endif
/*
* Initialize direct-mapping inode/address-space
diff --git a/fs/libfs.c b/fs/libfs.c
index 232bea4..dcaf972 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/vfs.h>
+#include <linux/quotaops.h>
#include <linux/mutex.h>
#include <linux/exportfs.h>
#include <linux/writeback.h>
@@ -58,11 +59,6 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
return NULL;
}
-int simple_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
- return 0;
-}
-
int dcache_dir_open(struct inode *inode, struct file *file)
{
static struct qstr cursor_name = {.len = 1, .name = "."};
@@ -190,7 +186,7 @@ const struct file_operations simple_dir_operations = {
.llseek = dcache_dir_lseek,
.read = generic_read_dir,
.readdir = dcache_readdir,
- .fsync = simple_sync_file,
+ .fsync = noop_fsync,
};
const struct inode_operations simple_dir_inode_operations = {
@@ -330,6 +326,81 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
return 0;
}
+/**
+ * simple_setsize - handle core mm and vfs requirements for file size change
+ * @inode: inode
+ * @newsize: new file size
+ *
+ * Returns 0 on success, -error on failure.
+ *
+ * simple_setsize must be called with inode_mutex held.
+ *
+ * simple_setsize will check that the requested new size is OK (see
+ * inode_newsize_ok), and then will perform the necessary i_size update
+ * and pagecache truncation (if necessary). It will be typically be called
+ * from the filesystem's setattr function when ATTR_SIZE is passed in.
+ *
+ * The inode itself must have correct permissions and attributes to allow
+ * i_size to be changed, this function then just checks that the new size
+ * requested is valid.
+ *
+ * In the case of simple in-memory filesystems with inodes stored solely
+ * in the inode cache, and file data in the pagecache, nothing more needs
+ * to be done to satisfy a truncate request. Filesystems with on-disk
+ * blocks for example will need to free them in the case of truncate, in
+ * that case it may be easier not to use simple_setsize (but each of its
+ * components will likely be required at some point to update pagecache
+ * and inode etc).
+ */
+int simple_setsize(struct inode *inode, loff_t newsize)
+{
+ loff_t oldsize;
+ int error;
+
+ error = inode_newsize_ok(inode, newsize);
+ if (error)
+ return error;
+
+ oldsize = inode->i_size;
+ i_size_write(inode, newsize);
+ truncate_pagecache(inode, oldsize, newsize);
+
+ return error;
+}
+EXPORT_SYMBOL(simple_setsize);
+
+/**
+ * simple_setattr - setattr for simple in-memory filesystem
+ * @dentry: dentry
+ * @iattr: iattr structure
+ *
+ * Returns 0 on success, -error on failure.
+ *
+ * simple_setattr implements setattr for an in-memory filesystem which
+ * does not store its own file data or metadata (eg. uses the page cache
+ * and inode cache as its data store).
+ */
+int simple_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+ struct inode *inode = dentry->d_inode;
+ int error;
+
+ error = inode_change_ok(inode, iattr);
+ if (error)
+ return error;
+
+ if (iattr->ia_valid & ATTR_SIZE) {
+ error = simple_setsize(inode, iattr->ia_size);
+ if (error)
+ return error;
+ }
+
+ generic_setattr(inode, iattr);
+
+ return error;
+}
+EXPORT_SYMBOL(simple_setattr);
+
int simple_readpage(struct file *file, struct page *page)
{
clear_highpage(page);
@@ -418,7 +489,8 @@ int simple_write_end(struct file *file, struct address_space *mapping,
* unique inode values later for this filesystem, then you must take care
* to pass it an appropriate max_reserved value to avoid collisions.
*/
-int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
+int simple_fill_super(struct super_block *s, unsigned long magic,
+ struct tree_descr *files)
{
struct inode *inode;
struct dentry *root;
@@ -851,13 +923,22 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
}
EXPORT_SYMBOL_GPL(generic_fh_to_parent);
-int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
+/**
+ * generic_file_fsync - generic fsync implementation for simple filesystems
+ * @file: file to synchronize
+ * @datasync: only synchronize essential metadata if true
+ *
+ * This is a generic implementation of the fsync method for simple
+ * filesystems which track all non-inode metadata in the buffers list
+ * hanging off the address_space structure.
+ */
+int generic_file_fsync(struct file *file, int datasync)
{
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0, /* metadata-only; caller takes care of data */
};
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
int err;
int ret;
@@ -872,7 +953,15 @@ int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
ret = err;
return ret;
}
-EXPORT_SYMBOL(simple_fsync);
+EXPORT_SYMBOL(generic_file_fsync);
+
+/*
+ * No-op implementation of ->fsync for in-memory filesystems.
+ */
+int noop_fsync(struct file *file, int datasync)
+{
+ return 0;
+}
EXPORT_SYMBOL(dcache_dir_close);
EXPORT_SYMBOL(dcache_dir_lseek);
@@ -895,7 +984,7 @@ EXPORT_SYMBOL(simple_release_fs);
EXPORT_SYMBOL(simple_rename);
EXPORT_SYMBOL(simple_rmdir);
EXPORT_SYMBOL(simple_statfs);
-EXPORT_SYMBOL(simple_sync_file);
+EXPORT_SYMBOL(noop_fsync);
EXPORT_SYMBOL(simple_unlink);
EXPORT_SYMBOL(simple_read_from_buffer);
EXPORT_SYMBOL(simple_write_to_buffer);
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 0de5240..abe1caf 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -219,9 +219,9 @@ int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
}
}
-int logfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int logfs_fsync(struct file *file, int datasync)
{
- struct super_block *sb = dentry->d_inode->i_sb;
+ struct super_block *sb = file->f_mapping->host->i_sb;
logfs_write_anchor(sb);
return 0;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 1a9db84..c838c4d 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -506,7 +506,7 @@ extern const struct address_space_operations logfs_reg_aops;
int logfs_readpage(struct file *file, struct page *page);
int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
unsigned long arg);
-int logfs_fsync(struct file *file, struct dentry *dentry, int datasync);
+int logfs_fsync(struct file *file, int datasync);
/* gc.c */
u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec);
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 6198731..1dbf921 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -22,7 +22,7 @@ const struct file_operations minix_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = minix_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
static inline void dir_put_page(struct page *page)
@@ -72,16 +72,9 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
{
struct address_space *mapping = dir->i_mapping;
struct page *page = read_mapping_page(mapping, n, NULL);
- if (!IS_ERR(page)) {
+ if (!IS_ERR(page))
kmap(page);
- if (!PageUptodate(page))
- goto fail;
- }
return page;
-
-fail:
- dir_put_page(page);
- return ERR_PTR(-EIO);
}
static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 3eec3e6..d5320ff 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -19,7 +19,7 @@ const struct file_operations minix_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c
index f230109..13487ad 100644
--- a/fs/minix/itree_v2.c
+++ b/fs/minix/itree_v2.c
@@ -20,6 +20,9 @@ static inline block_t *i_data(struct inode *inode)
return (block_t *)minix_i(inode)->u.i2_data;
}
+#define DIRCOUNT 7
+#define INDIRCOUNT(sb) (1 << ((sb)->s_blocksize_bits - 2))
+
static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
{
int n = 0;
@@ -34,21 +37,21 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
printk("MINIX-fs: block_to_path: "
"block %ld too big on dev %s\n",
block, bdevname(sb->s_bdev, b));
- } else if (block < 7) {
+ } else if (block < DIRCOUNT) {
offsets[n++] = block;
- } else if ((block -= 7) < 256) {
- offsets[n++] = 7;
+ } else if ((block -= DIRCOUNT) < INDIRCOUNT(sb)) {
+ offsets[n++] = DIRCOUNT;
offsets[n++] = block;
- } else if ((block -= 256) < 256*256) {
- offsets[n++] = 8;
- offsets[n++] = block>>8;
- offsets[n++] = block & 255;
+ } else if ((block -= INDIRCOUNT(sb)) < INDIRCOUNT(sb) * INDIRCOUNT(sb)) {
+ offsets[n++] = DIRCOUNT + 1;
+ offsets[n++] = block / INDIRCOUNT(sb);
+ offsets[n++] = block % INDIRCOUNT(sb);
} else {
- block -= 256*256;
- offsets[n++] = 9;
- offsets[n++] = block>>16;
- offsets[n++] = (block>>8) & 255;
- offsets[n++] = block & 255;
+ block -= INDIRCOUNT(sb) * INDIRCOUNT(sb);
+ offsets[n++] = DIRCOUNT + 2;
+ offsets[n++] = (block / INDIRCOUNT(sb)) / INDIRCOUNT(sb);
+ offsets[n++] = (block / INDIRCOUNT(sb)) % INDIRCOUNT(sb);
+ offsets[n++] = block % INDIRCOUNT(sb);
}
return n;
}
diff --git a/fs/namei.c b/fs/namei.c
index 48e1f60..868d0cb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1621,6 +1621,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
case LAST_DOTDOT:
follow_dotdot(nd);
dir = nd->path.dentry;
+ case LAST_DOT:
if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
if (!dir->d_op->d_revalidate(dir, nd)) {
error = -ESTALE;
@@ -1628,7 +1629,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
}
}
/* fallthrough */
- case LAST_DOT:
case LAST_ROOT:
if (open_flag & O_CREAT)
goto exit;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index b938708..3639cc5 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -22,7 +22,7 @@
#include <linux/ncp_fs.h>
#include "ncplib_kernel.h"
-static int ncp_fsync(struct file *file, struct dentry *dentry, int datasync)
+static int ncp_fsync(struct file *file, int datasync)
{
return 0;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index db64854..782b431 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -53,7 +53,7 @@ static int nfs_link(struct dentry *, struct inode *, struct dentry *);
static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
static int nfs_rename(struct inode *, struct dentry *,
struct inode *, struct dentry *);
-static int nfs_fsync_dir(struct file *, struct dentry *, int);
+static int nfs_fsync_dir(struct file *, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
const struct file_operations nfs_dir_operations = {
@@ -641,8 +641,10 @@ out:
* All directory operations under NFS are synchronous, so fsync()
* is a dummy operation.
*/
-static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
+static int nfs_fsync_dir(struct file *filp, int datasync)
{
+ struct dentry *dentry = filp->f_path.dentry;
+
dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
datasync);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index cac96bc..36a5e74 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -53,7 +53,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static int nfs_file_flush(struct file *, fl_owner_t id);
-static int nfs_file_fsync(struct file *, struct dentry *dentry, int datasync);
+static int nfs_file_fsync(struct file *, int datasync);
static int nfs_check_flags(int flags);
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
@@ -322,8 +322,9 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
* whether any write errors occurred for this process.
*/
static int
-nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
+nfs_file_fsync(struct file *file, int datasync)
{
+ struct dentry *dentry = file->f_path.dentry;
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = dentry->d_inode;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 12f7109..4a27347 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4122,8 +4122,8 @@ nfs4_state_shutdown(void)
nfs4_lock_state();
nfs4_release_reclaim();
__nfs4_state_shutdown();
- nfsd4_destroy_callback_queue();
nfs4_unlock_state();
+ nfsd4_destroy_callback_queue();
}
/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ebbf3b6..3c11112 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -443,8 +443,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (size_change)
put_write_access(inode);
if (!err)
- if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(inode, 1);
+ commit_metadata(fhp);
out:
return err;
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index af638d5..43c8c5b 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -75,8 +75,6 @@ struct nilfs_btree_path {
extern struct kmem_cache *nilfs_btree_path_cache;
-int nilfs_btree_path_cache_init(void);
-void nilfs_btree_path_cache_destroy(void);
int nilfs_btree_init(struct nilfs_bmap *);
int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
const __u64 *, const __u64 *, int);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 30292df..c9a30d7 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -27,7 +27,7 @@
#include "nilfs.h"
#include "segment.h"
-int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
+int nilfs_sync_file(struct file *file, int datasync)
{
/*
* Called from fsync() system call
@@ -37,7 +37,7 @@ int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
* This function should be implemented when the writeback function
* will be implemented.
*/
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
int err;
if (!nilfs_inode_dirty(inode))
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 8723e5b..47d6d79 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -228,7 +228,7 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *,
struct page *, struct inode *);
/* file.c */
-extern int nilfs_sync_file(struct file *, struct dentry *, int);
+extern int nilfs_sync_file(struct file *, int);
/* ioctl.c */
long nilfs_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index fdf1c3b..85fbb66 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -127,8 +127,6 @@ struct nilfs_segment_buffer {
extern struct kmem_cache *nilfs_segbuf_cachep;
-int __init nilfs_init_segbuf_cache(void);
-void nilfs_destroy_segbuf_cache(void);
struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
void nilfs_segbuf_free(struct nilfs_segment_buffer *);
void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index dca1423..01e20db 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -221,8 +221,6 @@ enum {
extern struct kmem_cache *nilfs_transaction_cachep;
/* segment.c */
-extern int nilfs_init_transaction_cache(void);
-extern void nilfs_destroy_transaction_cache(void);
extern void nilfs_relax_pressure_in_lock(struct super_block *);
extern int nilfs_construct_segment(struct super_block *);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 03b34b7..414ef68 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1130,13 +1130,13 @@ static void nilfs_segbuf_init_once(void *obj)
static void nilfs_destroy_cachep(void)
{
- if (nilfs_inode_cachep)
+ if (nilfs_inode_cachep)
kmem_cache_destroy(nilfs_inode_cachep);
- if (nilfs_transaction_cachep)
+ if (nilfs_transaction_cachep)
kmem_cache_destroy(nilfs_transaction_cachep);
- if (nilfs_segbuf_cachep)
+ if (nilfs_segbuf_cachep)
kmem_cache_destroy(nilfs_segbuf_cachep);
- if (nilfs_btree_path_cache)
+ if (nilfs_btree_path_cache)
kmem_cache_destroy(nilfs_btree_path_cache);
}
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index fe44d3f..0f48e7c 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1527,10 +1527,9 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
* this problem for now. We do write the $BITMAP attribute if it is present
* which is the important one for a directory so things are not too bad.
*/
-static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int ntfs_dir_fsync(struct file *filp, int datasync)
{
- struct inode *bmp_vi, *vi = dentry->d_inode;
+ struct inode *bmp_vi, *vi = filp->f_mapping->host;
int err, ret;
ntfs_attr na;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index a1924a0..113ebd9 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2133,7 +2133,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
/**
* ntfs_file_fsync - sync a file to disk
* @filp: file to be synced
- * @dentry: dentry describing the file to sync
* @datasync: if non-zero only flush user data and not metadata
*
* Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync
@@ -2149,19 +2148,15 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
* Also, if @datasync is true, we do not wait on the inode to be written out
* but we always wait on the page cache pages to be written out.
*
- * Note: In the past @filp could be NULL so we ignore it as we don't need it
- * anyway.
- *
* Locking: Caller must hold i_mutex on the inode.
*
* TODO: We should probably also write all attribute/index inodes associated
* with this inode but since we have no simple way of getting to them we ignore
* this problem for now.
*/
-static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int ntfs_file_fsync(struct file *filp, int datasync)
{
- struct inode *vi = dentry->d_inode;
+ struct inode *vi = filp->f_mapping->host;
int err, ret = 0;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 97e54b9..6a13ea6 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -175,13 +175,12 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file)
return 0;
}
-static int ocfs2_sync_file(struct file *file,
- struct dentry *dentry,
- int datasync)
+static int ocfs2_sync_file(struct file *file, int datasync)
{
int err = 0;
journal_t *journal;
- struct inode *inode = dentry->d_inode;
+ struct dentry *dentry = file->f_path.dentry;
+ struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
@@ -1053,7 +1052,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
}
/*
- * This will intentionally not wind up calling vmtruncate(),
+ * This will intentionally not wind up calling simple_setsize(),
* since all the work for a size change has been done above.
* Otherwise, we could get into problems with truncate as
* ip_alloc_sem is used there to protect against i_size
@@ -2119,9 +2118,13 @@ relock:
* direct write may have instantiated a few
* blocks outside i_size. Trim these off again.
* Don't need i_size_read because we hold i_mutex.
+ *
+ * XXX(hch): this looks buggy because ocfs2 did not
+ * actually implement ->truncate. Take a look at
+ * the new truncate sequence and update this accordingly
*/
if (*ppos + count > inode->i_size)
- vmtruncate(inode, inode->i_size);
+ simple_setsize(inode, inode->i_size);
ret = written;
goto out_dio;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2c26ce2..0eaa929 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -879,13 +879,15 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend)
if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
continue;
if (unsuspend)
- status = vfs_quota_enable(
- sb_dqopt(sb)->files[type],
- type, QFMT_OCFS2,
- DQUOT_SUSPENDED);
- else
- status = vfs_quota_disable(sb, type,
- DQUOT_SUSPENDED);
+ status = dquot_resume(sb, type);
+ else {
+ struct ocfs2_mem_dqinfo *oinfo;
+
+ /* Cancel periodic syncing before suspending */
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ status = dquot_suspend(sb, type);
+ }
if (status < 0)
break;
}
@@ -916,8 +918,8 @@ static int ocfs2_enable_quotas(struct ocfs2_super *osb)
status = -ENOENT;
goto out_quota_off;
}
- status = vfs_quota_enable(inode[type], type, QFMT_OCFS2,
- DQUOT_USAGE_ENABLED);
+ status = dquot_enable(inode[type], type, QFMT_OCFS2,
+ DQUOT_USAGE_ENABLED);
if (status < 0)
goto out_quota_off;
}
@@ -952,8 +954,8 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
/* Turn off quotas. This will remove all dquot structures from
* memory and so they will be automatically synced to global
* quota files */
- vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED |
- DQUOT_LIMITS_ENABLED);
+ dquot_disable(sb, type, DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED);
if (!inode)
continue;
iput(inode);
@@ -962,7 +964,7 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
/* Handle quota on quotactl */
static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
- char *path, int remount)
+ char *path)
{
unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
@@ -970,30 +972,24 @@ static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
return -EINVAL;
- if (remount)
- return 0; /* Just ignore it has been handled in
- * ocfs2_remount() */
- return vfs_quota_enable(sb_dqopt(sb)->files[type], type,
- format_id, DQUOT_LIMITS_ENABLED);
+ return dquot_enable(sb_dqopt(sb)->files[type], type,
+ format_id, DQUOT_LIMITS_ENABLED);
}
/* Handle quota off quotactl */
-static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
+static int ocfs2_quota_off(struct super_block *sb, int type)
{
- if (remount)
- return 0; /* Ignore now and handle later in
- * ocfs2_remount() */
- return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
+ return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
}
static const struct quotactl_ops ocfs2_quotactl_ops = {
.quota_on = ocfs2_quota_on,
.quota_off = ocfs2_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk,
};
static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 399487c..6e7a329 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -329,7 +329,7 @@ const struct file_operations omfs_file_operations = {
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/pipe.c b/fs/pipe.c
index d79872e..279eef9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,9 +26,14 @@
/*
* The max size that a non-root user is allowed to grow the pipe. Can
- * be set by root in /proc/sys/fs/pipe-max-pages
+ * be set by root in /proc/sys/fs/pipe-max-size
*/
-unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16;
+unsigned int pipe_max_size = 1048576;
+
+/*
+ * Minimum pipe size, as required by POSIX
+ */
+unsigned int pipe_min_size = PAGE_SIZE;
/*
* We use a start+len construction, which provides full use of the
@@ -230,6 +235,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
return kmap(buf->page);
}
+EXPORT_SYMBOL(generic_pipe_buf_map);
/**
* generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
@@ -249,6 +255,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
} else
kunmap(buf->page);
}
+EXPORT_SYMBOL(generic_pipe_buf_unmap);
/**
* generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
@@ -279,6 +286,7 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
return 1;
}
+EXPORT_SYMBOL(generic_pipe_buf_steal);
/**
* generic_pipe_buf_get - get a reference to a &struct pipe_buffer
@@ -294,6 +302,7 @@ void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
page_cache_get(buf->page);
}
+EXPORT_SYMBOL(generic_pipe_buf_get);
/**
* generic_pipe_buf_confirm - verify contents of the pipe buffer
@@ -309,6 +318,7 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info,
{
return 0;
}
+EXPORT_SYMBOL(generic_pipe_buf_confirm);
/**
* generic_pipe_buf_release - put a reference to a &struct pipe_buffer
@@ -323,6 +333,7 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe,
{
page_cache_release(buf->page);
}
+EXPORT_SYMBOL(generic_pipe_buf_release);
static const struct pipe_buf_operations anon_pipe_buf_ops = {
.can_merge = 1,
@@ -1112,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
* Allocate a new array of pipe buffers and copy the info over. Returns the
* pipe size if successful, or return -ERROR on error.
*/
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
{
struct pipe_buffer *bufs;
/*
- * Must be a power-of-2 currently
- */
- if (!is_power_of_2(arg))
- return -EINVAL;
-
- /*
* We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
* expect a lot of shrink+grow operations, just free and allocate
* again like we would do for growing. If the pipe currently
* contains more buffers than arg, then return busy.
*/
- if (arg < pipe->nrbufs)
+ if (nr_pages < pipe->nrbufs)
return -EBUSY;
- bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
+ bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
if (unlikely(!bufs))
return -ENOMEM;
@@ -1140,20 +1145,56 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
* and adjust the indexes.
*/
if (pipe->nrbufs) {
- const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1);
- const unsigned int head = pipe->nrbufs - tail;
+ unsigned int tail;
+ unsigned int head;
+ tail = pipe->curbuf + pipe->nrbufs;
+ if (tail < pipe->buffers)
+ tail = 0;
+ else
+ tail &= (pipe->buffers - 1);
+
+ head = pipe->nrbufs - tail;
if (head)
memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
if (tail)
- memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer));
+ memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
}
pipe->curbuf = 0;
kfree(pipe->bufs);
pipe->bufs = bufs;
- pipe->buffers = arg;
- return arg;
+ pipe->buffers = nr_pages;
+ return nr_pages * PAGE_SIZE;
+}
+
+/*
+ * Currently we rely on the pipe array holding a power-of-2 number
+ * of pages.
+ */
+static inline unsigned int round_pipe_size(unsigned int size)
+{
+ unsigned long nr_pages;
+
+ nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
+}
+
+/*
+ * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
+ * will return an error.
+ */
+int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
+ if (ret < 0 || !write)
+ return ret;
+
+ pipe_max_size = round_pipe_size(pipe_max_size);
+ return ret;
}
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1168,25 +1209,32 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
mutex_lock(&pipe->inode->i_mutex);
switch (cmd) {
- case F_SETPIPE_SZ:
- if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages)
- return -EINVAL;
- /*
- * The pipe needs to be at least 2 pages large to
- * guarantee POSIX behaviour.
- */
- if (arg < 2)
- return -EINVAL;
- ret = pipe_set_size(pipe, arg);
+ case F_SETPIPE_SZ: {
+ unsigned int size, nr_pages;
+
+ size = round_pipe_size(arg);
+ nr_pages = size >> PAGE_SHIFT;
+
+ ret = -EINVAL;
+ if (!nr_pages)
+ goto out;
+
+ if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
+ ret = -EPERM;
+ goto out;
+ }
+ ret = pipe_set_size(pipe, nr_pages);
break;
+ }
case F_GETPIPE_SZ:
- ret = pipe->buffers;
+ ret = pipe->buffers * PAGE_SIZE;
break;
default:
ret = -EINVAL;
break;
}
+out:
mutex_unlock(&pipe->inode->i_mutex);
return ret;
}
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 3d3fd46..6e8fc62 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -80,7 +80,7 @@ const struct file_operations qnx4_dir_operations =
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = qnx4_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
const struct inode_operations qnx4_dir_inode_operations =
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 1ad8bf0..12c233d 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -228,10 +228,6 @@ static struct hlist_head *dquot_hash;
struct dqstats dqstats;
EXPORT_SYMBOL(dqstats);
-#ifdef CONFIG_SMP
-struct dqstats *dqstats_pcpu;
-EXPORT_SYMBOL(dqstats_pcpu);
-#endif
static qsize_t inode_get_rsv_space(struct inode *inode);
static void __dquot_initialize(struct inode *inode, int type);
@@ -584,7 +580,7 @@ out:
}
EXPORT_SYMBOL(dquot_scan_active);
-int vfs_quota_sync(struct super_block *sb, int type, int wait)
+int dquot_quota_sync(struct super_block *sb, int type, int wait)
{
struct list_head *dirty;
struct dquot *dquot;
@@ -656,7 +652,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
return 0;
}
-EXPORT_SYMBOL(vfs_quota_sync);
+EXPORT_SYMBOL(dquot_quota_sync);
/* Free unused dquots from cache */
static void prune_dqcache(int count)
@@ -676,27 +672,10 @@ static void prune_dqcache(int count)
}
}
-static int dqstats_read(unsigned int type)
-{
- int count = 0;
-#ifdef CONFIG_SMP
- int cpu;
- for_each_possible_cpu(cpu)
- count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
- /* Statistics reading is racy, but absolute accuracy isn't required */
- if (count < 0)
- count = 0;
-#else
- count = dqstats.stat[type];
-#endif
- return count;
-}
-
/*
* This is called from kswapd when we think we need some
* more memory
*/
-
static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
{
if (nr) {
@@ -704,7 +683,9 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
prune_dqcache(nr);
spin_unlock(&dq_list_lock);
}
- return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
+ return ((unsigned)
+ percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])
+ /100) * sysctl_vfs_cache_pressure;
}
static struct shrinker dqcache_shrinker = {
@@ -1815,7 +1796,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA);
if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)
- transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_uid, GRPQUOTA);
+ transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA);
ret = __dquot_transfer(inode, transfer_to);
dqput_all(transfer_to);
@@ -1850,6 +1831,7 @@ const struct dquot_operations dquot_operations = {
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
};
+EXPORT_SYMBOL(dquot_operations);
/*
* Generic helper for ->open on filesystems supporting disk quotas.
@@ -1868,7 +1850,7 @@ EXPORT_SYMBOL(dquot_file_open);
/*
* Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
*/
-int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
+int dquot_disable(struct super_block *sb, int type, unsigned int flags)
{
int cnt, ret = 0;
struct quota_info *dqopt = sb_dqopt(sb);
@@ -1998,14 +1980,15 @@ put_inodes:
}
return ret;
}
-EXPORT_SYMBOL(vfs_quota_disable);
+EXPORT_SYMBOL(dquot_disable);
-int vfs_quota_off(struct super_block *sb, int type, int remount)
+int dquot_quota_off(struct super_block *sb, int type)
{
- return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED :
- (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED));
+ return dquot_disable(sb, type,
+ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
}
-EXPORT_SYMBOL(vfs_quota_off);
+EXPORT_SYMBOL(dquot_quota_off);
+
/*
* Turn quotas on on a device
*/
@@ -2123,36 +2106,43 @@ out_fmt:
}
/* Reenable quotas on remount RW */
-static int vfs_quota_on_remount(struct super_block *sb, int type)
+int dquot_resume(struct super_block *sb, int type)
{
struct quota_info *dqopt = sb_dqopt(sb);
struct inode *inode;
- int ret;
+ int ret = 0, cnt;
unsigned int flags;
- mutex_lock(&dqopt->dqonoff_mutex);
- if (!sb_has_quota_suspended(sb, type)) {
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (type != -1 && cnt != type)
+ continue;
+
+ mutex_lock(&dqopt->dqonoff_mutex);
+ if (!sb_has_quota_suspended(sb, cnt)) {
+ mutex_unlock(&dqopt->dqonoff_mutex);
+ continue;
+ }
+ inode = dqopt->files[cnt];
+ dqopt->files[cnt] = NULL;
+ spin_lock(&dq_state_lock);
+ flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED,
+ cnt);
+ dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt);
+ spin_unlock(&dq_state_lock);
mutex_unlock(&dqopt->dqonoff_mutex);
- return 0;
- }
- inode = dqopt->files[type];
- dqopt->files[type] = NULL;
- spin_lock(&dq_state_lock);
- flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
- DQUOT_LIMITS_ENABLED, type);
- dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type);
- spin_unlock(&dq_state_lock);
- mutex_unlock(&dqopt->dqonoff_mutex);
- flags = dquot_generic_flag(flags, type);
- ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id,
- flags);
- iput(inode);
+ flags = dquot_generic_flag(flags, cnt);
+ ret = vfs_load_quota_inode(inode, cnt,
+ dqopt->info[cnt].dqi_fmt_id, flags);
+ iput(inode);
+ }
return ret;
}
+EXPORT_SYMBOL(dquot_resume);
-int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
+int dquot_quota_on_path(struct super_block *sb, int type, int format_id,
struct path *path)
{
int error = security_quota_on(path->dentry);
@@ -2167,40 +2157,36 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
DQUOT_LIMITS_ENABLED);
return error;
}
-EXPORT_SYMBOL(vfs_quota_on_path);
+EXPORT_SYMBOL(dquot_quota_on_path);
-int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
- int remount)
+int dquot_quota_on(struct super_block *sb, int type, int format_id, char *name)
{
struct path path;
int error;
- if (remount)
- return vfs_quota_on_remount(sb, type);
-
error = kern_path(name, LOOKUP_FOLLOW, &path);
if (!error) {
- error = vfs_quota_on_path(sb, type, format_id, &path);
+ error = dquot_quota_on_path(sb, type, format_id, &path);
path_put(&path);
}
return error;
}
-EXPORT_SYMBOL(vfs_quota_on);
+EXPORT_SYMBOL(dquot_quota_on);
/*
* More powerful function for turning on quotas allowing setting
* of individual quota flags
*/
-int vfs_quota_enable(struct inode *inode, int type, int format_id,
- unsigned int flags)
+int dquot_enable(struct inode *inode, int type, int format_id,
+ unsigned int flags)
{
int ret = 0;
struct super_block *sb = inode->i_sb;
struct quota_info *dqopt = sb_dqopt(sb);
/* Just unsuspend quotas? */
- if (flags & DQUOT_SUSPENDED)
- return vfs_quota_on_remount(sb, type);
+ BUG_ON(flags & DQUOT_SUSPENDED);
+
if (!flags)
return 0;
/* Just updating flags needed? */
@@ -2232,13 +2218,13 @@ out_lock:
load_quota:
return vfs_load_quota_inode(inode, type, format_id, flags);
}
-EXPORT_SYMBOL(vfs_quota_enable);
+EXPORT_SYMBOL(dquot_enable);
/*
* This function is used when filesystem needs to initialize quotas
* during mount time.
*/
-int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
int format_id, int type)
{
struct dentry *dentry;
@@ -2264,24 +2250,7 @@ out:
dput(dentry);
return error;
}
-EXPORT_SYMBOL(vfs_quota_on_mount);
-
-/* Wrapper to turn on quotas when remounting rw */
-int vfs_dq_quota_on_remount(struct super_block *sb)
-{
- int cnt;
- int ret = 0, err;
-
- if (!sb->s_qcop || !sb->s_qcop->quota_on)
- return -ENOSYS;
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
- if (err < 0 && !ret)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfs_dq_quota_on_remount);
+EXPORT_SYMBOL(dquot_quota_on_mount);
static inline qsize_t qbtos(qsize_t blocks)
{
@@ -2316,8 +2285,8 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
spin_unlock(&dq_data_lock);
}
-int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
- struct fs_disk_quota *di)
+int dquot_get_dqblk(struct super_block *sb, int type, qid_t id,
+ struct fs_disk_quota *di)
{
struct dquot *dquot;
@@ -2329,7 +2298,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
return 0;
}
-EXPORT_SYMBOL(vfs_get_dqblk);
+EXPORT_SYMBOL(dquot_get_dqblk);
#define VFS_FS_DQ_MASK \
(FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \
@@ -2428,7 +2397,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
return 0;
}
-int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
+int dquot_set_dqblk(struct super_block *sb, int type, qid_t id,
struct fs_disk_quota *di)
{
struct dquot *dquot;
@@ -2444,10 +2413,10 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
out:
return rc;
}
-EXPORT_SYMBOL(vfs_set_dqblk);
+EXPORT_SYMBOL(dquot_set_dqblk);
/* Generic routine for getting common part of quota file information */
-int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
+int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
{
struct mem_dqinfo *mi;
@@ -2466,10 +2435,10 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return 0;
}
-EXPORT_SYMBOL(vfs_get_dqinfo);
+EXPORT_SYMBOL(dquot_get_dqinfo);
/* Generic routine for setting common part of quota file information */
-int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
+int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
{
struct mem_dqinfo *mi;
int err = 0;
@@ -2496,27 +2465,27 @@ out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return err;
}
-EXPORT_SYMBOL(vfs_set_dqinfo);
+EXPORT_SYMBOL(dquot_set_dqinfo);
-const struct quotactl_ops vfs_quotactl_ops = {
- .quota_on = vfs_quota_on,
- .quota_off = vfs_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk
+const struct quotactl_ops dquot_quotactl_ops = {
+ .quota_on = dquot_quota_on,
+ .quota_off = dquot_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk
};
-
+EXPORT_SYMBOL(dquot_quotactl_ops);
static int do_proc_dqstats(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
-#ifdef CONFIG_SMP
- /* Update global table */
unsigned int type = (int *)table->data - dqstats.stat;
- dqstats.stat[type] = dqstats_read(type);
-#endif
+
+ /* Update global table */
+ dqstats.stat[type] =
+ percpu_counter_sum_positive(&dqstats.counter[type]);
return proc_dointvec(table, write, buffer, lenp, ppos);
}
@@ -2609,7 +2578,7 @@ static ctl_table sys_table[] = {
static int __init dquot_init(void)
{
- int i;
+ int i, ret;
unsigned long nr_hash, order;
printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__);
@@ -2627,12 +2596,11 @@ static int __init dquot_init(void)
if (!dquot_hash)
panic("Cannot create dquot hash table");
-#ifdef CONFIG_SMP
- dqstats_pcpu = alloc_percpu(struct dqstats);
- if (!dqstats_pcpu)
- panic("Cannot create dquot stats table");
-#endif
- memset(&dqstats, 0, sizeof(struct dqstats));
+ for (i = 0; i < _DQST_DQSTAT_LAST; i++) {
+ ret = percpu_counter_init(&dqstats.counter[i], 0);
+ if (ret)
+ panic("Cannot create dquot stat counters");
+ }
/* Find power-of-two hlist_heads which can fit into allocation */
nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index ce3dfd0..b299961 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -73,7 +73,7 @@ static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
if (IS_ERR(pathname))
return PTR_ERR(pathname);
if (sb->s_qcop->quota_on)
- ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0);
+ ret = sb->s_qcop->quota_on(sb, type, id, pathname);
putname(pathname);
return ret;
}
@@ -260,7 +260,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
case Q_QUOTAOFF:
if (!sb->s_qcop->quota_off)
return -ENOSYS;
- return sb->s_qcop->quota_off(sb, type, 0);
+ return sb->s_qcop->quota_off(sb, type);
case Q_GETFMT:
return quota_getfmt(sb, type, addr);
case Q_GETINFO:
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 78f613c..4884ac5 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -43,12 +43,13 @@ const struct file_operations ramfs_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = simple_sync_file,
+ .fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.llseek = generic_file_llseek,
};
const struct inode_operations ramfs_file_inode_operations = {
+ .setattr = simple_setattr,
.getattr = simple_getattr,
};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5ea4ad8..d532c20 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
.aio_read = generic_file_aio_read,
.write = do_sync_write,
.aio_write = generic_file_aio_write,
- .fsync = simple_sync_file,
+ .fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.llseek = generic_file_llseek,
@@ -146,7 +146,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
return ret;
}
- ret = vmtruncate(inode, newsize);
+ ret = simple_setsize(inode, newsize);
return ret;
}
@@ -169,7 +169,8 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
/* pick out size-changing events */
if (ia->ia_valid & ATTR_SIZE) {
- loff_t size = i_size_read(inode);
+ loff_t size = inode->i_size;
+
if (ia->ia_size != size) {
ret = ramfs_nommu_resize(inode, ia->ia_size, size);
if (ret < 0 || ia->ia_valid == ATTR_SIZE)
@@ -182,7 +183,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
}
}
- ret = inode_setattr(inode, ia);
+ generic_setattr(inode, ia);
out:
ia->ia_valid = old_ia_valid;
return ret;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 4455fbe..198dabf 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -14,8 +14,7 @@
extern const struct reiserfs_key MIN_KEY;
static int reiserfs_readdir(struct file *, void *, filldir_t);
-static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
- int datasync);
+static int reiserfs_dir_fsync(struct file *filp, int datasync);
const struct file_operations reiserfs_dir_operations = {
.llseek = generic_file_llseek,
@@ -28,10 +27,9 @@ const struct file_operations reiserfs_dir_operations = {
#endif
};
-static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int reiserfs_dir_fsync(struct file *filp, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
int err;
reiserfs_write_lock(inode->i_sb);
err = reiserfs_commit_for_inode(inode);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9977df9..b82cdd8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -134,10 +134,9 @@ static void reiserfs_vfs_truncate_file(struct inode *inode)
* be removed...
*/
-static int reiserfs_sync_file(struct file *filp,
- struct dentry *dentry, int datasync)
+static int reiserfs_sync_file(struct file *filp, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
int err;
int barrier_done;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 59125fb..9822fa1 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -158,6 +158,7 @@ static int finish_unfinished(struct super_block *s)
#ifdef CONFIG_QUOTA
int i;
int ms_active_set;
+ int quota_enabled[MAXQUOTAS];
#endif
/* compose key to look for "save" links */
@@ -179,8 +180,15 @@ static int finish_unfinished(struct super_block *s)
}
/* Turn on quotas so that they are updated correctly */
for (i = 0; i < MAXQUOTAS; i++) {
+ quota_enabled[i] = 1;
if (REISERFS_SB(s)->s_qf_names[i]) {
- int ret = reiserfs_quota_on_mount(s, i);
+ int ret;
+
+ if (sb_has_quota_active(s, i)) {
+ quota_enabled[i] = 0;
+ continue;
+ }
+ ret = reiserfs_quota_on_mount(s, i);
if (ret < 0)
reiserfs_warning(s, "reiserfs-2500",
"cannot turn on journaled "
@@ -304,8 +312,8 @@ static int finish_unfinished(struct super_block *s)
#ifdef CONFIG_QUOTA
/* Turn quotas off */
for (i = 0; i < MAXQUOTAS; i++) {
- if (sb_dqopt(s)->files[i])
- vfs_quota_off(s, i, 0);
+ if (sb_dqopt(s)->files[i] && quota_enabled[i])
+ dquot_quota_off(s, i);
}
if (ms_active_set)
/* Restore the flag back */
@@ -466,6 +474,8 @@ static void reiserfs_put_super(struct super_block *s)
struct reiserfs_transaction_handle th;
th.t_trans_id = 0;
+ dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
reiserfs_write_lock(s);
if (s->s_dirt)
@@ -620,7 +630,7 @@ static int reiserfs_acquire_dquot(struct dquot *);
static int reiserfs_release_dquot(struct dquot *);
static int reiserfs_mark_dquot_dirty(struct dquot *);
static int reiserfs_write_info(struct super_block *, int);
-static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
+static int reiserfs_quota_on(struct super_block *, int, int, char *);
static const struct dquot_operations reiserfs_quota_operations = {
.write_dquot = reiserfs_write_dquot,
@@ -634,12 +644,12 @@ static const struct dquot_operations reiserfs_quota_operations = {
static const struct quotactl_ops reiserfs_qctl_operations = {
.quota_on = reiserfs_quota_on,
- .quota_off = vfs_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk,
+ .quota_off = dquot_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk,
};
#endif
@@ -1242,6 +1252,11 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
if (s->s_flags & MS_RDONLY)
/* it is read-only already */
goto out_ok;
+
+ err = dquot_suspend(s, -1);
+ if (err < 0)
+ goto out_err;
+
/* try to remount file system with read-only permissions */
if (sb_umount_state(rs) == REISERFS_VALID_FS
|| REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) {
@@ -1295,6 +1310,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
s->s_dirt = 0;
if (!(*mount_flags & MS_RDONLY)) {
+ dquot_resume(s, -1);
finish_unfinished(s);
reiserfs_xattr_init(s, *mount_flags);
}
@@ -2022,15 +2038,15 @@ static int reiserfs_write_info(struct super_block *sb, int type)
*/
static int reiserfs_quota_on_mount(struct super_block *sb, int type)
{
- return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
- REISERFS_SB(sb)->s_jquota_fmt, type);
+ return dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
+ REISERFS_SB(sb)->s_jquota_fmt, type);
}
/*
* Standard function to be called on quota_on
*/
static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
- char *name, int remount)
+ char *name)
{
int err;
struct path path;
@@ -2039,9 +2055,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
return -EINVAL;
- /* No more checks needed? Path and format_id are bogus anyway... */
- if (remount)
- return vfs_quota_on(sb, type, format_id, name, 1);
+
err = kern_path(name, LOOKUP_FOLLOW, &path);
if (err)
return err;
@@ -2085,7 +2099,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
if (err)
goto out;
}
- err = vfs_quota_on_path(sb, type, format_id, &path);
+ err = dquot_quota_on_path(sb, type, format_id, &path);
out:
path_put(&path);
return err;
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 84ecf0e..8e187a0 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -28,8 +28,9 @@
#include "proto.h"
static int
-smb_fsync(struct file *file, struct dentry * dentry, int datasync)
+smb_fsync(struct file *file, int datasync)
{
+ struct dentry *dentry = file->f_path.dentry;
struct smb_sb_info *server = server_from_dentry(dentry);
int result;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index dfa1d67..9551cb6 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -714,7 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
error = server->ops->truncate(inode, attr->ia_size);
if (error)
goto out;
- error = vmtruncate(inode, attr->ia_size);
+ error = simple_setsize(inode, attr->ia_size);
if (error)
goto out;
refresh = 1;
diff --git a/fs/splice.c b/fs/splice.c
index ac22b00..740e6b9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
break;
error = add_to_page_cache_lru(page, mapping, index,
- mapping_gfp_mask(mapping));
+ GFP_KERNEL);
if (unlikely(error)) {
page_cache_release(page);
if (error == -EEXIST)
diff --git a/fs/super.c b/fs/super.c
index 69688b1..5c35bc7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -24,7 +24,6 @@
#include <linux/slab.h>
#include <linux/acct.h>
#include <linux/blkdev.h>
-#include <linux/quotaops.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h> /* for the emergency remount stuff */
@@ -94,8 +93,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
init_rwsem(&s->s_dquot.dqptr_sem);
init_waitqueue_head(&s->s_wait_unfrozen);
s->s_maxbytes = MAX_NON_LFS;
- s->dq_op = sb_dquot_ops;
- s->s_qcop = sb_quotactl_ops;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
}
@@ -160,7 +157,6 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
- vfs_dq_off(s, 0);
fs->kill_sb(s);
put_filesystem(fs);
put_super(s);
@@ -524,7 +520,7 @@ rescan:
int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
{
int retval;
- int remount_rw, remount_ro;
+ int remount_ro;
if (sb->s_frozen != SB_UNFROZEN)
return -EBUSY;
@@ -540,7 +536,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
sync_filesystem(sb);
remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
- remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
/* If we are remounting RDONLY and current sb is read/write,
make sure there are no rw files opened */
@@ -549,9 +544,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
mark_files_ro(sb);
else if (!fs_may_remount_ro(sb))
return -EBUSY;
- retval = vfs_dq_off(sb, 1);
- if (retval < 0 && retval != -ENOSYS)
- return -EBUSY;
}
if (sb->s_op->remount_fs) {
@@ -560,8 +552,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
return retval;
}
sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
- if (remount_rw)
- vfs_dq_quota_on_remount(sb);
+
/*
* Some filesystems modify their metadata via some other path than the
* bdev buffer cache (eg. use a private mapping, or directories in
@@ -946,8 +937,8 @@ out:
EXPORT_SYMBOL_GPL(vfs_kern_mount);
/**
- * freeze_super -- lock the filesystem and force it into a consistent state
- * @super: the super to lock
+ * freeze_super - lock the filesystem and force it into a consistent state
+ * @sb: the super to lock
*
* Syncs the super to make sure the filesystem is consistent and calls the fs's
* freeze_fs. Subsequent calls to this without first thawing the fs will return
diff --git a/fs/sync.c b/fs/sync.c
index e8cbd41..15aa6f0 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
if (wait)
sync_inodes_sb(sb);
else
- writeback_inodes_sb_locked(sb);
+ writeback_inodes_sb(sb);
if (sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, wait);
@@ -130,12 +130,10 @@ void emergency_sync(void)
/*
* Generic function to fsync a file.
- *
- * filp may be NULL if called via the msync of a vma.
*/
-int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int file_fsync(struct file *filp, int datasync)
{
- struct inode * inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
struct super_block * sb;
int ret, err;
@@ -183,7 +181,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
* livelocks in fsync_buffers_list().
*/
mutex_lock(&mapping->host->i_mutex);
- err = file->f_op->fsync(file, file->f_path.dentry, datasync);
+ err = file->f_op->fsync(file, datasync);
if (!ret)
ret = err;
mutex_unlock(&mapping->host->i_mutex);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index bbd77e9..0835a3b 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -117,13 +117,13 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
if (error)
goto out;
- iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
-
- error = inode_setattr(inode, iattr);
+ error = sysfs_sd_setattr(sd, iattr);
if (error)
goto out;
- error = sysfs_sd_setattr(sd, iattr);
+ /* this ignores size changes */
+ generic_setattr(inode, iattr);
+
out:
mutex_unlock(&sysfs_mutex);
return error;
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 1dabed2..79941e4 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -24,7 +24,7 @@ const struct file_operations sysv_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = sysv_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
static inline void dir_put_page(struct page *page)
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 96340c0..750cc22 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 4573734..d4a5380 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -43,6 +43,7 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
* then attach current time stamp.
* But if the filesystem was marked clean, keep it clean.
*/
+ sb->s_dirt = 0;
old_time = fs32_to_cpu(sbi, *sbi->s_sb_time);
if (sbi->s_type == FSTYPE_SYSV4) {
if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time))
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5692cf7..12f445c 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -967,12 +967,15 @@ static int do_writepage(struct page *page, int len)
* the page locked, and it locks @ui_mutex. However, write-back does take inode
* @i_mutex, which means other VFS operations may be run on this inode at the
* same time. And the problematic one is truncation to smaller size, from where
- * we have to call 'vmtruncate()', which first changes @inode->i_size, then
+ * we have to call 'simple_setsize()', which first changes @inode->i_size, then
* drops the truncated pages. And while dropping the pages, it takes the page
- * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with
+ * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with
* @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
* means that @inode->i_size is changed while @ui_mutex is unlocked.
*
+ * XXX: with the new truncate the above is not true anymore, the simple_setsize
+ * calls can be replaced with the individual components.
+ *
* But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
* inode size. How do we do this if @inode->i_size may became smaller while we
* are in the middle of 'ubifs_writepage()'? The UBIFS solution is the
@@ -1125,7 +1128,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
budgeted = 0;
}
- err = vmtruncate(inode, new_size);
+ err = simple_setsize(inode, new_size);
if (err)
goto out_budg;
@@ -1214,7 +1217,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
if (attr->ia_valid & ATTR_SIZE) {
dbg_gen("size %lld -> %lld", inode->i_size, new_size);
- err = vmtruncate(inode, new_size);
+ err = simple_setsize(inode, new_size);
if (err)
goto out;
}
@@ -1223,7 +1226,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
if (attr->ia_valid & ATTR_SIZE) {
/* Truncation changes inode [mc]time */
inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
- /* 'vmtruncate()' changed @i_size, update @ui_size */
+ /* 'simple_setsize()' changed @i_size, update @ui_size */
ui->ui_size = inode->i_size;
}
@@ -1304,9 +1307,9 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int ubifs_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
int err;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index bd2542d..2eef553 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb {
* The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
* @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
* make sure @inode->i_size is always changed under @ui_mutex, because it
- * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock
+ * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock
* with 'ubifs_writepage()' (see file.c). All the other inode fields are
* changed under @ui_mutex, so they do not need "shadow" fields. Note, one
* could consider to rework locking and base it on "shadow" fields.
@@ -1678,7 +1678,7 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
int ubifs_calc_dark(const struct ubifs_info *c, int spc);
/* file.c */
-int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
+int ubifs_fsync(struct file *file, int datasync);
int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
/* dir.c */
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 9a9378b..b608efa 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -21,7 +21,6 @@
#include "udfdecl.h"
-#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bitops.h>
@@ -159,8 +158,6 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
udf_debug("byte=%2x\n",
((char *)bh->b_data)[(bit + i) >> 3]);
} else {
- if (inode)
- dquot_free_block(inode, 1);
udf_add_free_space(sb, sbi->s_partition, 1);
}
}
@@ -210,15 +207,8 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
bit = block % (sb->s_blocksize << 3);
while (bit < (sb->s_blocksize << 3) && block_count > 0) {
- if (!udf_test_bit(bit, bh->b_data))
+ if (!udf_clear_bit(bit, bh->b_data))
goto out;
- else if (dquot_prealloc_block(inode, 1))
- goto out;
- else if (!udf_clear_bit(bit, bh->b_data)) {
- udf_debug("bit already cleared for block %d\n", bit);
- dquot_free_block(inode, 1);
- goto out;
- }
block_count--;
alloc_count++;
bit++;
@@ -338,20 +328,6 @@ search_back:
}
got_block:
-
- /*
- * Check quota for allocation of this block.
- */
- if (inode) {
- int ret = dquot_alloc_block(inode, 1);
-
- if (ret) {
- mutex_unlock(&sbi->s_alloc_mutex);
- *err = ret;
- return 0;
- }
- }
-
newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) -
(sizeof(struct spaceBitmapDesc) << 3);
@@ -401,10 +377,6 @@ static void udf_table_free_blocks(struct super_block *sb,
}
iinfo = UDF_I(table);
- /* We do this up front - There are some error conditions that
- could occure, but.. oh well */
- if (inode)
- dquot_free_block(inode, count);
udf_add_free_space(sb, sbi->s_partition, count);
start = bloc->logicalBlockNum + offset;
@@ -649,10 +621,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
epos.offset -= adsize;
alloc_count = (elen >> sb->s_blocksize_bits);
- if (inode && dquot_prealloc_block(inode,
- alloc_count > block_count ? block_count : alloc_count))
- alloc_count = 0;
- else if (alloc_count > block_count) {
+ if (alloc_count > block_count) {
alloc_count = block_count;
eloc.logicalBlockNum += alloc_count;
elen -= (alloc_count << sb->s_blocksize_bits);
@@ -752,14 +721,6 @@ static int udf_table_new_block(struct super_block *sb,
newblock = goal_eloc.logicalBlockNum;
goal_eloc.logicalBlockNum++;
goal_elen -= sb->s_blocksize;
- if (inode) {
- *err = dquot_alloc_block(inode, 1);
- if (*err) {
- brelse(goal_epos.bh);
- mutex_unlock(&sbi->s_alloc_mutex);
- return 0;
- }
- }
if (goal_elen)
udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1);
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 1660c81..51552bf 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -211,5 +211,5 @@ const struct file_operations udf_dir_operations = {
.read = generic_read_dir,
.readdir = udf_readdir,
.unlocked_ioctl = udf_ioctl,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
diff --git a/fs/udf/file.c b/fs/udf/file.c
index baae3a7..94e06d6 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,7 +34,6 @@
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
-#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/aio.h>
#include <linux/smp_lock.h>
@@ -219,39 +218,16 @@ const struct file_operations udf_file_operations = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.unlocked_ioctl = udf_ioctl,
- .open = dquot_file_open,
+ .open = generic_file_open,
.mmap = generic_file_mmap,
.write = do_sync_write,
.aio_write = udf_file_aio_write,
.release = udf_release_file,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
.llseek = generic_file_llseek,
};
-int udf_setattr(struct dentry *dentry, struct iattr *iattr)
-{
- struct inode *inode = dentry->d_inode;
- int error;
-
- error = inode_change_ok(inode, iattr);
- if (error)
- return error;
-
- if (is_quota_modification(inode, iattr))
- dquot_initialize(inode);
-
- if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
- (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
- error = dquot_transfer(inode, iattr);
- if (error)
- return error;
- }
-
- return inode_setattr(inode, iattr);
-}
-
const struct inode_operations udf_file_inode_operations = {
.truncate = udf_truncate,
- .setattr = udf_setattr,
};
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 2b5586c..18cd711 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -20,7 +20,6 @@
#include "udfdecl.h"
#include <linux/fs.h>
-#include <linux/quotaops.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -32,13 +31,6 @@ void udf_free_inode(struct inode *inode)
struct super_block *sb = inode->i_sb;
struct udf_sb_info *sbi = UDF_SB(sb);
- /*
- * Note: we must free any quota before locking the superblock,
- * as writing the quota to disk may need the lock as well.
- */
- dquot_free_inode(inode);
- dquot_drop(inode);
-
clear_inode(inode);
mutex_lock(&sbi->s_alloc_mutex);
@@ -61,7 +53,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
struct super_block *sb = dir->i_sb;
struct udf_sb_info *sbi = UDF_SB(sb);
struct inode *inode;
- int block, ret;
+ int block;
uint32_t start = UDF_I(dir)->i_location.logicalBlockNum;
struct udf_inode_info *iinfo;
struct udf_inode_info *dinfo = UDF_I(dir);
@@ -146,17 +138,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
insert_inode_hash(inode);
mark_inode_dirty(inode);
- dquot_initialize(inode);
- ret = dquot_alloc_inode(inode);
- if (ret) {
- dquot_drop(inode);
- inode->i_flags |= S_NOQUOTA;
- inode->i_nlink = 0;
- iput(inode);
- *err = ret;
- return NULL;
- }
-
*err = 0;
return inode;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8a3fbd1..124852b 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -36,7 +36,6 @@
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
-#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/crc-itu-t.h>
@@ -71,9 +70,6 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
void udf_delete_inode(struct inode *inode)
{
- if (!is_bad_inode(inode))
- dquot_initialize(inode);
-
truncate_inode_pages(&inode->i_data, 0);
if (is_bad_inode(inode))
@@ -113,7 +109,6 @@ void udf_clear_inode(struct inode *inode)
(unsigned long long)iinfo->i_lenExtents);
}
- dquot_drop(inode);
kfree(iinfo->i_ext.i_data);
iinfo->i_ext.i_data = NULL;
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 585f733..bf5fc67 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -27,7 +27,6 @@
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/quotaops.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/sched.h>
@@ -563,8 +562,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
int err;
struct udf_inode_info *iinfo;
- dquot_initialize(dir);
-
lock_kernel();
inode = udf_new_inode(dir, mode, &err);
if (!inode) {
@@ -617,8 +614,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
if (!old_valid_dev(rdev))
return -EINVAL;
- dquot_initialize(dir);
-
lock_kernel();
err = -EIO;
inode = udf_new_inode(dir, mode, &err);
@@ -664,8 +659,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct udf_inode_info *dinfo = UDF_I(dir);
struct udf_inode_info *iinfo;
- dquot_initialize(dir);
-
lock_kernel();
err = -EMLINK;
if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
@@ -800,8 +793,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
struct fileIdentDesc *fi, cfi;
struct kernel_lb_addr tloc;
- dquot_initialize(dir);
-
retval = -ENOENT;
lock_kernel();
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -848,8 +839,6 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
struct fileIdentDesc cfi;
struct kernel_lb_addr tloc;
- dquot_initialize(dir);
-
retval = -ENOENT;
lock_kernel();
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -904,8 +893,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
struct buffer_head *bh;
struct udf_inode_info *iinfo;
- dquot_initialize(dir);
-
lock_kernel();
inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err);
if (!inode)
@@ -1075,8 +1062,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
int err;
struct buffer_head *bh;
- dquot_initialize(dir);
-
lock_kernel();
if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
unlock_kernel();
@@ -1139,9 +1124,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
struct kernel_lb_addr tloc;
struct udf_inode_info *old_iinfo = UDF_I(old_inode);
- dquot_initialize(old_dir);
- dquot_initialize(new_dir);
-
lock_kernel();
ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
if (ofi) {
@@ -1387,7 +1369,6 @@ const struct export_operations udf_export_ops = {
const struct inode_operations udf_dir_inode_operations = {
.lookup = udf_lookup,
.create = udf_create,
- .setattr = udf_setattr,
.link = udf_link,
.unlink = udf_unlink,
.symlink = udf_symlink,
@@ -1400,5 +1381,4 @@ const struct inode_operations udf_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
- .setattr = udf_setattr,
};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 1e4543c..612d1e2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -557,6 +557,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
{
struct udf_options uopt;
struct udf_sb_info *sbi = UDF_SB(sb);
+ int error = 0;
uopt.flags = sbi->s_flags;
uopt.uid = sbi->s_uid;
@@ -582,17 +583,17 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
*flags |= MS_RDONLY;
}
- if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
- unlock_kernel();
- return 0;
- }
+ if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+ goto out_unlock;
+
if (*flags & MS_RDONLY)
udf_close_lvid(sb);
else
udf_open_lvid(sb);
+out_unlock:
unlock_kernel();
- return 0;
+ return error;
}
/* Check Volume Structure Descriptors (ECMA 167 2/9.1) */
@@ -1939,7 +1940,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
/* Fill in the rest of the superblock */
sb->s_op = &udf_sb_ops;
sb->s_export_op = &udf_export_ops;
- sb->dq_op = NULL;
+
sb->s_dirt = 0;
sb->s_magic = UDF_SUPER_MAGIC;
sb->s_time_gran = 1000;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 9079ff7..2bac035 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -131,7 +131,6 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
/* file.c */
extern long udf_ioctl(struct file *, unsigned int, unsigned long);
-extern int udf_setattr(struct dentry *dentry, struct iattr *iattr);
/* inode.c */
extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
extern int udf_sync_inode(struct inode *);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 5cfa4d8..048484f 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -12,7 +12,6 @@
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/string.h>
-#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/capability.h>
#include <linux/bitops.h>
@@ -85,9 +84,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
"bit already cleared for fragment %u", i);
}
- dquot_free_block(inode, count);
-
-
fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
uspi->cs_total.cs_nffree += count;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -195,7 +191,6 @@ do_more:
ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, 1);
- dquot_free_block(inode, uspi->s_fpb);
fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree++;
@@ -511,7 +506,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned cgno, fragno, fragoff, count, fragsize, i;
- int ret;
UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n",
(unsigned long long)fragment, oldcount, newcount);
@@ -557,11 +551,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1);
for (i = oldcount; i < newcount; i++)
ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i);
- ret = dquot_alloc_block(inode, count);
- if (ret) {
- *err = ret;
- return 0;
- }
fs32_sub(sb, &ucg->cg_cs.cs_nffree, count);
fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -598,7 +587,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
struct ufs_cylinder_group * ucg;
unsigned oldcg, i, j, k, allocsize;
u64 result;
- int ret;
UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n",
inode->i_ino, cgno, (unsigned long long)goal, count);
@@ -667,7 +655,6 @@ cg_found:
for (i = count; i < uspi->s_fpb; i++)
ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
i = uspi->s_fpb - count;
- dquot_free_block(inode, i);
fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
uspi->cs_total.cs_nffree += i;
@@ -679,11 +666,6 @@ cg_found:
result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
if (result == INVBLOCK)
return 0;
- ret = dquot_alloc_block(inode, count);
- if (ret) {
- *err = ret;
- return 0;
- }
for (i = 0; i < count; i++)
ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
@@ -718,7 +700,6 @@ static u64 ufs_alloccg_block(struct inode *inode,
struct ufs_super_block_first * usb1;
struct ufs_cylinder_group * ucg;
u64 result, blkno;
- int ret;
UFSD("ENTER, goal %llu\n", (unsigned long long)goal);
@@ -752,11 +733,6 @@ gotit:
ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, -1);
- ret = dquot_alloc_block(inode, uspi->s_fpb);
- if (ret) {
- *err = ret;
- return INVBLOCK;
- }
fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree--;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 317a0d4..ec78475 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -666,6 +666,6 @@ not_empty:
const struct file_operations ufs_dir_operations = {
.read = generic_read_dir,
.readdir = ufs_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.llseek = generic_file_llseek,
};
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index a8962ce..33afa20 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,7 +24,6 @@
*/
#include <linux/fs.h>
-#include <linux/quotaops.h>
#include "ufs_fs.h"
#include "ufs.h"
@@ -41,7 +40,7 @@ const struct file_operations ufs_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .open = dquot_file_open,
- .fsync = simple_fsync,
+ .open = generic_file_open,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 3a959d5..594480e 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -27,7 +27,6 @@
#include <linux/time.h>
#include <linux/stat.h>
#include <linux/string.h>
-#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/sched.h>
#include <linux/bitops.h>
@@ -95,9 +94,6 @@ void ufs_free_inode (struct inode * inode)
is_directory = S_ISDIR(inode->i_mode);
- dquot_free_inode(inode);
- dquot_drop(inode);
-
clear_inode (inode);
if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit))
@@ -347,21 +343,12 @@ cg_found:
unlock_super (sb);
- dquot_initialize(inode);
- err = dquot_alloc_inode(inode);
- if (err) {
- dquot_drop(inode);
- goto fail_without_unlock;
- }
-
UFSD("allocating inode %lu\n", inode->i_ino);
UFSD("EXIT\n");
return inode;
fail_remove_inode:
unlock_super(sb);
-fail_without_unlock:
- inode->i_flags |= S_NOQUOTA;
inode->i_nlink = 0;
iput(inode);
UFSD("EXIT (FAILED): err %d\n", err);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index cffa756..73fe773 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -37,7 +37,6 @@
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
-#include <linux/quotaops.h>
#include "ufs_fs.h"
#include "ufs.h"
@@ -910,9 +909,6 @@ void ufs_delete_inode (struct inode * inode)
{
loff_t old_i_size;
- if (!is_bad_inode(inode))
- dquot_initialize(inode);
-
truncate_inode_pages(&inode->i_data, 0);
if (is_bad_inode(inode))
goto no_delete;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index eabc02e..b056f02 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -30,7 +30,6 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/smp_lock.h>
-#include <linux/quotaops.h>
#include "ufs_fs.h"
#include "ufs.h"
@@ -86,8 +85,6 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
UFSD("BEGIN\n");
- dquot_initialize(dir);
-
inode = ufs_new_inode(dir, mode);
err = PTR_ERR(inode);
@@ -112,8 +109,6 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t
if (!old_valid_dev(rdev))
return -EINVAL;
- dquot_initialize(dir);
-
inode = ufs_new_inode(dir, mode);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
@@ -138,8 +133,6 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
if (l > sb->s_blocksize)
goto out_notlocked;
- dquot_initialize(dir);
-
lock_kernel();
inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
err = PTR_ERR(inode);
@@ -185,8 +178,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
return -EMLINK;
}
- dquot_initialize(dir);
-
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
atomic_inc(&inode->i_count);
@@ -204,8 +195,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
if (dir->i_nlink >= UFS_LINK_MAX)
goto out;
- dquot_initialize(dir);
-
lock_kernel();
inode_inc_link_count(dir);
@@ -250,8 +239,6 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
struct page *page;
int err = -ENOENT;
- dquot_initialize(dir);
-
de = ufs_find_entry(dir, &dentry->d_name, &page);
if (!de)
goto out;
@@ -296,9 +283,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ufs_dir_entry *old_de;
int err = -ENOENT;
- dquot_initialize(old_dir);
- dquot_initialize(new_dir);
-
old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_de)
goto out;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index ad9bc1e..3ec5a9e 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -77,7 +77,6 @@
#include <linux/errno.h>
#include <linux/fs.h>
-#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/stat.h>
@@ -1047,7 +1046,7 @@ magic_found:
*/
sb->s_op = &ufs_super_ops;
sb->s_export_op = &ufs_export_ops;
- sb->dq_op = NULL; /***/
+
sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic);
uspi->s_sblkno = fs32_to_cpu(sb, usb1->fs_sblkno);
@@ -1437,126 +1436,19 @@ static void destroy_inodecache(void)
kmem_cache_destroy(ufs_inode_cachep);
}
-static void ufs_clear_inode(struct inode *inode)
-{
- dquot_drop(inode);
-}
-
-#ifdef CONFIG_QUOTA
-static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t);
-static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t);
-#endif
-
static const struct super_operations ufs_super_ops = {
.alloc_inode = ufs_alloc_inode,
.destroy_inode = ufs_destroy_inode,
.write_inode = ufs_write_inode,
.delete_inode = ufs_delete_inode,
- .clear_inode = ufs_clear_inode,
.put_super = ufs_put_super,
.write_super = ufs_write_super,
.sync_fs = ufs_sync_fs,
.statfs = ufs_statfs,
.remount_fs = ufs_remount,
.show_options = ufs_show_options,
-#ifdef CONFIG_QUOTA
- .quota_read = ufs_quota_read,
- .quota_write = ufs_quota_write,
-#endif
};
-#ifdef CONFIG_QUOTA
-
-/* Read data from quotafile - avoid pagecache and such because we cannot afford
- * acquiring the locks... As quota files are never truncated and quota code
- * itself serializes the operations (and noone else should touch the files)
- * we don't have to be afraid of races */
-static ssize_t ufs_quota_read(struct super_block *sb, int type, char *data,
- size_t len, loff_t off)
-{
- struct inode *inode = sb_dqopt(sb)->files[type];
- sector_t blk = off >> sb->s_blocksize_bits;
- int err = 0;
- int offset = off & (sb->s_blocksize - 1);
- int tocopy;
- size_t toread;
- struct buffer_head *bh;
- loff_t i_size = i_size_read(inode);
-
- if (off > i_size)
- return 0;
- if (off+len > i_size)
- len = i_size-off;
- toread = len;
- while (toread > 0) {
- tocopy = sb->s_blocksize - offset < toread ?
- sb->s_blocksize - offset : toread;
-
- bh = ufs_bread(inode, blk, 0, &err);
- if (err)
- return err;
- if (!bh) /* A hole? */
- memset(data, 0, tocopy);
- else {
- memcpy(data, bh->b_data+offset, tocopy);
- brelse(bh);
- }
- offset = 0;
- toread -= tocopy;
- data += tocopy;
- blk++;
- }
- return len;
-}
-
-/* Write to quotafile */
-static ssize_t ufs_quota_write(struct super_block *sb, int type,
- const char *data, size_t len, loff_t off)
-{
- struct inode *inode = sb_dqopt(sb)->files[type];
- sector_t blk = off >> sb->s_blocksize_bits;
- int err = 0;
- int offset = off & (sb->s_blocksize - 1);
- int tocopy;
- size_t towrite = len;
- struct buffer_head *bh;
-
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
- while (towrite > 0) {
- tocopy = sb->s_blocksize - offset < towrite ?
- sb->s_blocksize - offset : towrite;
-
- bh = ufs_bread(inode, blk, 1, &err);
- if (!bh)
- goto out;
- lock_buffer(bh);
- memcpy(bh->b_data+offset, data, tocopy);
- flush_dcache_page(bh->b_page);
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- unlock_buffer(bh);
- brelse(bh);
- offset = 0;
- towrite -= tocopy;
- data += tocopy;
- blk++;
- }
-out:
- if (len == towrite) {
- mutex_unlock(&inode->i_mutex);
- return err;
- }
- if (inode->i_size < off+len-towrite)
- i_size_write(inode, off+len-towrite);
- inode->i_version++;
- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
- mutex_unlock(&inode->i_mutex);
- return len - towrite;
-}
-
-#endif
-
static int ufs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index f294c44..589e01a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -44,7 +44,6 @@
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/sched.h>
-#include <linux/quotaops.h>
#include "ufs_fs.h"
#include "ufs.h"
@@ -501,12 +500,10 @@ out:
return err;
}
-
/*
- * We don't define our `inode->i_op->truncate', and call it here,
- * because of:
- * - there is no way to know old size
- * - there is no way inform user about error, if it happens in `truncate'
+ * TODO:
+ * - truncate case should use proper ordering instead of using
+ * simple_setsize
*/
int ufs_setattr(struct dentry *dentry, struct iattr *attr)
{
@@ -518,19 +515,10 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
if (error)
return error;
- if (is_quota_modification(inode, attr))
- dquot_initialize(inode);
-
- if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
- error = dquot_transfer(inode, attr);
- if (error)
- return error;
- }
if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
loff_t old_i_size = inode->i_size;
- error = vmtruncate(inode, attr->ia_size);
+ error = simple_setsize(inode, attr->ia_size);
if (error)
return error;
error = ufs_truncate(inode, old_i_size);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 089eaca..34640d6 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1333,6 +1333,21 @@ xfs_vm_writepage(
trace_xfs_writepage(inode, page, 0);
/*
+ * Refuse to write the page out if we are called from reclaim context.
+ *
+ * This is primarily to avoid stack overflows when called from deep
+ * used stacks in random callers for direct reclaim, but disabling
+ * reclaim for kswap is a nice side-effect as kswapd causes rather
+ * suboptimal I/O patters, too.
+ *
+ * This should really be done by the core VM, but until that happens
+ * filesystems like XFS, btrfs and ext4 have to take care of this
+ * by themselves.
+ */
+ if (current->flags & PF_MEMALLOC)
+ goto out_fail;
+
+ /*
* We need a transaction if:
* 1. There are delalloc buffers on the page
* 2. The page is uptodate and we have unmapped buffers
@@ -1366,14 +1381,6 @@ xfs_vm_writepage(
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
- /*
- * VM calculation for nr_to_write seems off. Bump it way
- * up, this gets simple streaming writes zippy again.
- * To be reviewed again after Jens' writeback changes.
- */
- wbc->nr_to_write *= 4;
-
/*
* Convert delayed allocate, unwritten or unmapped space
* to real space and flush out to disk.
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index d8fb1b5..257a56b 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -100,10 +100,10 @@ xfs_iozero(
STATIC int
xfs_file_fsync(
struct file *file,
- struct dentry *dentry,
int datasync)
{
- struct xfs_inode *ip = XFS_I(dentry->d_inode);
+ struct inode *inode = file->f_mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
struct xfs_trans *tp;
int error = 0;
int log_flushed = 0;
@@ -140,8 +140,8 @@ xfs_file_fsync(
* might gets cleared when the inode gets written out via the AIL
* or xfs_iflush_cluster.
*/
- if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
- ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
+ if (((inode->i_state & I_DIRTY_DATASYNC) ||
+ ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
ip->i_update_core) {
/*
* Kick off a transaction to log the inode core to get the
@@ -868,7 +868,7 @@ write_retry:
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, iolock);
- error2 = -xfs_file_fsync(file, file->f_path.dentry,
+ error2 = -xfs_file_fsync(file,
(file->f_flags & __O_SYNC) ? 0 : 1);
if (!error)
error = error2;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9c8019c..44f0b2d 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -585,11 +585,20 @@ xfs_vn_fallocate(
bf.l_len = len;
xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+ /* check the new inode size is valid before allocating */
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+ error = inode_newsize_ok(inode, new_size);
+ if (error)
+ goto out_unlock;
+ }
+
error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
0, XFS_ATTR_NOLOCK);
- if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode))
- new_size = offset + len;
+ if (error)
+ goto out_unlock;
/* Change file size if needed */
if (new_size) {
@@ -600,6 +609,7 @@ xfs_vn_fallocate(
error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
}
+out_unlock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
out_error:
return error;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 9ac8aea..067cafb 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -23,7 +23,6 @@
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_quota.h"
-#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3884e20..ef7f021 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -164,10 +164,6 @@ xfs_inode_ag_iterator(
struct xfs_perag *pag;
pag = xfs_perag_get(mp, ag);
- if (!pag->pag_ici_init) {
- xfs_perag_put(pag);
- continue;
- }
error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
exclusive, &nr);
xfs_perag_put(pag);
@@ -867,12 +863,7 @@ xfs_reclaim_inode_shrink(
down_read(&xfs_mount_list_lock);
list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
-
pag = xfs_perag_get(mp, ag);
- if (!pag->pag_ici_init) {
- xfs_perag_put(pag);
- continue;
- }
reclaimable += pag->pag_ici_reclaimable;
xfs_perag_put(pag);
}
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 207fa77..d12be84 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -50,7 +50,6 @@
#include "quota/xfs_dquot_item.h"
#include "quota/xfs_dquot.h"
#include "xfs_log_recover.h"
-#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
/*
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index ff6bc79..73d5aa1 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -82,33 +82,6 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
)
)
-#define DEFINE_PERAG_REF_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
- unsigned long caller_ip), \
- TP_ARGS(mp, agno, refcount, caller_ip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_agnumber_t, agno) \
- __field(int, refcount) \
- __field(unsigned long, caller_ip) \
- ), \
- TP_fast_assign( \
- __entry->dev = mp->m_super->s_dev; \
- __entry->agno = agno; \
- __entry->refcount = refcount; \
- __entry->caller_ip = caller_ip; \
- ), \
- TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->agno, \
- __entry->refcount, \
- (char *)__entry->caller_ip) \
-);
-
-DEFINE_PERAG_REF_EVENT(xfs_perag_get)
-DEFINE_PERAG_REF_EVENT(xfs_perag_put)
-
#define DEFINE_ATTR_LIST_EVENT(name) \
DEFINE_EVENT(xfs_attr_list_class, name, \
TP_PROTO(struct xfs_attr_list_context *ctx), \
@@ -122,6 +95,37 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
+DECLARE_EVENT_CLASS(xfs_perag_class,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
+ unsigned long caller_ip),
+ TP_ARGS(mp, agno, refcount, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(int, refcount)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->refcount = refcount;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->refcount,
+ (char *)__entry->caller_ip)
+);
+
+#define DEFINE_PERAG_REF_EVENT(name) \
+DEFINE_EVENT(xfs_perag_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
+ unsigned long caller_ip), \
+ TP_ARGS(mp, agno, refcount, caller_ip))
+DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+
TRACE_EVENT(xfs_attr_list_node_descend,
TP_PROTO(struct xfs_attr_list_context *ctx,
struct xfs_da_node_entry *btree),
@@ -775,165 +779,181 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
-#define DEFINE_RW_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
- TP_ARGS(ip, count, offset, flags), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(xfs_fsize_t, size) \
- __field(xfs_fsize_t, new_size) \
- __field(loff_t, offset) \
- __field(size_t, count) \
- __field(int, flags) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = ip->i_new_size; \
- __entry->offset = offset; \
- __entry->count = count; \
- __entry->flags = flags; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
- "offset 0x%llx count 0x%zx ioflags %s", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size, \
- __entry->offset, \
- __entry->count, \
- __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
+DECLARE_EVENT_CLASS(xfs_file_class,
+ TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
+ TP_ARGS(ip, count, offset, flags),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_fsize_t, size)
+ __field(xfs_fsize_t, new_size)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = ip->i_new_size;
+ __entry->offset = offset;
+ __entry->count = count;
+ __entry->flags = flags;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ "offset 0x%llx count 0x%zx ioflags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size,
+ __entry->offset,
+ __entry->count,
+ __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
)
+
+#define DEFINE_RW_EVENT(name) \
+DEFINE_EVENT(xfs_file_class, name, \
+ TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
+ TP_ARGS(ip, count, offset, flags))
DEFINE_RW_EVENT(xfs_file_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_splice_read);
DEFINE_RW_EVENT(xfs_file_splice_write);
-
-#define DEFINE_PAGE_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
- TP_ARGS(inode, page, off), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(pgoff_t, pgoff) \
- __field(loff_t, size) \
- __field(unsigned long, offset) \
- __field(int, delalloc) \
- __field(int, unmapped) \
- __field(int, unwritten) \
- ), \
- TP_fast_assign( \
- int delalloc = -1, unmapped = -1, unwritten = -1; \
- \
- if (page_has_buffers(page)) \
- xfs_count_page_state(page, &delalloc, \
- &unmapped, &unwritten); \
- __entry->dev = inode->i_sb->s_dev; \
- __entry->ino = XFS_I(inode)->i_ino; \
- __entry->pgoff = page_offset(page); \
- __entry->size = i_size_read(inode); \
- __entry->offset = off; \
- __entry->delalloc = delalloc; \
- __entry->unmapped = unmapped; \
- __entry->unwritten = unwritten; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \
- "delalloc %d unmapped %d unwritten %d", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->pgoff, \
- __entry->size, \
- __entry->offset, \
- __entry->delalloc, \
- __entry->unmapped, \
- __entry->unwritten) \
+DECLARE_EVENT_CLASS(xfs_page_class,
+ TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
+ TP_ARGS(inode, page, off),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(pgoff_t, pgoff)
+ __field(loff_t, size)
+ __field(unsigned long, offset)
+ __field(int, delalloc)
+ __field(int, unmapped)
+ __field(int, unwritten)
+ ),
+ TP_fast_assign(
+ int delalloc = -1, unmapped = -1, unwritten = -1;
+
+ if (page_has_buffers(page))
+ xfs_count_page_state(page, &delalloc,
+ &unmapped, &unwritten);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = XFS_I(inode)->i_ino;
+ __entry->pgoff = page_offset(page);
+ __entry->size = i_size_read(inode);
+ __entry->offset = off;
+ __entry->delalloc = delalloc;
+ __entry->unmapped = unmapped;
+ __entry->unwritten = unwritten;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
+ "delalloc %d unmapped %d unwritten %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->pgoff,
+ __entry->size,
+ __entry->offset,
+ __entry->delalloc,
+ __entry->unmapped,
+ __entry->unwritten)
)
+
+#define DEFINE_PAGE_EVENT(name) \
+DEFINE_EVENT(xfs_page_class, name, \
+ TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
+ TP_ARGS(inode, page, off))
DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
-#define DEFINE_IOMAP_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
- int flags, struct xfs_bmbt_irec *irec), \
- TP_ARGS(ip, offset, count, flags, irec), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(loff_t, size) \
- __field(loff_t, new_size) \
- __field(loff_t, offset) \
- __field(size_t, count) \
- __field(int, flags) \
- __field(xfs_fileoff_t, startoff) \
- __field(xfs_fsblock_t, startblock) \
- __field(xfs_filblks_t, blockcount) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = ip->i_new_size; \
- __entry->offset = offset; \
- __entry->count = count; \
- __entry->flags = flags; \
- __entry->startoff = irec ? irec->br_startoff : 0; \
- __entry->startblock = irec ? irec->br_startblock : 0; \
- __entry->blockcount = irec ? irec->br_blockcount : 0; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
- "offset 0x%llx count %zd flags %s " \
- "startoff 0x%llx startblock %lld blockcount 0x%llx", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size, \
- __entry->offset, \
- __entry->count, \
- __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
- __entry->startoff, \
- (__int64_t)__entry->startblock, \
- __entry->blockcount) \
+DECLARE_EVENT_CLASS(xfs_iomap_class,
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
+ int flags, struct xfs_bmbt_irec *irec),
+ TP_ARGS(ip, offset, count, flags, irec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(loff_t, size)
+ __field(loff_t, new_size)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, flags)
+ __field(xfs_fileoff_t, startoff)
+ __field(xfs_fsblock_t, startblock)
+ __field(xfs_filblks_t, blockcount)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = ip->i_new_size;
+ __entry->offset = offset;
+ __entry->count = count;
+ __entry->flags = flags;
+ __entry->startoff = irec ? irec->br_startoff : 0;
+ __entry->startblock = irec ? irec->br_startblock : 0;
+ __entry->blockcount = irec ? irec->br_blockcount : 0;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ "offset 0x%llx count %zd flags %s "
+ "startoff 0x%llx startblock %lld blockcount 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size,
+ __entry->offset,
+ __entry->count,
+ __print_flags(__entry->flags, "|", BMAPI_FLAGS),
+ __entry->startoff,
+ (__int64_t)__entry->startblock,
+ __entry->blockcount)
)
+
+#define DEFINE_IOMAP_EVENT(name) \
+DEFINE_EVENT(xfs_iomap_class, name, \
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
+ int flags, struct xfs_bmbt_irec *irec), \
+ TP_ARGS(ip, offset, count, flags, irec))
DEFINE_IOMAP_EVENT(xfs_iomap_enter);
DEFINE_IOMAP_EVENT(xfs_iomap_found);
DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
-#define DEFINE_SIMPLE_IO_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
- TP_ARGS(ip, offset, count), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(loff_t, size) \
- __field(loff_t, new_size) \
- __field(loff_t, offset) \
- __field(size_t, count) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = ip->i_new_size; \
- __entry->offset = offset; \
- __entry->count = count; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
- "offset 0x%llx count %zd", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size, \
- __entry->offset, \
- __entry->count) \
+DECLARE_EVENT_CLASS(xfs_simple_io_class,
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+ TP_ARGS(ip, offset, count),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(loff_t, size)
+ __field(loff_t, new_size)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = ip->i_new_size;
+ __entry->offset = offset;
+ __entry->count = count;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ "offset 0x%llx count %zd",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size,
+ __entry->offset,
+ __entry->count)
);
+
+#define DEFINE_SIMPLE_IO_EVENT(name) \
+DEFINE_EVENT(xfs_simple_io_class, name, \
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
+ TP_ARGS(ip, offset, count))
DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 38e7641..2d8b7bc 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -249,8 +249,10 @@ xfs_qm_hold_quotafs_ref(
if (!xfs_Gqm) {
xfs_Gqm = xfs_Gqm_init();
- if (!xfs_Gqm)
+ if (!xfs_Gqm) {
+ mutex_unlock(&xfs_Gqm_lock);
return ENOMEM;
+ }
}
/*
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 401f364..4917d4e 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,6 @@ typedef struct xfs_perag {
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
- int pag_ici_init; /* incore inode cache initialised */
rwlock_t pag_ici_lock; /* incore inode lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
int pag_ici_reclaimable; /* reclaimable inodes */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 6845db9..75df75f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -382,9 +382,6 @@ xfs_iget(
/* get the perag structure and ensure that it's inode capable */
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
- if (!pag->pagi_inodeok)
- return EINVAL;
- ASSERT(pag->pag_ici_init);
agino = XFS_INO_TO_AGINO(mp, ino);
again:
@@ -744,30 +741,24 @@ xfs_ilock_demote(
}
#ifdef DEBUG
-/*
- * Debug-only routine, without additional rw_semaphore APIs, we can
- * now only answer requests regarding whether we hold the lock for write
- * (reader state is outside our visibility, we only track writer state).
- *
- * Note: this means !xfs_isilocked would give false positives, so don't do that.
- */
int
xfs_isilocked(
xfs_inode_t *ip,
uint lock_flags)
{
- if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
- XFS_ILOCK_EXCL) {
- if (!ip->i_lock.mr_writer)
- return 0;
+ if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
+ if (!(lock_flags & XFS_ILOCK_SHARED))
+ return !!ip->i_lock.mr_writer;
+ return rwsem_is_locked(&ip->i_lock.mr_lock);
}
- if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
- XFS_IOLOCK_EXCL) {
- if (!ip->i_iolock.mr_writer)
- return 0;
+ if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
+ if (!(lock_flags & XFS_IOLOCK_SHARED))
+ return !!ip->i_iolock.mr_writer;
+ return rwsem_is_locked(&ip->i_iolock.mr_lock);
}
- return 1;
+ ASSERT(0);
+ return 0;
}
#endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8cd6e8d..d53c39d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1940,10 +1940,10 @@ xfs_ifree_cluster(
int blks_per_cluster;
int nbufs;
int ninodes;
- int i, j, found, pre_flushed;
+ int i, j;
xfs_daddr_t blkno;
xfs_buf_t *bp;
- xfs_inode_t *ip, **ip_found;
+ xfs_inode_t *ip;
xfs_inode_log_item_t *iip;
xfs_log_item_t *lip;
struct xfs_perag *pag;
@@ -1960,114 +1960,97 @@ xfs_ifree_cluster(
nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
}
- ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
-
for (j = 0; j < nbufs; j++, inum += ninodes) {
+ int found = 0;
+
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
+ /*
+ * We obtain and lock the backing buffer first in the process
+ * here, as we have to ensure that any dirty inode that we
+ * can't get the flush lock on is attached to the buffer.
+ * If we scan the in-memory inodes first, then buffer IO can
+ * complete before we get a lock on it, and hence we may fail
+ * to mark all the active inodes on the buffer stale.
+ */
+ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
+ mp->m_bsize * blks_per_cluster,
+ XBF_LOCK);
+
+ /*
+ * Walk the inodes already attached to the buffer and mark them
+ * stale. These will all have the flush locks held, so an
+ * in-memory inode walk can't lock them.
+ */
+ lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ while (lip) {
+ if (lip->li_type == XFS_LI_INODE) {
+ iip = (xfs_inode_log_item_t *)lip;
+ ASSERT(iip->ili_logged == 1);
+ lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
+ xfs_trans_ail_copy_lsn(mp->m_ail,
+ &iip->ili_flush_lsn,
+ &iip->ili_item.li_lsn);
+ xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
+ found++;
+ }
+ lip = lip->li_bio_list;
+ }
/*
- * Look for each inode in memory and attempt to lock it,
- * we can be racing with flush and tail pushing here.
- * any inode we get the locks on, add to an array of
- * inode items to process later.
+ * For each inode in memory attempt to add it to the inode
+ * buffer and set it up for being staled on buffer IO
+ * completion. This is safe as we've locked out tail pushing
+ * and flushing by locking the buffer.
*
- * The get the buffer lock, we could beat a flush
- * or tail pushing thread to the lock here, in which
- * case they will go looking for the inode buffer
- * and fail, we need some other form of interlock
- * here.
+ * We have already marked every inode that was part of a
+ * transaction stale above, which means there is no point in
+ * even trying to lock them.
*/
- found = 0;
for (i = 0; i < ninodes; i++) {
read_lock(&pag->pag_ici_lock);
ip = radix_tree_lookup(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, (inum + i)));
- /* Inode not in memory or we found it already,
- * nothing to do
- */
+ /* Inode not in memory or stale, nothing to do */
if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
read_unlock(&pag->pag_ici_lock);
continue;
}
- if (xfs_inode_clean(ip)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
-
- /* If we can get the locks then add it to the
- * list, otherwise by the time we get the bp lock
- * below it will already be attached to the
- * inode buffer.
- */
-
- /* This inode will already be locked - by us, lets
- * keep it that way.
- */
-
- if (ip == free_ip) {
- if (xfs_iflock_nowait(ip)) {
- xfs_iflags_set(ip, XFS_ISTALE);
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- } else {
- ip_found[found++] = ip;
- }
- }
+ /* don't try to lock/unlock the current inode */
+ if (ip != free_ip &&
+ !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
read_unlock(&pag->pag_ici_lock);
continue;
}
+ read_unlock(&pag->pag_ici_lock);
- if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- if (xfs_iflock_nowait(ip)) {
- xfs_iflags_set(ip, XFS_ISTALE);
-
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- } else {
- ip_found[found++] = ip;
- }
- } else {
+ if (!xfs_iflock_nowait(ip)) {
+ if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
+ continue;
}
- read_unlock(&pag->pag_ici_lock);
- }
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * blks_per_cluster,
- XBF_LOCK);
-
- pre_flushed = 0;
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- while (lip) {
- if (lip->li_type == XFS_LI_INODE) {
- iip = (xfs_inode_log_item_t *)lip;
- ASSERT(iip->ili_logged == 1);
- lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
- xfs_trans_ail_copy_lsn(mp->m_ail,
- &iip->ili_flush_lsn,
- &iip->ili_item.li_lsn);
- xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
- pre_flushed++;
+ xfs_iflags_set(ip, XFS_ISTALE);
+ if (xfs_inode_clean(ip)) {
+ ASSERT(ip != free_ip);
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ continue;
}
- lip = lip->li_bio_list;
- }
- for (i = 0; i < found; i++) {
- ip = ip_found[i];
iip = ip->i_itemp;
-
if (!iip) {
+ /* inode with unlogged changes only */
+ ASSERT(ip != free_ip);
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
+ found++;
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
@@ -2078,17 +2061,16 @@ xfs_ifree_cluster(
xfs_buf_attach_iodone(bp,
(void(*)(xfs_buf_t*,xfs_log_item_t*))
xfs_istale_done, (xfs_log_item_t *)iip);
- if (ip != free_ip) {
+
+ if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
}
- if (found || pre_flushed)
+ if (found)
xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
- kmem_free(ip_found);
xfs_perag_put(pag);
}
@@ -2649,8 +2631,6 @@ xfs_iflush_cluster(
int i;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- ASSERT(pag->pagi_inodeok);
- ASSERT(pag->pag_ici_init);
inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 14a69ae..ed0684c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -132,15 +132,10 @@ xlog_align(
int nbblks,
xfs_buf_t *bp)
{
- xfs_daddr_t offset;
- xfs_caddr_t ptr;
+ xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
- offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
- ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
-
- ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
-
- return ptr;
+ ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
+ return XFS_BUF_PTR(bp) + BBTOB(offset);
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d7bf38c..d59f4e8 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -268,10 +268,10 @@ xfs_sb_validate_fsb_count(
#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
- return E2BIG;
+ return EFBIG;
#else /* Limited by UINT_MAX of sectors */
if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
- return E2BIG;
+ return EFBIG;
#endif
return 0;
}
@@ -393,7 +393,7 @@ xfs_mount_validate_sb(
xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
xfs_fs_mount_cmn_err(flags,
"file system too large to be mounted on this system.");
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
if (unlikely(sbp->sb_inprogress)) {
@@ -413,17 +413,6 @@ xfs_mount_validate_sb(
return 0;
}
-STATIC void
-xfs_initialize_perag_icache(
- xfs_perag_t *pag)
-{
- if (!pag->pag_ici_init) {
- rwlock_init(&pag->pag_ici_lock);
- INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- pag->pag_ici_init = 1;
- }
-}
-
int
xfs_initialize_perag(
xfs_mount_t *mp,
@@ -436,13 +425,8 @@ xfs_initialize_perag(
xfs_agino_t agino;
xfs_ino_t ino;
xfs_sb_t *sbp = &mp->m_sb;
- xfs_ino_t max_inum = XFS_MAXINUMBER_32;
int error = -ENOMEM;
- /* Check to see if the filesystem can overflow 32 bit inodes */
- agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
- ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
-
/*
* Walk the current per-ag tree so we don't try to initialise AGs
* that already exist (growfs case). Allocate and insert all the
@@ -456,11 +440,18 @@ xfs_initialize_perag(
}
if (!first_initialised)
first_initialised = index;
+
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
if (!pag)
goto out_unwind;
+ pag->pag_agno = index;
+ pag->pag_mount = mp;
+ rwlock_init(&pag->pag_ici_lock);
+ INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+
if (radix_tree_preload(GFP_NOFS))
goto out_unwind;
+
spin_lock(&mp->m_perag_lock);
if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
BUG();
@@ -469,25 +460,26 @@ xfs_initialize_perag(
error = -EEXIST;
goto out_unwind;
}
- pag->pag_agno = index;
- pag->pag_mount = mp;
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
}
- /* Clear the mount flag if no inode can overflow 32 bits
- * on this filesystem, or if specifically requested..
+ /*
+ * If we mount with the inode64 option, or no inode overflows
+ * the legacy 32-bit address space clear the inode32 option.
*/
- if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) {
+ agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+ ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+
+ if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
mp->m_flags |= XFS_MOUNT_32BITINODES;
- } else {
+ else
mp->m_flags &= ~XFS_MOUNT_32BITINODES;
- }
- /* If we can overflow then setup the ag headers accordingly */
if (mp->m_flags & XFS_MOUNT_32BITINODES) {
- /* Calculate how much should be reserved for inodes to
- * meet the max inode percentage.
+ /*
+ * Calculate how much should be reserved for inodes to meet
+ * the max inode percentage.
*/
if (mp->m_maxicount) {
__uint64_t icount;
@@ -500,30 +492,28 @@ xfs_initialize_perag(
} else {
max_metadata = agcount;
}
+
for (index = 0; index < agcount; index++) {
ino = XFS_AGINO_TO_INO(mp, index, agino);
- if (ino > max_inum) {
+ if (ino > XFS_MAXINUMBER_32) {
index++;
break;
}
- /* This ag is preferred for inodes */
pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
if (index < max_metadata)
pag->pagf_metadata = 1;
- xfs_initialize_perag_icache(pag);
xfs_perag_put(pag);
}
} else {
- /* Setup default behavior for smaller filesystems */
for (index = 0; index < agcount; index++) {
pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
- xfs_initialize_perag_icache(pag);
xfs_perag_put(pag);
}
}
+
if (maxagi)
*maxagi = index;
return 0;
@@ -1009,7 +999,7 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
cmn_err(CE_WARN, "XFS: size check 1 failed");
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
error = xfs_read_buf(mp, mp->m_ddev_targp,
d - XFS_FSS_TO_BB(mp, 1),
@@ -1019,7 +1009,7 @@ xfs_check_sizes(xfs_mount_t *mp)
} else {
cmn_err(CE_WARN, "XFS: size check 2 failed");
if (error == ENOSPC)
- error = XFS_ERROR(E2BIG);
+ error = XFS_ERROR(EFBIG);
return error;
}
@@ -1027,7 +1017,7 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
cmn_err(CE_WARN, "XFS: size check 3 failed");
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
error = xfs_read_buf(mp, mp->m_logdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
@@ -1037,7 +1027,7 @@ xfs_check_sizes(xfs_mount_t *mp)
} else {
cmn_err(CE_WARN, "XFS: size check 3 failed");
if (error == ENOSPC)
- error = XFS_ERROR(E2BIG);
+ error = XFS_ERROR(EFBIG);
return error;
}
}
@@ -1254,7 +1244,7 @@ xfs_mountfs(
* Allocate and initialize the per-ag data.
*/
spin_lock_init(&mp->m_perag_lock);
- INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
+ INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
if (error) {
cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6be05f7..1644551 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2247,7 +2247,7 @@ xfs_rtmount_init(
cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu",
(unsigned long long) XFS_BB_TO_FSB(mp, d),
(unsigned long long) mp->m_sb.sb_rblocks);
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
error = xfs_read_buf(mp, mp->m_rtdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
@@ -2256,7 +2256,7 @@ xfs_rtmount_init(
cmn_err(CE_WARN,
"XFS: realtime mount -- xfs_read_buf failed, returned %d", error);
if (error == ENOSPC)
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
return error;
}
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index b2d67ad..ff614c2 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -147,7 +147,16 @@ xfs_growfs_rt(
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
# define xfs_growfs_rt(mp,in) (ENOSYS)
-# define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
+static inline int /* error */
+xfs_rtmount_init(
+ xfs_mount_t *mp) /* file system mount structure */
+{
+ if (mp->m_sb.sb_rblocks == 0)
+ return 0;
+
+ cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT");
+ return ENOSYS;
+}
# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
# define xfs_rtunmount_inodes(m)
#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ce558ef..28547df 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -48,134 +48,489 @@
kmem_zone_t *xfs_trans_zone;
+
/*
- * Reservation functions here avoid a huge stack in xfs_trans_init
- * due to register overflow from temporaries in the calculations.
+ * Various log reservation values.
+ *
+ * These are based on the size of the file system block because that is what
+ * most transactions manipulate. Each adds in an additional 128 bytes per
+ * item logged to try to account for the overhead of the transaction mechanism.
+ *
+ * Note: Most of the reservations underestimate the number of allocation
+ * groups into which they could free extents in the xfs_bmap_finish() call.
+ * This is because the number in the worst case is quite high and quite
+ * unusual. In order to fix this we need to change xfs_bmap_finish() to free
+ * extents in only a single AG at a time. This will require changes to the
+ * EFI code as well, however, so that the EFI for the extents not freed is
+ * logged again in each transaction. See SGI PV #261917.
+ *
+ * Reservation functions here avoid a huge stack in xfs_trans_init due to
+ * register overflow from temporaries in the calculations.
+ */
+
+
+/*
+ * In a write transaction we can allocate a maximum of 2
+ * extents. This gives:
+ * the inode getting the new extents: inode size
+ * the inode's bmap btree: max depth * block size
+ * the agfs of the ags from which the extents are allocated: 2 * sector
+ * the superblock free block counter: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join:
+ * the agfs of the ags containing the blocks: 2 * sector size
+ * the agfls of the ags containing the blocks: 2 * sector size
+ * the super block free block counter: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
-xfs_calc_write_reservation(xfs_mount_t *mp)
+xfs_calc_write_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
+ (2 * mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
}
+/*
+ * In truncating a file we free up to two extents at once. We can modify:
+ * the inode being truncated: inode size
+ * the inode's bmap btree: (max depth + 1) * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * the agf for each of the ags: 4 * sector size
+ * the agfl for each of the ags: 4 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming 4 extents:
+ * 4 exts * 2 trees * (2 * max depth - 1) * block size
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_itruncate_reservation(xfs_mount_t *mp)
+xfs_calc_itruncate_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
+ 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+ (4 * mp->m_sb.sb_sectsize +
+ 4 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 4) +
+ 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
+ 128 * 5 +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * In renaming a files we can modify:
+ * the four inodes involved: 4 * inode size
+ * the two directory btrees: 2 * (max depth + v2) * dir block size
+ * the two directory bmap btrees: 2 * max depth * block size
+ * And the bmap_finish transaction can free dir and bmap blocks (two sets
+ * of bmap blocks) giving:
+ * the agf for the ags in which the blocks live: 3 * sector size
+ * the agfl for the ags in which the blocks live: 3 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_rename_reservation(xfs_mount_t *mp)
+xfs_calc_rename_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((4 * mp->m_sb.sb_inodesize +
+ 2 * XFS_DIROP_LOG_RES(mp) +
+ 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
+ (3 * mp->m_sb.sb_sectsize +
+ 3 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 3) +
+ 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
}
+/*
+ * For creating a link to an inode:
+ * the parent directory inode: inode size
+ * the linked inode: inode size
+ * the directory btree could split: (max depth + v2) * dir block size
+ * the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free some bmap blocks giving:
+ * the agf for the ag in which the blocks live: sector size
+ * the agfl for the ag in which the blocks live: sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_link_reservation(xfs_mount_t *mp)
+xfs_calc_link_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ XFS_DIROP_LOG_RES(mp) +
+ 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+ (mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * For removing a directory entry we can modify:
+ * the parent directory inode: inode size
+ * the removed inode: inode size
+ * the directory btree could join: (max depth + v2) * dir block size
+ * the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ * the agf for the ag in which the blocks live: 2 * sector size
+ * the agfl for the ag in which the blocks live: 2 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_remove_reservation(xfs_mount_t *mp)
+xfs_calc_remove_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ XFS_DIROP_LOG_RES(mp) +
+ 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+ (2 * mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
}
+/*
+ * For symlink we can modify:
+ * the parent directory inode: inode size
+ * the new inode: inode size
+ * the inode btree entry: 1 block
+ * the directory btree: (max depth + v2) * dir block size
+ * the directory inode's bmap btree: (max depth + v2) * block size
+ * the blocks for the symlink: 1 kB
+ * Or in the first xact we allocate some inodes giving:
+ * the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_symlink_reservation(xfs_mount_t *mp)
+xfs_calc_symlink_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, 1) +
+ XFS_DIROP_LOG_RES(mp) +
+ 1024 +
+ 128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
+ (2 * mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+ XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * For create we can modify:
+ * the parent directory inode: inode size
+ * the new inode: inode size
+ * the inode btree entry: block size
+ * the superblock for the nlink flag: sector size
+ * the directory btree: (max depth + v2) * dir block size
+ * the directory inode's bmap btree: (max depth + v2) * block size
+ * Or in the first xact we allocate some inodes giving:
+ * the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ * the superblock for the nlink flag: sector size
+ * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_create_reservation(xfs_mount_t *mp)
+xfs_calc_create_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, 1) +
+ XFS_DIROP_LOG_RES(mp) +
+ 128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
+ (3 * mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+ XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * Making a new directory is the same as creating a new file.
+ */
STATIC uint
-xfs_calc_mkdir_reservation(xfs_mount_t *mp)
+xfs_calc_mkdir_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return xfs_calc_create_reservation(mp);
}
+/*
+ * In freeing an inode we can modify:
+ * the inode being freed: inode size
+ * the super block free inode counter: sector size
+ * the agi hash list and counters: sector size
+ * the inode btree entry: block size
+ * the on disk inode before ours in the agi hash list: inode cluster size
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_ifree_reservation(xfs_mount_t *mp)
+xfs_calc_ifree_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, 1) +
+ MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
+ XFS_INODE_CLUSTER_SIZE(mp)) +
+ 128 * 5 +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * When only changing the inode we log the inode and possibly the superblock
+ * We also add a bit of slop for the transaction stuff.
+ */
STATIC uint
-xfs_calc_ichange_reservation(xfs_mount_t *mp)
+xfs_calc_ichange_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ 512;
+
}
+/*
+ * Growing the data section of the filesystem.
+ * superblock
+ * agi and agf
+ * allocation btrees
+ */
STATIC uint
-xfs_calc_growdata_reservation(xfs_mount_t *mp)
+xfs_calc_growdata_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWDATA_LOG_RES(mp);
+ return mp->m_sb.sb_sectsize * 3 +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * Growing the rt section of the filesystem.
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ * superblock: sector size
+ * agf of the ag from which the extent is allocated: sector size
+ * bmap btree for bitmap/summary inode: max depth * blocksize
+ * bitmap/summary inode: inode size
+ * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
STATIC uint
-xfs_calc_growrtalloc_reservation(xfs_mount_t *mp)
+xfs_calc_growrtalloc_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWRTALLOC_LOG_RES(mp);
+ return 2 * mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+ mp->m_sb.sb_inodesize +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ * one bitmap/summary block: blocksize
+ */
STATIC uint
-xfs_calc_growrtzero_reservation(xfs_mount_t *mp)
+xfs_calc_growrtzero_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWRTZERO_LOG_RES(mp);
+ return mp->m_sb.sb_blocksize + 128;
}
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks.
+ * superblock: sector size
+ * bitmap inode: inode size
+ * summary inode: inode size
+ * one bitmap block: blocksize
+ * summary blocks: new summary size
+ */
STATIC uint
-xfs_calc_growrtfree_reservation(xfs_mount_t *mp)
+xfs_calc_growrtfree_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWRTFREE_LOG_RES(mp);
+ return mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_blocksize +
+ mp->m_rsumsize +
+ 128 * 5;
}
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ * inode
+ */
STATIC uint
-xfs_calc_swrite_reservation(xfs_mount_t *mp)
+xfs_calc_swrite_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_SWRITE_LOG_RES(mp);
+ return mp->m_sb.sb_inodesize + 128;
}
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ * inode
+ */
STATIC uint
xfs_calc_writeid_reservation(xfs_mount_t *mp)
{
- return XFS_CALC_WRITEID_LOG_RES(mp);
+ return mp->m_sb.sb_inodesize + 128;
}
+/*
+ * Converting the inode from non-attributed to attributed.
+ * the inode being converted: inode size
+ * agf block and superblock (for block allocation)
+ * the new block (directory sized)
+ * bmap blocks for the new directory block
+ * allocation btrees
+ */
STATIC uint
-xfs_calc_addafork_reservation(xfs_mount_t *mp)
+xfs_calc_addafork_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize * 2 +
+ mp->m_dirblksize +
+ XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * Removing the attribute fork of a file
+ * the inode being truncated: inode size
+ * the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * the agf for each of the ags: 4 * sector size
+ * the agfl for each of the ags: 4 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming 4 extents:
+ * 4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_attrinval_reservation(xfs_mount_t *mp)
+xfs_calc_attrinval_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ATTRINVAL_LOG_RES(mp);
+ return MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+ 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
+ (4 * mp->m_sb.sb_sectsize +
+ 4 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 4) +
+ 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
}
+/*
+ * Setting an attribute.
+ * the inode getting the attribute
+ * the superblock for allocations
+ * the agfs extents are allocated from
+ * the attribute btree * max depth
+ * the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime.
+ */
STATIC uint
-xfs_calc_attrset_reservation(xfs_mount_t *mp)
+xfs_calc_attrset_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+ 128 * (2 + XFS_DA_NODE_MAXDEPTH);
}
+/*
+ * Removing an attribute.
+ * the inode: inode size
+ * the attribute btree could join: max depth * block size
+ * the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks freed giving:
+ * the agf for the ag in which the blocks live: 2 * sector size
+ * the agfl for the ag in which the blocks live: 2 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_attrrm_reservation(xfs_mount_t *mp)
+xfs_calc_attrrm_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+ 128 * (1 + XFS_DA_NODE_MAXDEPTH +
+ XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+ (2 * mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
}
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
STATIC uint
-xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
+xfs_calc_clear_agi_bucket_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
+ return mp->m_sb.sb_sectsize + 128;
}
/*
@@ -184,11 +539,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
*/
void
xfs_trans_init(
- xfs_mount_t *mp)
+ struct xfs_mount *mp)
{
- xfs_trans_reservations_t *resp;
+ struct xfs_trans_reservations *resp = &mp->m_reservations;
- resp = &(mp->m_reservations);
resp->tr_write = xfs_calc_write_reservation(mp);
resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
resp->tr_rename = xfs_calc_rename_reservation(mp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 8c69e78..e639e8e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -300,24 +300,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
/*
- * Various log reservation values.
- * These are based on the size of the file system block
- * because that is what most transactions manipulate.
- * Each adds in an additional 128 bytes per item logged to
- * try to account for the overhead of the transaction mechanism.
- *
- * Note:
- * Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish()
- * call. This is because the number in the worst case is quite high
- * and quite unusual. In order to fix this we need to change
- * xfs_bmap_finish() to free extents in only a single AG at a time.
- * This will require changes to the EFI code as well, however, so that
- * the EFI for the extents not freed is logged again in each transaction.
- * See bug 261917.
- */
-
-/*
* Per-extent log reservation for the allocation btree changes
* involved in freeing or allocating an extent.
* 2 trees * (2 blocks/level * max depth - 1) * block size
@@ -341,429 +323,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
-/*
- * In a write transaction we can allocate a maximum of 2
- * extents. This gives:
- * the inode getting the new extents: inode size
- * the inode's bmap btree: max depth * block size
- * the agfs of the ags from which the extents are allocated: 2 * sector
- * the superblock free block counter: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
- * the agfs of the ags containing the blocks: 2 * sector size
- * the agfls of the ags containing the blocks: 2 * sector size
- * the super block free block counter: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_WRITE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
- ((2 * (mp)->m_sb.sb_sectsize) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
#define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write)
-
-/*
- * In truncating a file we free up to two extents at once. We can modify:
- * the inode being truncated: inode size
- * the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- * the agf for each of the ags: 4 * sector size
- * the agfl for each of the ags: 4 * sector size
- * the super block to reflect the freed blocks: sector size
- * worst case split in allocation btrees per extent assuming 4 extents:
- * 4 exts * 2 trees * (2 * max depth - 1) * block size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define XFS_CALC_ITRUNCATE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
- (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
- ((4 * (mp)->m_sb.sb_sectsize) + \
- (4 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 4) + \
- (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
- (128 * 5) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate)
-
-/*
- * In renaming a files we can modify:
- * the four inodes involved: 4 * inode size
- * the two directory btrees: 2 * (max depth + v2) * dir block size
- * the two directory bmap btrees: 2 * max depth * block size
- * And the bmap_finish transaction can free dir and bmap blocks (two sets
- * of bmap blocks) giving:
- * the agf for the ags in which the blocks live: 3 * sector size
- * the agfl for the ags in which the blocks live: 3 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_RENAME_LOG_RES(mp) \
- (MAX( \
- ((4 * (mp)->m_sb.sb_inodesize) + \
- (2 * XFS_DIROP_LOG_RES(mp)) + \
- (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
- ((3 * (mp)->m_sb.sb_sectsize) + \
- (3 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 3) + \
- (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
-
#define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename)
-
-/*
- * For creating a link to an inode:
- * the parent directory inode: inode size
- * the linked inode: inode size
- * the directory btree could split: (max depth + v2) * dir block size
- * the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free some bmap blocks giving:
- * the agf for the ag in which the blocks live: sector size
- * the agfl for the ag in which the blocks live: sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_LINK_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- XFS_DIROP_LOG_RES(mp) + \
- (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
- ((mp)->m_sb.sb_sectsize + \
- (mp)->m_sb.sb_sectsize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link)
-
-/*
- * For removing a directory entry we can modify:
- * the parent directory inode: inode size
- * the removed inode: inode size
- * the directory btree could join: (max depth + v2) * dir block size
- * the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free the dir and bmap blocks giving:
- * the agf for the ag in which the blocks live: 2 * sector size
- * the agfl for the ag in which the blocks live: 2 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_REMOVE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- XFS_DIROP_LOG_RES(mp) + \
- (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
- ((2 * (mp)->m_sb.sb_sectsize) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
#define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove)
-
-/*
- * For symlink we can modify:
- * the parent directory inode: inode size
- * the new inode: inode size
- * the inode btree entry: 1 block
- * the directory btree: (max depth + v2) * dir block size
- * the directory inode's bmap btree: (max depth + v2) * block size
- * the blocks for the symlink: 1 kB
- * Or in the first xact we allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_SYMLINK_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B(mp, 1) + \
- XFS_DIROP_LOG_RES(mp) + \
- 1024 + \
- (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
- (2 * (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
- XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
-
-/*
- * For create we can modify:
- * the parent directory inode: inode size
- * the new inode: inode size
- * the inode btree entry: block size
- * the superblock for the nlink flag: sector size
- * the directory btree: (max depth + v2) * dir block size
- * the directory inode's bmap btree: (max depth + v2) * block size
- * Or in the first xact we allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the superblock for the nlink flag: sector size
- * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define XFS_CALC_CREATE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B(mp, 1) + \
- XFS_DIROP_LOG_RES(mp) + \
- (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
- (3 * (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
- XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create)
-
-/*
- * Making a new directory is the same as creating a new file.
- */
-#define XFS_CALC_MKDIR_LOG_RES(mp) XFS_CALC_CREATE_LOG_RES(mp)
-
#define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir)
-
-/*
- * In freeing an inode we can modify:
- * the inode being freed: inode size
- * the super block free inode counter: sector size
- * the agi hash list and counters: sector size
- * the inode btree entry: block size
- * the on disk inode before ours in the agi hash list: inode cluster size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define XFS_CALC_IFREE_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), 1) + \
- MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
- (128 * 5) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
-
#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree)
-
-/*
- * When only changing the inode we log the inode and possibly the superblock
- * We also add a bit of slop for the transaction stuff.
- */
-#define XFS_CALC_ICHANGE_LOG_RES(mp) ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + 512)
-
#define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange)
-
-/*
- * Growing the data section of the filesystem.
- * superblock
- * agi and agf
- * allocation btrees
- */
-#define XFS_CALC_GROWDATA_LOG_RES(mp) \
- ((mp)->m_sb.sb_sectsize * 3 + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
#define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata)
-
-/*
- * Growing the rt section of the filesystem.
- * In the first set of transactions (ALLOC) we allocate space to the
- * bitmap or summary files.
- * superblock: sector size
- * agf of the ag from which the extent is allocated: sector size
- * bmap btree for bitmap/summary inode: max depth * blocksize
- * bitmap/summary inode: inode size
- * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
- */
-#define XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
- (2 * (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
- (mp)->m_sb.sb_inodesize + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * \
- (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
#define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc)
-
-/*
- * Growing the rt section of the filesystem.
- * In the second set of transactions (ZERO) we zero the new metadata blocks.
- * one bitmap/summary block: blocksize
- */
-#define XFS_CALC_GROWRTZERO_LOG_RES(mp) \
- ((mp)->m_sb.sb_blocksize + 128)
-
#define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero)
-
-/*
- * Growing the rt section of the filesystem.
- * In the third set of transactions (FREE) we update metadata without
- * allocating any new blocks.
- * superblock: sector size
- * bitmap inode: inode size
- * summary inode: inode size
- * one bitmap block: blocksize
- * summary blocks: new summary size
- */
-#define XFS_CALC_GROWRTFREE_LOG_RES(mp) \
- ((mp)->m_sb.sb_sectsize + \
- 2 * (mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_blocksize + \
- (mp)->m_rsumsize + \
- (128 * 5))
-
#define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree)
-
-/*
- * Logging the inode modification timestamp on a synchronous write.
- * inode
- */
-#define XFS_CALC_SWRITE_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + 128)
-
#define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
/*
* Logging the inode timestamps on an fsync -- same as SWRITE
* as long as SWRITE logs the entire inode core
*/
#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
-/*
- * Logging the inode mode bits when writing a setuid/setgid file
- * inode
- */
-#define XFS_CALC_WRITEID_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + 128)
-
#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
-/*
- * Converting the inode from non-attributed to attributed.
- * the inode being converted: inode size
- * agf block and superblock (for block allocation)
- * the new block (directory sized)
- * bmap blocks for the new directory block
- * allocation btrees
- */
-#define XFS_CALC_ADDAFORK_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize * 2 + \
- (mp)->m_dirblksize + \
- XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
-
-/*
- * Removing the attribute fork of a file
- * the inode being truncated: inode size
- * the inode's bmap btree: max depth * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- * the agf for each of the ags: 4 * sector size
- * the agfl for each of the ags: 4 * sector size
- * the super block to reflect the freed blocks: sector size
- * worst case split in allocation btrees per extent assuming 4 extents:
- * 4 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_ATTRINVAL_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
- (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
- ((4 * (mp)->m_sb.sb_sectsize) + \
- (4 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 4) + \
- (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
-
#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
-
-/*
- * Setting an attribute.
- * the inode getting the attribute
- * the superblock for allocations
- * the agfs extents are allocated from
- * the attribute btree * max depth
- * the inode allocation btree
- * Since attribute transaction space is dependent on the size of the attribute,
- * the calculation is done partially at mount time and partially at runtime.
- */
-#define XFS_CALC_ATTRSET_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
- (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
-
#define XFS_ATTRSET_LOG_RES(mp, ext) \
((mp)->m_reservations.tr_attrset + \
(ext * (mp)->m_sb.sb_sectsize) + \
(ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
(128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
-
-/*
- * Removing an attribute.
- * the inode: inode size
- * the attribute btree could join: max depth * block size
- * the inode bmap btree could join or split: max depth * block size
- * And the bmap_finish transaction can free the attr blocks freed giving:
- * the agf for the ag in which the blocks live: 2 * sector size
- * the agfl for the ag in which the blocks live: 2 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_ATTRRM_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
- (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
- ((2 * (mp)->m_sb.sb_sectsize) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
-
-/*
- * Clearing a bad agino number in an agi hash bucket.
- */
-#define XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
- ((mp)->m_sb.sb_sectsize + 128)
-
#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9d376be..a06bd62 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -267,7 +267,7 @@ xfs_setattr(
if (code) {
ASSERT(tp == NULL);
lock_flags &= ~XFS_ILOCK_EXCL;
- ASSERT(lock_flags == XFS_IOLOCK_EXCL);
+ ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
goto error_return;
}
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
OpenPOWER on IntegriCloud