diff options
Diffstat (limited to 'fs')
166 files changed, 2607 insertions, 2043 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 25b300e..2bedc6c 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -257,15 +257,13 @@ v9fs_file_write(struct file *filp, const char __user * data, return total; } -static int v9fs_file_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int v9fs_file_fsync(struct file *filp, int datasync) { struct p9_fid *fid; struct p9_wstat wstat; int retval; - P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp, - dentry, datasync); + P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); fid = filp->private_data; v9fs_blank_wstat(&wstat); diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 23aa52f..f4287e4 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -197,7 +197,7 @@ const struct file_operations adfs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, .readdir = adfs_readdir, - .fsync = simple_fsync, + .fsync = generic_file_fsync, }; static int diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 005ea34..a36da53 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -26,7 +26,7 @@ const struct file_operations adfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .mmap = generic_file_mmap, - .fsync = simple_fsync, + .fsync = generic_file_fsync, .write = do_sync_write, .aio_write = generic_file_aio_write, .splice_read = generic_file_splice_read, diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 0f5e309..6f850b0 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -322,8 +322,9 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr) if (error) goto out; + /* XXX: this is missing some actual on-disk truncation.. */ if (ia_valid & ATTR_SIZE) - error = vmtruncate(inode, attr->ia_size); + error = simple_setsize(inode, attr->ia_size); if (error) goto out; diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 861dae6..f05b615 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -183,7 +183,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent void affs_free_prealloc(struct inode *inode); extern void affs_truncate(struct inode *); -int affs_file_fsync(struct file *, struct dentry *, int); +int affs_file_fsync(struct file *, int); /* dir.c */ diff --git a/fs/affs/file.c b/fs/affs/file.c index 184e55c..322710c 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -916,9 +916,9 @@ affs_truncate(struct inode *inode) affs_free_prealloc(inode); } -int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync) +int affs_file_fsync(struct file *filp, int datasync) { - struct inode * inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; int ret, err; ret = write_inode_now(inode, 0); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 807f284..5f679b77 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -740,7 +740,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern int afs_writeback_all(struct afs_vnode *); -extern int afs_fsync(struct file *, struct dentry *, int); +extern int afs_fsync(struct file *, int); /*****************************************************************************/ diff --git a/fs/afs/server.c b/fs/afs/server.c index f490995..9fdc7fe 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -91,9 +91,10 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, memcpy(&server->addr, addr, sizeof(struct in_addr)); server->addr.s_addr = addr->s_addr; + _leave(" = %p{%d}", server, atomic_read(&server->usage)); + } else { + _leave(" = NULL [nomem]"); } - - _leave(" = %p{%d}", server, atomic_read(&server->usage)); return server; } diff --git a/fs/afs/write.c b/fs/afs/write.c index 3bed54a..3dab9e9 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -701,8 +701,9 @@ int afs_writeback_all(struct afs_vnode *vnode) * - the return status from this call provides a reliable indication of * whether any write errors occurred for this process. */ -int afs_fsync(struct file *file, struct dentry *dentry, int datasync) +int afs_fsync(struct file *file, int datasync) { + struct dentry *dentry = file->f_path.dentry; struct afs_writeback *wb, *xwb; struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); int ret; @@ -527,7 +527,7 @@ static void aio_fput_routine(struct work_struct *data) /* Complete the fput(s) */ if (req->ki_filp != NULL) - __fput(req->ki_filp); + fput(req->ki_filp); /* Link the iocb into the context's free list */ spin_lock_irq(&ctx->ctx_lock); @@ -560,11 +560,11 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) /* * Try to optimize the aio and eventfd file* puts, by avoiding to - * schedule work in case it is not __fput() time. In normal cases, + * schedule work in case it is not final fput() time. In normal cases, * we would not be holding the last reference to the file*, so * this function will be executed w/out any aio kthread wakeup. */ - if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(!fput_atomic(req->ki_filp))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 9bd4b38..e4b75d6 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -205,7 +205,7 @@ static struct inode *anon_inode_mkinode(void) * that it already _is_ on the dirty list. */ inode->i_state = I_DIRTY; - inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; + inode->i_mode = S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE; @@ -67,14 +67,14 @@ EXPORT_SYMBOL(inode_change_ok); * @offset: the new size to assign to the inode * @Returns: 0 on success, -ve errno on failure * + * inode_newsize_ok must be called with i_mutex held. + * * inode_newsize_ok will check filesystem limits and ulimits to check that the * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ * when necessary. Caller must not proceed with inode size change if failure is * returned. @inode must be a file (not directory), with appropriate * permissions to allow truncate (inode_newsize_ok does NOT check these * conditions). - * - * inode_newsize_ok must be called with i_mutex held. */ int inode_newsize_ok(const struct inode *inode, loff_t offset) { @@ -104,17 +104,25 @@ out_big: } EXPORT_SYMBOL(inode_newsize_ok); -int inode_setattr(struct inode * inode, struct iattr * attr) +/** + * generic_setattr - copy simple metadata updates into the generic inode + * @inode: the inode to be updated + * @attr: the new attributes + * + * generic_setattr must be called with i_mutex held. + * + * generic_setattr updates the inode's metadata with that specified + * in attr. Noticably missing is inode size update, which is more complex + * as it requires pagecache updates. See simple_setsize. + * + * The inode is not marked as dirty after this operation. The rationale is + * that for "simple" filesystems, the struct inode is the inode storage. + * The caller is free to mark the inode dirty afterwards if needed. + */ +void generic_setattr(struct inode *inode, const struct iattr *attr) { unsigned int ia_valid = attr->ia_valid; - if (ia_valid & ATTR_SIZE && - attr->ia_size != i_size_read(inode)) { - int error = vmtruncate(inode, attr->ia_size); - if (error) - return error; - } - if (ia_valid & ATTR_UID) inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) @@ -135,6 +143,28 @@ int inode_setattr(struct inode * inode, struct iattr * attr) mode &= ~S_ISGID; inode->i_mode = mode; } +} +EXPORT_SYMBOL(generic_setattr); + +/* + * note this function is deprecated, the new truncate sequence should be + * used instead -- see eg. simple_setsize, generic_setattr. + */ +int inode_setattr(struct inode *inode, const struct iattr *attr) +{ + unsigned int ia_valid = attr->ia_valid; + + if (ia_valid & ATTR_SIZE && + attr->ia_size != i_size_read(inode)) { + int error; + + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + generic_setattr(inode, attr); + mark_inode_dirty(inode); return 0; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index a05287a..52e59bf 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -93,8 +93,7 @@ static int bad_file_release(struct inode *inode, struct file *filp) return -EIO; } -static int bad_file_fsync(struct file *file, struct dentry *dentry, - int datasync) +static int bad_file_fsync(struct file *file, int datasync) { return -EIO; } diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 8f73841..d967e05 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -78,7 +78,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) const struct file_operations bfs_dir_operations = { .read = generic_read_dir, .readdir = bfs_readdir, - .fsync = simple_fsync, + .fsync = generic_file_fsync, .llseek = generic_file_llseek, }; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 2c5f9a0..63039ed 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -990,10 +990,9 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( /* clear any space allocated but not loaded */ if (phdr->p_filesz < phdr->p_memsz) { - ret = clear_user((void *) (seg->addr + phdr->p_filesz), - phdr->p_memsz - phdr->p_filesz); - if (ret) - return ret; + if (clear_user((void *) (seg->addr + phdr->p_filesz), + phdr->p_memsz - phdr->p_filesz)) + return -EFAULT; } if (mm) { @@ -1027,7 +1026,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, struct elf32_fdpic_loadseg *seg; struct elf32_phdr *phdr; unsigned long load_addr, delta_vaddr; - int loop, dvset, ret; + int loop, dvset; load_addr = params->load_addr; delta_vaddr = 0; @@ -1127,9 +1126,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, * PT_LOAD */ if (prot & PROT_WRITE && disp > 0) { kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp); - ret = clear_user((void __user *) maddr, disp); - if (ret) - return ret; + if (clear_user((void __user *) maddr, disp)) + return -EFAULT; maddr += disp; } @@ -1164,19 +1162,17 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, if (prot & PROT_WRITE && excess1 > 0) { kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr + phdr->p_filesz, excess1); - ret = clear_user((void __user *) maddr + phdr->p_filesz, - excess1); - if (ret) - return ret; + if (clear_user((void __user *) maddr + phdr->p_filesz, + excess1)) + return -EFAULT; } #else if (excess > 0) { kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr + phdr->p_filesz, excess); - ret = clear_user((void *) maddr + phdr->p_filesz, excess); - if (ret) - return ret; + if (clear_user((void *) maddr + phdr->p_filesz, excess)) + return -EFAULT; } #endif diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 49566c1..b6ab27c 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -56,15 +56,22 @@ #endif /* - * User data (stack, data section and bss) needs to be aligned - * for the same reasons as SLAB memory is, and to the same amount. - * Avoid duplicating architecture specific code by using the same - * macro as with SLAB allocation: + * User data (data section and bss) needs to be aligned. + * We pick 0x20 here because it is the max value elf2flt has always + * used in producing FLAT files, and because it seems to be large + * enough to make all the gcc alignment related tests happy. + */ +#define FLAT_DATA_ALIGN (0x20) + +/* + * User data (stack) also needs to be aligned. + * Here we can be a bit looser than the data sections since this + * needs to only meet arch ABI requirements. */ #ifdef ARCH_SLAB_MINALIGN -#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN) +#define FLAT_STACK_ALIGN (ARCH_SLAB_MINALIGN) #else -#define FLAT_DATA_ALIGN (sizeof(void *)) +#define FLAT_STACK_ALIGN (sizeof(void *)) #endif #define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ @@ -129,7 +136,7 @@ static unsigned long create_flat_tables( sp = (unsigned long *)p; sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); - sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN); + sp = (unsigned long *) ((unsigned long)sp & -FLAT_STACK_ALIGN); argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); envp = argv + (argc + 1); @@ -589,7 +596,7 @@ static int load_flat_file(struct linux_binprm * bprm, if (IS_ERR_VALUE(result)) { printk("Unable to read data+bss, errno %d\n", (int)-result); do_munmap(current->mm, textpos, text_len); - do_munmap(current->mm, realdatastart, data_len + extra); + do_munmap(current->mm, realdatastart, len); ret = result; goto err; } @@ -876,7 +883,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) stack_len = TOP_OF_ARGS - bprm->p; /* the strings */ stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */ stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */ - stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ + stack_len += FLAT_STACK_ALIGN - 1; /* reserve for upcoming alignment */ res = load_flat_file(bprm, &libinfo, 0, &stack_len); if (IS_ERR_VALUE(res)) diff --git a/fs/block_dev.c b/fs/block_dev.c index 26e5f50..99d6af8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -172,8 +172,9 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode), - iov, offset, nr_segs, blkdev_get_blocks, NULL); + return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode, + I_BDEV(inode), iov, offset, nr_segs, + blkdev_get_blocks, NULL); } int __sync_blockdev(struct block_device *bdev, int wait) @@ -309,8 +310,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - blkdev_get_block); + return block_write_begin_newtrunc(file, mapping, pos, len, flags, + pagep, fsdata, blkdev_get_block); } static int blkdev_write_end(struct file *file, struct address_space *mapping, @@ -358,12 +359,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) return retval; } -/* - * Filp is never NULL; the only case when ->fsync() is called with - * NULL first argument is nfsd_sync_dir() and that's not a directory. - */ - -int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) +int blkdev_fsync(struct file *filp, int datasync) { struct inode *bd_inode = filp->f_mapping->host; struct block_device *bdev = I_BDEV(bd_inode); @@ -710,8 +706,13 @@ retry: * @bdev is about to be opened exclusively. Check @bdev can be opened * exclusively and mark that an exclusive open is in progress. Each * successful call to this function must be matched with a call to - * either bd_claim() or bd_abort_claiming(). If this function - * succeeds, the matching bd_claim() is guaranteed to succeed. + * either bd_finish_claiming() or bd_abort_claiming() (which do not + * fail). + * + * This function is used to gain exclusive access to the block device + * without actually causing other exclusive open attempts to fail. It + * should be used when the open sequence itself requires exclusive + * access but may subsequently fail. * * CONTEXT: * Might sleep. @@ -738,6 +739,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, return ERR_PTR(-ENXIO); whole = bdget_disk(disk, 0); + module_put(disk->fops->owner); put_disk(disk); if (!whole) return ERR_PTR(-ENOMEM); @@ -786,15 +788,46 @@ static void bd_abort_claiming(struct block_device *whole, void *holder) __bd_abort_claiming(whole, holder); /* releases bdev_lock */ } +/* increment holders when we have a legitimate claim. requires bdev_lock */ +static void __bd_claim(struct block_device *bdev, struct block_device *whole, + void *holder) +{ + /* note that for a whole device bd_holders + * will be incremented twice, and bd_holder will + * be set to bd_claim before being set to holder + */ + whole->bd_holders++; + whole->bd_holder = bd_claim; + bdev->bd_holders++; + bdev->bd_holder = holder; +} + +/** + * bd_finish_claiming - finish claiming a block device + * @bdev: block device of interest (passed to bd_start_claiming()) + * @whole: whole block device returned by bd_start_claiming() + * @holder: holder trying to claim @bdev + * + * Finish a claiming block started by bd_start_claiming(). + * + * CONTEXT: + * Grabs and releases bdev_lock. + */ +static void bd_finish_claiming(struct block_device *bdev, + struct block_device *whole, void *holder) +{ + spin_lock(&bdev_lock); + BUG_ON(!bd_may_claim(bdev, whole, holder)); + __bd_claim(bdev, whole, holder); + __bd_abort_claiming(whole, holder); /* not actually an abort */ +} + /** * bd_claim - claim a block device * @bdev: block device to claim * @holder: holder trying to claim @bdev * - * Try to claim @bdev which must have been opened successfully. This - * function may be called with or without preceding - * blk_start_claiming(). In the former case, this function is always - * successful and terminates the claiming block. + * Try to claim @bdev which must have been opened successfully. * * CONTEXT: * Might sleep. @@ -810,23 +843,10 @@ int bd_claim(struct block_device *bdev, void *holder) might_sleep(); spin_lock(&bdev_lock); - res = bd_prepare_to_claim(bdev, whole, holder); - if (res == 0) { - /* note that for a whole device bd_holders - * will be incremented twice, and bd_holder will - * be set to bd_claim before being set to holder - */ - whole->bd_holders++; - whole->bd_holder = bd_claim; - bdev->bd_holders++; - bdev->bd_holder = holder; - } - - if (whole->bd_claiming) - __bd_abort_claiming(whole, holder); /* releases bdev_lock */ - else - spin_unlock(&bdev_lock); + if (res == 0) + __bd_claim(bdev, whole, holder); + spin_unlock(&bdev_lock); return res; } @@ -1480,7 +1500,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (whole) { if (res == 0) - BUG_ON(bd_claim(bdev, filp) != 0); + bd_finish_claiming(bdev, whole, filp); else bd_abort_claiming(whole, filp); } @@ -1716,7 +1736,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) goto out_blkdev_put; - BUG_ON(bd_claim(bdev, holder) != 0); + bd_finish_claiming(bdev, whole, holder); return bdev; out_blkdev_put: diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e9bf864..29c2009 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2434,7 +2434,7 @@ void btrfs_update_iflags(struct inode *inode); void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); /* file.c */ -int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); +int btrfs_sync_file(struct file *file, int datasync); int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); int btrfs_check_file(struct btrfs_root *root, struct inode *inode); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 79437c5..787b50a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1101,8 +1101,9 @@ int btrfs_release_file(struct inode *inode, struct file *filp) * important optimization for directories because holding the mutex prevents * new operations on the dir while we write to disk. */ -int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) +int btrfs_sync_file(struct file *file, int datasync) { + struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; diff --git a/fs/buffer.c b/fs/buffer.c index e8aa708..d54812b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1949,14 +1949,11 @@ static int __block_commit_write(struct inode *inode, struct page *page, } /* - * block_write_begin takes care of the basic task of block allocation and - * bringing partial write blocks uptodate first. - * - * If *pagep is not NULL, then block_write_begin uses the locked page - * at *pagep rather than allocating its own. In this case, the page will - * not be unlocked or deallocated on failure. + * Filesystems implementing the new truncate sequence should use the + * _newtrunc postfix variant which won't incorrectly call vmtruncate. + * The filesystem needs to handle block truncation upon failure. */ -int block_write_begin(struct file *file, struct address_space *mapping, +int block_write_begin_newtrunc(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, get_block_t *get_block) @@ -1992,20 +1989,50 @@ int block_write_begin(struct file *file, struct address_space *mapping, unlock_page(page); page_cache_release(page); *pagep = NULL; - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. - */ - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); } } out: return status; } +EXPORT_SYMBOL(block_write_begin_newtrunc); + +/* + * block_write_begin takes care of the basic task of block allocation and + * bringing partial write blocks uptodate first. + * + * If *pagep is not NULL, then block_write_begin uses the locked page + * at *pagep rather than allocating its own. In this case, the page will + * not be unlocked or deallocated on failure. + */ +int block_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + get_block_t *get_block) +{ + int ret; + + ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, + pagep, fsdata, get_block); + + /* + * prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. Don't need + * i_size_read because we hold i_mutex. + * + * Filesystems which pass down their own page also cannot + * call into vmtruncate here because it would lead to lock + * inversion problems (*pagep is locked). This is a further + * example of where the old truncate sequence is inadequate. + */ + if (unlikely(ret) && *pagep == NULL) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; +} EXPORT_SYMBOL(block_write_begin); int block_write_end(struct file *file, struct address_space *mapping, @@ -2324,7 +2351,7 @@ out: * For moronic filesystems that do not allow holes in file. * We may have to extend the file. */ -int cont_write_begin(struct file *file, struct address_space *mapping, +int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, get_block_t *get_block, loff_t *bytes) @@ -2345,11 +2372,30 @@ int cont_write_begin(struct file *file, struct address_space *mapping, } *pagep = NULL; - err = block_write_begin(file, mapping, pos, len, + err = block_write_begin_newtrunc(file, mapping, pos, len, flags, pagep, fsdata, get_block); out: return err; } +EXPORT_SYMBOL(cont_write_begin_newtrunc); + +int cont_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + get_block_t *get_block, loff_t *bytes) +{ + int ret; + + ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags, + pagep, fsdata, get_block, bytes); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; +} EXPORT_SYMBOL(cont_write_begin); int block_prepare_write(struct page *page, unsigned from, unsigned to, @@ -2381,7 +2427,7 @@ EXPORT_SYMBOL(block_commit_write); * * We are not allowed to take the i_mutex here so we have to play games to * protect against truncate races as the page could now be beyond EOF. Because - * vmtruncate() writes the inode size before removing pages, once we have the + * truncate writes the inode size before removing pages, once we have the * page lock we can determine safely if the page is beyond EOF. If it is not * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. @@ -2464,10 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head) } /* - * On entry, the page is fully not uptodate. - * On exit the page is fully uptodate in the areas outside (from,to) + * Filesystems implementing the new truncate sequence should use the + * _newtrunc postfix variant which won't incorrectly call vmtruncate. + * The filesystem needs to handle block truncation upon failure. */ -int nobh_write_begin(struct file *file, struct address_space *mapping, +int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, get_block_t *get_block) @@ -2500,8 +2547,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping, unlock_page(page); page_cache_release(page); *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, get_block); + return block_write_begin_newtrunc(file, mapping, pos, len, + flags, pagep, fsdata, get_block); } if (PageMappedToDisk(page)) @@ -2605,8 +2652,34 @@ out_release: page_cache_release(page); *pagep = NULL; - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); + return ret; +} +EXPORT_SYMBOL(nobh_write_begin_newtrunc); + +/* + * On entry, the page is fully not uptodate. + * On exit the page is fully uptodate in the areas outside (from,to) + */ +int nobh_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + get_block_t *get_block) +{ + int ret; + + ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, + pagep, fsdata, get_block); + + /* + * prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. Don't need + * i_size_read because we hold i_mutex. + */ + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } return ret; } diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 9f46de2..89490bea 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -1,7 +1,6 @@ #include "ceph_debug.h" #include <linux/module.h> -#include <linux/slab.h> #include <linux/err.h> #include <linux/slab.h> @@ -217,8 +216,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, if (ac->protocol != protocol) { ret = ceph_auth_init_protocol(ac, protocol); if (ret) { - pr_err("error %d on auth method %s init\n", - ret, ac->ops->name); + pr_err("error %d on auth protocol %d init\n", + ret, protocol); goto out; } } @@ -247,7 +246,7 @@ int ceph_build_auth(struct ceph_auth_client *ac, if (!ac->protocol) return ceph_auth_build_hello(ac, msg_buf, msg_len); BUG_ON(!ac->ops); - if (!ac->ops->is_authenticated(ac)) + if (ac->ops->should_authenticate(ac)) return ceph_build_auth_request(ac, msg_buf, msg_len); return 0; } diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h index 4429a70..d38a2fb 100644 --- a/fs/ceph/auth.h +++ b/fs/ceph/auth.h @@ -24,6 +24,12 @@ struct ceph_auth_client_ops { int (*is_authenticated)(struct ceph_auth_client *ac); /* + * true if we should (re)authenticate, e.g., when our tickets + * are getting old and crusty. + */ + int (*should_authenticate)(struct ceph_auth_client *ac); + + /* * build requests and process replies during monitor * handshake. if handle_reply returns -EAGAIN, we build * another request. diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 24407c1..ad1dc21 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c @@ -31,6 +31,13 @@ static int is_authenticated(struct ceph_auth_client *ac) return !xi->starting; } +static int should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_auth_none_info *xi = ac->private; + + return xi->starting; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. @@ -98,6 +105,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .reset = reset, .destroy = destroy, .is_authenticated = is_authenticated, + .should_authenticate = should_authenticate, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 7b20623..83d4d27 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -27,6 +27,17 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) return (ac->want_keys & xi->have_keys) == ac->want_keys; } +static int ceph_x_should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_x_info *xi = ac->private; + int need; + + ceph_x_validate_tickets(ac, &need); + dout("ceph_x_should_authenticate want=%d need=%d have=%d\n", + ac->want_keys, need, xi->have_keys); + return need != 0; +} + static int ceph_x_encrypt_buflen(int ilen) { return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + @@ -620,6 +631,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, static const struct ceph_auth_client_ops ceph_x_ops = { .name = "x", .is_authenticated = ceph_x_is_authenticated, + .should_authenticate = ceph_x_should_authenticate, .build_request = ceph_x_build_request, .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0dd0b81..619b616 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -981,6 +981,46 @@ static int send_cap_msg(struct ceph_mds_session *session, return 0; } +static void __queue_cap_release(struct ceph_mds_session *session, + u64 ino, u64 cap_id, u32 migrate_seq, + u32 issue_seq) +{ + struct ceph_msg *msg; + struct ceph_mds_cap_release *head; + struct ceph_mds_cap_item *item; + + spin_lock(&session->s_cap_lock); + BUG_ON(!session->s_num_cap_releases); + msg = list_first_entry(&session->s_cap_releases, + struct ceph_msg, list_head); + + dout(" adding %llx release to mds%d msg %p (%d left)\n", + ino, session->s_mds, msg, session->s_num_cap_releases); + + BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); + head = msg->front.iov_base; + head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); + item = msg->front.iov_base + msg->front.iov_len; + item->ino = cpu_to_le64(ino); + item->cap_id = cpu_to_le64(cap_id); + item->migrate_seq = cpu_to_le32(migrate_seq); + item->seq = cpu_to_le32(issue_seq); + + session->s_num_cap_releases--; + + msg->front.iov_len += sizeof(*item); + if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { + dout(" release msg %p full\n", msg); + list_move_tail(&msg->list_head, &session->s_cap_releases_done); + } else { + dout(" release msg %p at %d/%d (%d)\n", msg, + (int)le32_to_cpu(head->num), + (int)CEPH_CAPS_PER_RELEASE, + (int)msg->front.iov_len); + } + spin_unlock(&session->s_cap_lock); +} + /* * Queue cap releases when an inode is dropped from our cache. Since * inode is about to be destroyed, there is no need for i_lock. @@ -994,41 +1034,9 @@ void ceph_queue_caps_release(struct inode *inode) while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); struct ceph_mds_session *session = cap->session; - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - struct ceph_mds_cap_item *item; - spin_lock(&session->s_cap_lock); - BUG_ON(!session->s_num_cap_releases); - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - - dout(" adding %p release to mds%d msg %p (%d left)\n", - inode, session->s_mds, msg, session->s_num_cap_releases); - - BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); - head = msg->front.iov_base; - head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); - item = msg->front.iov_base + msg->front.iov_len; - item->ino = cpu_to_le64(ceph_ino(inode)); - item->cap_id = cpu_to_le64(cap->cap_id); - item->migrate_seq = cpu_to_le32(cap->mseq); - item->seq = cpu_to_le32(cap->issue_seq); - - session->s_num_cap_releases--; - - msg->front.iov_len += sizeof(*item); - if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { - dout(" release msg %p full\n", msg); - list_move_tail(&msg->list_head, - &session->s_cap_releases_done); - } else { - dout(" release msg %p at %d/%d (%d)\n", msg, - (int)le32_to_cpu(head->num), - (int)CEPH_CAPS_PER_RELEASE, - (int)msg->front.iov_len); - } - spin_unlock(&session->s_cap_lock); + __queue_cap_release(session, ceph_ino(inode), cap->cap_id, + cap->mseq, cap->issue_seq); p = rb_next(p); __ceph_remove_cap(cap); } @@ -1776,9 +1784,9 @@ out: spin_unlock(&ci->i_unsafe_lock); } -int ceph_fsync(struct file *file, struct dentry *dentry, int datasync) +int ceph_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); unsigned flush_tid; int ret; @@ -2655,7 +2663,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_mds_caps *h; int mds = session->s_mds; int op; - u32 seq; + u32 seq, mseq; struct ceph_vino vino; u64 cap_id; u64 size, max_size; @@ -2675,6 +2683,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, vino.snap = CEPH_NOSNAP; cap_id = le64_to_cpu(h->cap_id); seq = le32_to_cpu(h->seq); + mseq = le32_to_cpu(h->migrate_seq); size = le64_to_cpu(h->size); max_size = le64_to_cpu(h->max_size); @@ -2689,6 +2698,18 @@ void ceph_handle_caps(struct ceph_mds_session *session, vino.snap, inode); if (!inode) { dout(" i don't have ino %llx\n", vino.ino); + + if (op == CEPH_CAP_OP_IMPORT) + __queue_cap_release(session, vino.ino, cap_id, + mseq, seq); + + /* + * send any full release message to try to move things + * along for the mds (who clearly thinks we still have this + * cap). + */ + ceph_add_cap_releases(mdsc, session, -1); + ceph_send_cap_releases(mdsc, session); goto done; } @@ -2714,7 +2735,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, spin_lock(&inode->i_lock); cap = __get_cap_for_mds(ceph_inode(inode), mds); if (!cap) { - dout("no cap on %p ino %llx.%llx from mds%d, releasing\n", + dout(" no cap on %p ino %llx.%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), mds); spin_unlock(&inode->i_lock); goto done; diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 3b9eeed..2fa992e 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -265,16 +265,17 @@ extern const char *ceph_mds_state_name(int s); * - they also define the lock ordering by the MDS * - a few of these are internal to the mds */ -#define CEPH_LOCK_DN 1 -#define CEPH_LOCK_ISNAP 2 -#define CEPH_LOCK_IVERSION 4 /* mds internal */ -#define CEPH_LOCK_IFILE 8 /* mds internal */ -#define CEPH_LOCK_IAUTH 32 -#define CEPH_LOCK_ILINK 64 -#define CEPH_LOCK_IDFT 128 /* dir frag tree */ -#define CEPH_LOCK_INEST 256 /* mds internal */ -#define CEPH_LOCK_IXATTR 512 -#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_DVERSION 1 +#define CEPH_LOCK_DN 2 +#define CEPH_LOCK_ISNAP 16 +#define CEPH_LOCK_IVERSION 32 /* mds internal */ +#define CEPH_LOCK_IFILE 64 +#define CEPH_LOCK_IAUTH 128 +#define CEPH_LOCK_ILINK 256 +#define CEPH_LOCK_IDFT 512 /* dir frag tree */ +#define CEPH_LOCK_INEST 1024 /* mds internal */ +#define CEPH_LOCK_IXATTR 2048 +#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ /* client_session ops */ enum { diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4fd3090..f857193 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -587,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; /* we only need inode linkage */ @@ -1107,10 +1107,9 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, * an fsync() on a dir will wait for any uncommitted directory * operations to commit. */ -static int ceph_dir_fsync(struct file *file, struct dentry *dentry, - int datasync) +static int ceph_dir_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_path.dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); struct list_head *head = &ci->i_unsafe_dirops; struct ceph_mds_request *req; diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 1744764..4480cb1 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -133,7 +133,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_ino1 = vino; req->r_ino2.ino = cfh->parent_ino; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 6512b67..6251a15 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -230,7 +230,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a81b8b6..ab47f46 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -69,7 +69,7 @@ struct inode *ceph_get_snapdir(struct inode *parent) BUG_ON(!S_ISDIR(parent->i_mode)); if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return inode; inode->i_mode = parent->i_mode; inode->i_uid = parent->i_uid; inode->i_gid = parent->i_gid; @@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) spin_lock(&dcache_lock); spin_lock(&dn->d_lock); - list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); + list_move(&dn->d_u.d_child, &dir->d_subdirs); dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, dn->d_u.d_child.prev, dn->d_u.d_child.next); spin_unlock(&dn->d_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 885aa57..1766947 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc, * * Called under s_mutex. */ -static int add_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session, - int extra) +int ceph_add_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int extra) { struct ceph_msg *msg; struct ceph_mds_cap_release *head; @@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) /* * called under s_mutex */ -static void send_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) +void ceph_send_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) { struct ceph_msg *msg; @@ -1768,12 +1768,12 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, mutex_unlock(&mdsc->mutex); dout("do_request waiting\n"); if (req->r_timeout) { - err = (long)wait_for_completion_interruptible_timeout( + err = (long)wait_for_completion_killable_timeout( &req->r_completion, req->r_timeout); if (err == 0) err = -EIO; } else { - err = wait_for_completion_interruptible(&req->r_completion); + err = wait_for_completion_killable(&req->r_completion); } dout("do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); @@ -1980,7 +1980,7 @@ out_err: } mutex_unlock(&mdsc->mutex); - add_cap_releases(mdsc, req->r_session, -1); + ceph_add_cap_releases(mdsc, req->r_session, -1); mutex_unlock(&session->s_mutex); /* kick calling process */ @@ -2014,16 +2014,21 @@ static void handle_forward(struct ceph_mds_client *mdsc, mutex_lock(&mdsc->mutex); req = __lookup_request(mdsc, tid); if (!req) { - dout("forward %llu to mds%d - req dne\n", tid, next_mds); + dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); goto out; /* dup reply? */ } - if (fwd_seq <= req->r_num_fwd) { - dout("forward %llu to mds%d - old seq %d <= %d\n", + if (req->r_aborted) { + dout("forward tid %llu aborted, unregistering\n", tid); + __unregister_request(mdsc, req); + } else if (fwd_seq <= req->r_num_fwd) { + dout("forward tid %llu to mds%d - old seq %d <= %d\n", tid, next_mds, req->r_num_fwd, fwd_seq); } else { /* resend. forward race not possible; mds would drop */ - dout("forward %llu to mds%d (we resend)\n", tid, next_mds); + dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); + BUG_ON(req->r_err); + BUG_ON(req->r_got_result); req->r_num_fwd = fwd_seq; req->r_resend_mds = next_mds; put_request_session(req); @@ -2428,6 +2433,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_dentry_info *di; int mds = session->s_mds; struct ceph_mds_lease *h = msg->front.iov_base; + u32 seq; struct ceph_vino vino; int mask; struct qstr dname; @@ -2441,6 +2447,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; mask = le16_to_cpu(h->mask); + seq = le32_to_cpu(h->seq); dname.name = (void *)h + sizeof(*h) + sizeof(u32); dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); if (dname.len != get_unaligned_le32(h+1)) @@ -2451,8 +2458,9 @@ static void handle_lease(struct ceph_mds_client *mdsc, /* lookup inode */ inode = ceph_find_inode(sb, vino); - dout("handle_lease '%s', mask %d, ino %llx %p\n", - ceph_lease_op_name(h->action), mask, vino.ino, inode); + dout("handle_lease %s, mask %d, ino %llx %p %.*s\n", + ceph_lease_op_name(h->action), mask, vino.ino, inode, + dname.len, dname.name); if (inode == NULL) { dout("handle_lease no inode %llx\n", vino.ino); goto release; @@ -2477,7 +2485,8 @@ static void handle_lease(struct ceph_mds_client *mdsc, switch (h->action) { case CEPH_MDS_LEASE_REVOKE: if (di && di->lease_session == session) { - h->seq = cpu_to_le32(di->lease_seq); + if (ceph_seq_cmp(di->lease_seq, seq) > 0) + h->seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); } release = 1; @@ -2491,7 +2500,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, unsigned long duration = le32_to_cpu(h->duration_ms) * HZ / 1000; - di->lease_seq = le32_to_cpu(h->seq); + di->lease_seq = seq; dentry->d_time = di->lease_renew_from + duration; di->lease_renew_after = di->lease_renew_from + (duration >> 1); @@ -2541,7 +2550,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, return; lease = msg->front.iov_base; lease->action = action; - lease->mask = cpu_to_le16(CEPH_LOCK_DN); + lease->mask = cpu_to_le16(1); lease->ino = cpu_to_le64(ceph_vino(inode).ino); lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); lease->seq = cpu_to_le32(seq); @@ -2571,7 +2580,7 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, BUG_ON(inode == NULL); BUG_ON(dentry == NULL); - BUG_ON(mask != CEPH_LOCK_DN); + BUG_ON(mask == 0); /* is dentry lease valid? */ spin_lock(&dentry->d_lock); @@ -2681,10 +2690,10 @@ static void delayed_work(struct work_struct *work) send_renew_caps(mdsc, s); else ceph_con_keepalive(&s->s_con); - add_cap_releases(mdsc, s, -1); + ceph_add_cap_releases(mdsc, s, -1); if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG) - send_cap_releases(mdsc, s); + ceph_send_cap_releases(mdsc, s); mutex_unlock(&s->s_mutex); ceph_put_mds_session(s); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d9936c4..b292fa4 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -322,6 +322,12 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } +extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int extra); +extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session); + extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 60b7483..64b8b1f 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -120,6 +120,12 @@ void ceph_msgr_exit(void) destroy_workqueue(ceph_msgr_wq); } +void ceph_msgr_flush() +{ + flush_workqueue(ceph_msgr_wq); +} + + /* * socket callback functions */ diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 00a9430..76fbc95 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -213,6 +213,7 @@ extern int ceph_parse_ips(const char *c, const char *end, extern int ceph_msgr_init(void); extern void ceph_msgr_exit(void); +extern void ceph_msgr_flush(void); extern struct ceph_messenger *ceph_messenger_create( struct ceph_entity_addr *myaddr); diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index f6510a4..07a5399 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref) ceph_msg_put(req->reply); if (req->request) ceph_msg_put(req->request); + + kfree(req); } static void put_generic_request(struct ceph_mon_generic_request *req) @@ -704,8 +706,11 @@ static void handle_auth_reply(struct ceph_mon_client *monc, struct ceph_msg *msg) { int ret; + int was_auth = 0; mutex_lock(&monc->mutex); + if (monc->auth->ops) + was_auth = monc->auth->ops->is_authenticated(monc->auth); monc->pending_auth = 0; ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, msg->front.iov_len, @@ -716,7 +721,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, wake_up(&monc->client->auth_wq); } else if (ret > 0) { __send_prepared_auth_request(monc, ret); - } else if (monc->auth->ops->is_authenticated(monc->auth)) { + } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index afa7bb3..d25b4ad 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -361,8 +361,13 @@ static void put_osd(struct ceph_osd *osd) { dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), atomic_read(&osd->o_ref) - 1); - if (atomic_dec_and_test(&osd->o_ref)) + if (atomic_dec_and_test(&osd->o_ref)) { + struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; + + if (osd->o_authorizer) + ac->ops->destroy_authorizer(ac, osd->o_authorizer); kfree(osd); + } } /* diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index cfdd8f4..ddc656f 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -706,7 +706,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, len, *p, end); newcrush = crush_decode(*p, min(*p+len, end)); if (IS_ERR(newcrush)) - return ERR_PTR(PTR_ERR(newcrush)); + return ERR_CAST(newcrush); } /* new flags? */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7c663d9..fa87f51 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; - buf->f_namelen = PATH_MAX; + buf->f_namelen = NAME_MAX; buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ @@ -669,9 +669,17 @@ static void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_mdsc_stop(&client->mdsc); - ceph_monc_stop(&client->monc); ceph_osdc_stop(&client->osdc); + /* + * make sure mds and osd connections close out before destroying + * the auth module, which is needed to free those connections' + * ceph_authorizers. + */ + ceph_msgr_flush(); + + ceph_monc_stop(&client->monc); + ceph_adjust_min_caps(-client->min_caps); ceph_debugfs_client_cleanup(client); @@ -738,7 +746,7 @@ static struct dentry *open_root_dentry(struct ceph_client *client, dout("open_root_inode opening '%s'\n", path); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_path1 = kstrdup(path, GFP_NOFS); req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.snap = CEPH_NOSNAP; @@ -918,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data) /* * construct our own bdi so we can control readahead, etc. */ -static atomic_long_t bdi_seq = ATOMIC_INIT(0); +static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3725c9e..10a4a40 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -10,7 +10,6 @@ #include <linux/fs.h> #include <linux/mempool.h> #include <linux/pagemap.h> -#include <linux/slab.h> #include <linux/wait.h> #include <linux/writeback.h> #include <linux/slab.h> @@ -811,7 +810,7 @@ extern void ceph_put_cap(struct ceph_cap *cap); extern void ceph_queue_caps_release(struct inode *inode); extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); -extern int ceph_fsync(struct file *file, struct dentry *dentry, int datasync); +extern int ceph_fsync(struct file *file, int datasync); extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); extern int ceph_get_cap_mds(struct inode *inode); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0242ff9..a7eb65c 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -84,7 +84,7 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data, extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, size_t write_size, loff_t *poffset); extern int cifs_lock(struct file *, int, struct file_lock *); -extern int cifs_fsync(struct file *, struct dentry *, int); +extern int cifs_fsync(struct file *, int); extern int cifs_flush(struct file *, fl_owner_t id); extern int cifs_file_mmap(struct file * , struct vm_area_struct *); extern const struct file_operations cifs_dir_ops; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a83541e..75541af 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1676,7 +1676,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, return rc; } -int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) +int cifs_fsync(struct file *file, int datasync) { int xid; int rc = 0; @@ -1688,7 +1688,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) xid = GetXid(); cFYI(1, "Sync file - name: %s datasync: 0x%x", - dentry->d_name.name, datasync); + file->f_path.dentry->d_name.name, datasync); rc = filemap_write_and_wait(inode->i_mapping); if (rc == 0) { @@ -1952,6 +1952,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping, bytes_read -= PAGE_CACHE_SIZE; continue; } + page_cache_release(page); target = kmap_atomic(page, KM_USER0); diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index d99860a..6b443ff 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h @@ -11,8 +11,7 @@ extern int coda_fake_statfs; void coda_destroy_inodecache(void); int coda_init_inodecache(void); -int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, - int datasync); +int coda_fsync(struct file *coda_file, int datasync); void coda_sysctl_init(void); void coda_sysctl_clean(void); diff --git a/fs/coda/file.c b/fs/coda/file.c index 7196077..ad3cd2a 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -202,10 +202,10 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) return 0; } -int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) +int coda_fsync(struct file *coda_file, int datasync) { struct file *host_file; - struct inode *coda_inode = coda_dentry->d_inode; + struct inode *coda_inode = coda_file->f_path.dentry->d_inode; struct coda_file_info *cfi; int err = 0; diff --git a/fs/compat.c b/fs/compat.c index f0b391c..6490d21 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -626,7 +626,7 @@ ssize_t compat_rw_copy_check_uvector(int type, tot_len += len; if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ goto out; - if (!access_ok(vrfy_dir(type), buf, len)) { + if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) { ret = -EFAULT; goto out; } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index c8af2d9..cf78d44 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -73,15 +73,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) return -EINVAL; sd_iattr = sd->s_iattr; - - error = inode_change_ok(inode, iattr); - if (error) - return error; - - error = inode_setattr(inode, iattr); - if (error) - return error; - if (!sd_iattr) { /* setting attributes for the first time, allocate now */ sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); @@ -94,9 +85,12 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; sd->s_iattr = sd_iattr; } - /* attributes were changed atleast once in past */ + error = simple_setattr(dentry, iattr); + if (error) + return error; + if (ia_valid & ATTR_UID) sd_iattr->ia_uid = iattr->ia_uid; if (ia_valid & ATTR_GID) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 4d74fc7..0210898 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -277,8 +277,10 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n" DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n"); DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n"); + /* - * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value + * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value * * These functions are exactly the same as the above functions (but use a hex * output for the decimal challenged). For details look at the above unsigned @@ -357,6 +359,23 @@ struct dentry *debugfs_create_x32(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_x32); +/** + * debugfs_create_x64 - create a debugfs file that is used to read and write an unsigned 64-bit value + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read to and write + * from. + */ +struct dentry *debugfs_create_x64(const char *name, mode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_x64); +} +EXPORT_SYMBOL_GPL(debugfs_create_x64); + static int debugfs_size_t_set(void *data, u64 val) { diff --git a/fs/direct-io.c b/fs/direct-io.c index da111aa..7600aac 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1134,27 +1134,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, return ret; } -/* - * This is a library function for use by filesystem drivers. - * - * The locking rules are governed by the flags parameter: - * - if the flags value contains DIO_LOCKING we use a fancy locking - * scheme for dumb filesystems. - * For writes this function is called under i_mutex and returns with - * i_mutex held, for reads, i_mutex is not held on entry, but it is - * taken and dropped again before returning. - * For reads and writes i_alloc_sem is taken in shared mode and released - * on I/O completion (which may happen asynchronously after returning to - * the caller). - * - * - if the flags value does NOT contain DIO_LOCKING we don't use any - * internal locking but rather rely on the filesystem to synchronize - * direct I/O reads/writes versus each other and truncate. - * For reads and writes both i_mutex and i_alloc_sem are not held on - * entry and are never taken. - */ ssize_t -__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, +__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) @@ -1247,9 +1228,46 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, nr_segs, blkbits, get_block, end_io, submit_io, dio); +out: + return retval; +} +EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc); + +/* + * This is a library function for use by filesystem drivers. + * + * The locking rules are governed by the flags parameter: + * - if the flags value contains DIO_LOCKING we use a fancy locking + * scheme for dumb filesystems. + * For writes this function is called under i_mutex and returns with + * i_mutex held, for reads, i_mutex is not held on entry, but it is + * taken and dropped again before returning. + * For reads and writes i_alloc_sem is taken in shared mode and released + * on I/O completion (which may happen asynchronously after returning to + * the caller). + * + * - if the flags value does NOT contain DIO_LOCKING we don't use any + * internal locking but rather rely on the filesystem to synchronize + * direct I/O reads/writes versus each other and truncate. + * For reads and writes both i_mutex and i_alloc_sem are not held on + * entry and are never taken. + */ +ssize_t +__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, const struct iovec *iov, loff_t offset, + unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int flags) +{ + ssize_t retval; + + retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, + offset, nr_segs, get_block, end_io, submit_io, flags); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again for DIO_LOCKING. + * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in + * their own manner. This is a further example of where the old + * truncate sequence is inadequate. * * NOTE: filesystems with their own locking have to handle this * on their own. @@ -1257,12 +1275,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (flags & DIO_LOCKING) { if (unlikely((rw & WRITE) && retval < 0)) { loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + if (end > isize) vmtruncate(inode, isize); } } -out: return retval; } EXPORT_SYMBOL(__blockdev_direct_IO); diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 3bdddbc..e8fcf4e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -274,7 +274,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) } static int -ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) +ecryptfs_fsync(struct file *file, int datasync) { return vfs_fsync(ecryptfs_file_to_lower(file), datasync); } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 65dee2f..31ef525 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -805,7 +805,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, - (ia->ia_size & ~PAGE_CACHE_MASK)); if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { - rc = vmtruncate(inode, ia->ia_size); + rc = simple_setsize(inode, ia->ia_size); if (rc) goto out; lower_ia->ia_size = ia->ia_size; @@ -830,7 +830,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, goto out; } } - vmtruncate(inode, ia->ia_size); + simple_setsize(inode, ia->ia_size); rc = ecryptfs_write_inode_size_to_metadata(inode); if (rc) { printk(KERN_ERR "Problem with " diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 839b9dc..fef6899 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -40,12 +40,11 @@ static int exofs_release_file(struct inode *inode, struct file *filp) return 0; } -static int exofs_file_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int exofs_file_fsync(struct file *filp, int datasync) { int ret; struct address_space *mapping = filp->f_mapping; - struct inode *inode = dentry->d_inode; + struct inode *inode = mapping->host; struct super_block *sb; ret = filemap_write_and_wait(mapping); @@ -66,7 +65,7 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry, static int exofs_flush(struct file *file, fl_owner_t id) { - exofs_file_fsync(file, file->f_path.dentry, 1); + exofs_file_fsync(file, 1); /* TODO: Flush the OSD target */ return 0; } diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 0b038e4..52b34f1 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -122,7 +122,6 @@ extern int ext2_write_inode (struct inode *, struct writeback_control *); extern void ext2_delete_inode (struct inode *); extern int ext2_sync_inode (struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); -extern void ext2_truncate (struct inode *); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); extern void ext2_get_inode_flags(struct ext2_inode_info *); @@ -155,7 +154,7 @@ extern void ext2_write_super (struct super_block *); extern const struct file_operations ext2_dir_operations; /* file.c */ -extern int ext2_fsync(struct file *file, struct dentry *dentry, int datasync); +extern int ext2_fsync(struct file *file, int datasync); extern const struct inode_operations ext2_file_inode_operations; extern const struct file_operations ext2_file_operations; extern const struct file_operations ext2_xip_file_operations; diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5d198d0..49eec94 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -40,13 +40,13 @@ static int ext2_release_file (struct inode * inode, struct file * filp) return 0; } -int ext2_fsync(struct file *file, struct dentry *dentry, int datasync) +int ext2_fsync(struct file *file, int datasync) { int ret; - struct super_block *sb = dentry->d_inode->i_sb; + struct super_block *sb = file->f_mapping->host->i_sb; struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; - ret = simple_fsync(file, dentry, datasync); + ret = generic_file_fsync(file, datasync); if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) { /* We don't really know where the IO error happened... */ ext2_error(sb, __func__, @@ -95,7 +95,6 @@ const struct file_operations ext2_xip_file_operations = { #endif const struct inode_operations ext2_file_inode_operations = { - .truncate = ext2_truncate, #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 527c46d..3675088 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -54,6 +54,18 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode) inode->i_blocks - ea_blocks == 0); } +static void ext2_truncate_blocks(struct inode *inode, loff_t offset); + +static void ext2_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + ext2_truncate_blocks(inode, inode->i_size); + } +} + /* * Called at the last iput() if i_nlink is zero. */ @@ -71,7 +83,7 @@ void ext2_delete_inode (struct inode * inode) inode->i_size = 0; if (inode->i_blocks) - ext2_truncate (inode); + ext2_truncate_blocks(inode, 0); ext2_free_inode (inode); return; @@ -757,8 +769,8 @@ int __ext2_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext2_get_block); + return block_write_begin_newtrunc(file, mapping, pos, len, flags, + pagep, fsdata, ext2_get_block); } static int @@ -766,8 +778,25 @@ ext2_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata); + ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata); + if (ret < 0) + ext2_write_failed(mapping, pos + len); + return ret; +} + +static int ext2_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + int ret; + + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (ret < len) + ext2_write_failed(mapping, pos + len); + return ret; } static int @@ -775,13 +804,18 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + /* * Dir-in-pagecache still uses ext2_write_begin. Would have to rework * directory handling code to pass around offsets rather than struct * pages in order to make this work easily. */ - return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext2_get_block); + ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep, + fsdata, ext2_get_block); + if (ret < 0) + ext2_write_failed(mapping, pos + len); + return ret; } static int ext2_nobh_writepage(struct page *page, @@ -800,10 +834,15 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, ext2_get_block, NULL); + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + + ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, + iov, offset, nr_segs, ext2_get_block, NULL); + if (ret < 0 && (rw & WRITE)) + ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); + return ret; } static int @@ -818,7 +857,7 @@ const struct address_space_operations ext2_aops = { .writepage = ext2_writepage, .sync_page = block_sync_page, .write_begin = ext2_write_begin, - .write_end = generic_write_end, + .write_end = ext2_write_end, .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, @@ -1027,7 +1066,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de ext2_free_data(inode, p, q); } -void ext2_truncate(struct inode *inode) +static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) { __le32 *i_data = EXT2_I(inode)->i_data; struct ext2_inode_info *ei = EXT2_I(inode); @@ -1039,27 +1078,8 @@ void ext2_truncate(struct inode *inode) int n; long iblock; unsigned blocksize; - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - if (ext2_inode_is_fast_symlink(inode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - blocksize = inode->i_sb->s_blocksize; - iblock = (inode->i_size + blocksize-1) - >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); - - if (mapping_is_xip(inode->i_mapping)) - xip_truncate_page(inode->i_mapping, inode->i_size); - else if (test_opt(inode->i_sb, NOBH)) - nobh_truncate_page(inode->i_mapping, - inode->i_size, ext2_get_block); - else - block_truncate_page(inode->i_mapping, - inode->i_size, ext2_get_block); + iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); n = ext2_block_to_path(inode, iblock, offsets, NULL); if (n == 0) @@ -1127,6 +1147,62 @@ do_indirects: ext2_discard_reservation(inode); mutex_unlock(&ei->truncate_mutex); +} + +static void ext2_truncate_blocks(struct inode *inode, loff_t offset) +{ + /* + * XXX: it seems like a bug here that we don't allow + * IS_APPEND inode to have blocks-past-i_size trimmed off. + * review and fix this. + * + * Also would be nice to be able to handle IO errors and such, + * but that's probably too much to ask. + */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (ext2_inode_is_fast_symlink(inode)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + __ext2_truncate_blocks(inode, offset); +} + +int ext2_setsize(struct inode *inode, loff_t newsize) +{ + loff_t oldsize; + int error; + + error = inode_newsize_ok(inode, newsize); + if (error) + return error; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return -EINVAL; + if (ext2_inode_is_fast_symlink(inode)) + return -EINVAL; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return -EPERM; + + if (mapping_is_xip(inode->i_mapping)) + error = xip_truncate_page(inode->i_mapping, newsize); + else if (test_opt(inode->i_sb, NOBH)) + error = nobh_truncate_page(inode->i_mapping, + newsize, ext2_get_block); + else + error = block_truncate_page(inode->i_mapping, + newsize, ext2_get_block); + if (error) + return error; + + oldsize = inode->i_size; + i_size_write(inode, newsize); + truncate_pagecache(inode, oldsize, newsize); + + __ext2_truncate_blocks(inode, newsize); + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; if (inode_needs_sync(inode)) { sync_mapping_buffers(inode->i_mapping); @@ -1134,6 +1210,8 @@ do_indirects: } else { mark_inode_dirty(inode); } + + return 0; } static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino, @@ -1474,8 +1552,15 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; } - error = inode_setattr(inode, iattr); - if (!error && (iattr->ia_valid & ATTR_MODE)) + if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) { + error = ext2_setsize(inode, iattr->ia_size); + if (error) + return error; + } + generic_setattr(inode, iattr); + if (iattr->ia_valid & ATTR_MODE) error = ext2_acl_chmod(inode); + mark_inode_dirty(inode); + return error; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 71e9eb1..7ff43f4 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -119,6 +119,8 @@ static void ext2_put_super (struct super_block * sb) int i; struct ext2_sb_info *sbi = EXT2_SB(sb); + dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + if (sb->s_dirt) ext2_write_super(sb); @@ -1063,6 +1065,12 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ext2_sops; sb->s_export_op = &ext2_export_ops; sb->s_xattr = ext2_xattr_handlers; + +#ifdef CONFIG_QUOTA + sb->dq_op = &dquot_operations; + sb->s_qcop = &dquot_quotactl_ops; +#endif + root = ext2_iget(sb, EXT2_ROOT_INO); if (IS_ERR(root)) { ret = PTR_ERR(root); @@ -1241,6 +1249,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) spin_unlock(&sbi->s_lock); return 0; } + /* * OK, we are remounting a valid rw partition rdonly, so set * the rdonly flag and then mark the partition as valid again. @@ -1248,6 +1257,13 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) es->s_state = cpu_to_le16(sbi->s_mount_state); es->s_mtime = cpu_to_le32(get_seconds()); spin_unlock(&sbi->s_lock); + + err = dquot_suspend(sb, -1); + if (err < 0) { + spin_lock(&sbi->s_lock); + goto restore_opts; + } + ext2_sync_super(sb, es, 1); } else { __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, @@ -1269,8 +1285,12 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) if (!ext2_setup_super (sb, es, 0)) sb->s_flags &= ~MS_RDONLY; spin_unlock(&sbi->s_lock); + ext2_write_super(sb); + + dquot_resume(sb, -1); } + return 0; restore_opts: sbi->s_mount_opt = old_opts.s_mount_opt; diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 373fa90..e2e72c3 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -297,7 +297,7 @@ static void free_rb_tree_fname(struct rb_root *root) kfree (old); } if (!parent) - root->rb_node = NULL; + *root = RB_ROOT; else if (parent->rb_left == n) parent->rb_left = NULL; else if (parent->rb_right == n) diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index fcf7487..d7e9f74 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -43,9 +43,9 @@ * inode to disk. */ -int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) +int ext3_sync_file(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ext3_inode_info *ei = EXT3_I(inode); journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; int ret, needs_barrier = 0; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 0fc1293..6c953bb 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -410,6 +410,8 @@ static void ext3_put_super (struct super_block * sb) struct ext3_super_block *es = sbi->s_es; int i, err; + dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + lock_kernel(); ext3_xattr_put_super(sb); @@ -748,7 +750,7 @@ static int ext3_release_dquot(struct dquot *dquot); static int ext3_mark_dquot_dirty(struct dquot *dquot); static int ext3_write_info(struct super_block *sb, int type); static int ext3_quota_on(struct super_block *sb, int type, int format_id, - char *path, int remount); + char *path); static int ext3_quota_on_mount(struct super_block *sb, int type); static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); @@ -767,12 +769,12 @@ static const struct dquot_operations ext3_quota_operations = { static const struct quotactl_ops ext3_qctl_operations = { .quota_on = ext3_quota_on, - .quota_off = vfs_quota_off, - .quota_sync = vfs_quota_sync, - .get_info = vfs_get_dqinfo, - .set_info = vfs_set_dqinfo, - .get_dqblk = vfs_get_dqblk, - .set_dqblk = vfs_set_dqblk + .quota_off = dquot_quota_off, + .quota_sync = dquot_quota_sync, + .get_info = dquot_get_dqinfo, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk }; #endif @@ -1527,7 +1529,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { if (sb_dqopt(sb)->files[i]) - vfs_quota_off(sb, i, 0); + dquot_quota_off(sb, i); } #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -2551,6 +2553,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) ext3_fsblk_t n_blocks_count = 0; unsigned long old_sb_flags; struct ext3_mount_options old_opts; + int enable_quota = 0; int err; #ifdef CONFIG_QUOTA int i; @@ -2597,6 +2600,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) } if (*flags & MS_RDONLY) { + err = dquot_suspend(sb, -1); + if (err < 0) + goto restore_opts; + /* * First of all, the unconditional stuff we have to do * to disable replay of the journal when we next remount @@ -2651,6 +2658,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) goto restore_opts; if (!ext3_setup_super (sb, es, 0)) sb->s_flags &= ~MS_RDONLY; + enable_quota = 1; } } #ifdef CONFIG_QUOTA @@ -2662,6 +2670,9 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) #endif unlock_super(sb); unlock_kernel(); + + if (enable_quota) + dquot_resume(sb, -1); return 0; restore_opts: sb->s_flags = old_sb_flags; @@ -2851,24 +2862,21 @@ static int ext3_write_info(struct super_block *sb, int type) */ static int ext3_quota_on_mount(struct super_block *sb, int type) { - return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], - EXT3_SB(sb)->s_jquota_fmt, type); + return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], + EXT3_SB(sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ static int ext3_quota_on(struct super_block *sb, int type, int format_id, - char *name, int remount) + char *name) { int err; struct path path; if (!test_opt(sb, QUOTA)) return -EINVAL; - /* When remounting, no checks are needed and in fact, name is NULL */ - if (remount) - return vfs_quota_on(sb, type, format_id, name, remount); err = kern_path(name, LOOKUP_FOLLOW, &path); if (err) @@ -2906,7 +2914,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, } } - err = vfs_quota_on_path(sb, type, format_id, &path); + err = dquot_quota_on_path(sb, type, format_id, &path); path_put(&path); return err; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 60bd310..19a4de5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1519,7 +1519,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, extern void ext4_htree_free_dir_info(struct dir_private_info *p); /* fsync.c */ -extern int ext4_sync_file(struct file *, struct dentry *, int); +extern int ext4_sync_file(struct file *, int); /* hash.c */ extern int ext4fs_dirhash(const char *name, int len, struct diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index b6a74f9..592adf2 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -71,9 +71,9 @@ static void ext4_sync_parent(struct inode *inode) * i_mutex lock is held when entering and exiting this function */ -int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) +int ext4_sync_file(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; int ret; @@ -81,7 +81,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) J_ASSERT(ext4_journal_current_handle() == NULL); - trace_ext4_sync_file(file, dentry, datasync); + trace_ext4_sync_file(file, datasync); if (inode->i_sb->s_flags & MS_RDONLY) return 0; @@ -91,7 +91,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) return ret; if (!journal) { - ret = simple_fsync(file, dentry, datasync); + ret = generic_file_fsync(file, datasync); if (!ret && !list_empty(&inode->i_dentry)) ext4_sync_parent(inode); return ret; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 19df61c..42272d6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4942,20 +4942,26 @@ void ext4_set_inode_flags(struct inode *inode) /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ void ext4_get_inode_flags(struct ext4_inode_info *ei) { - unsigned int flags = ei->vfs_inode.i_flags; - - ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL| - EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL); - if (flags & S_SYNC) - ei->i_flags |= EXT4_SYNC_FL; - if (flags & S_APPEND) - ei->i_flags |= EXT4_APPEND_FL; - if (flags & S_IMMUTABLE) - ei->i_flags |= EXT4_IMMUTABLE_FL; - if (flags & S_NOATIME) - ei->i_flags |= EXT4_NOATIME_FL; - if (flags & S_DIRSYNC) - ei->i_flags |= EXT4_DIRSYNC_FL; + unsigned int vfs_fl; + unsigned long old_fl, new_fl; + + do { + vfs_fl = ei->vfs_inode.i_flags; + old_fl = ei->i_flags; + new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL| + EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL| + EXT4_DIRSYNC_FL); + if (vfs_fl & S_SYNC) + new_fl |= EXT4_SYNC_FL; + if (vfs_fl & S_APPEND) + new_fl |= EXT4_APPEND_FL; + if (vfs_fl & S_IMMUTABLE) + new_fl |= EXT4_IMMUTABLE_FL; + if (vfs_fl & S_NOATIME) + new_fl |= EXT4_NOATIME_FL; + if (vfs_fl & S_DIRSYNC) + new_fl |= EXT4_DIRSYNC_FL; + } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl); } static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, @@ -5191,7 +5197,7 @@ static int ext4_inode_blocks_set(handle_t *handle, */ raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); raw_inode->i_blocks_high = 0; - ei->i_flags &= ~EXT4_HUGE_FILE_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); return 0; } if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) @@ -5204,9 +5210,9 @@ static int ext4_inode_blocks_set(handle_t *handle, */ raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); - ei->i_flags &= ~EXT4_HUGE_FILE_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); } else { - ei->i_flags |= EXT4_HUGE_FILE_FL; + ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); /* i_block is stored in file system block size */ i_blocks = i_blocks >> (inode->i_blkbits - 9); raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3a6c92a..52abfa1 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -960,6 +960,9 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } + if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) + return -EPERM; + /* Ext4 move extent does not support swapfile */ if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { ext4_debug("ext4 move extent: The argument files should " diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 49d88c0..4e8983a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -646,6 +646,8 @@ static void ext4_put_super(struct super_block *sb) struct ext4_super_block *es = sbi->s_es; int i, err; + dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + flush_workqueue(sbi->dio_unwritten_wq); destroy_workqueue(sbi->dio_unwritten_wq); @@ -1062,7 +1064,7 @@ static int ext4_release_dquot(struct dquot *dquot); static int ext4_mark_dquot_dirty(struct dquot *dquot); static int ext4_write_info(struct super_block *sb, int type); static int ext4_quota_on(struct super_block *sb, int type, int format_id, - char *path, int remount); + char *path); static int ext4_quota_on_mount(struct super_block *sb, int type); static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); @@ -1084,12 +1086,12 @@ static const struct dquot_operations ext4_quota_operations = { static const struct quotactl_ops ext4_qctl_operations = { .quota_on = ext4_quota_on, - .quota_off = vfs_quota_off, - .quota_sync = vfs_quota_sync, - .get_info = vfs_get_dqinfo, - .set_info = vfs_set_dqinfo, - .get_dqblk = vfs_get_dqblk, - .set_dqblk = vfs_set_dqblk + .quota_off = dquot_quota_off, + .quota_sync = dquot_quota_sync, + .get_info = dquot_get_dqinfo, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk }; #endif @@ -2054,7 +2056,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { if (sb_dqopt(sb)->files[i]) - vfs_quota_off(sb, i, 0); + dquot_quota_off(sb, i); } #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -3576,6 +3578,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_fsblk_t n_blocks_count = 0; unsigned long old_sb_flags; struct ext4_mount_options old_opts; + int enable_quota = 0; ext4_group_t g; unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; int err; @@ -3633,6 +3636,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if (*flags & MS_RDONLY) { + err = dquot_suspend(sb, -1); + if (err < 0) + goto restore_opts; + /* * First of all, the unconditional stuff we have to do * to disable replay of the journal when we next remount @@ -3701,6 +3708,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; if (!ext4_setup_super(sb, es, 0)) sb->s_flags &= ~MS_RDONLY; + enable_quota = 1; } } ext4_setup_system_zone(sb); @@ -3716,6 +3724,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) #endif unlock_super(sb); unlock_kernel(); + if (enable_quota) + dquot_resume(sb, -1); ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); kfree(orig_data); @@ -3913,24 +3923,21 @@ static int ext4_write_info(struct super_block *sb, int type) */ static int ext4_quota_on_mount(struct super_block *sb, int type) { - return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], - EXT4_SB(sb)->s_jquota_fmt, type); + return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], + EXT4_SB(sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ static int ext4_quota_on(struct super_block *sb, int type, int format_id, - char *name, int remount) + char *name) { int err; struct path path; if (!test_opt(sb, QUOTA)) return -EINVAL; - /* When remounting, no checks are needed and in fact, name is NULL */ - if (remount) - return vfs_quota_on(sb, type, format_id, name, remount); err = kern_path(name, LOOKUP_FOLLOW, &path); if (err) @@ -3969,7 +3976,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, } } - err = vfs_quota_on_path(sb, type, format_id, &path); + err = dquot_quota_on_path(sb, type, format_id, &path); path_put(&path); return err; } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 53dba57..27ac257 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -306,11 +306,11 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd, extern const struct file_operations fat_file_operations; extern const struct inode_operations fat_file_inode_operations; extern int fat_setattr(struct dentry * dentry, struct iattr * attr); -extern void fat_truncate(struct inode *inode); +extern int fat_setsize(struct inode *inode, loff_t offset); +extern void fat_truncate_blocks(struct inode *inode, loff_t offset); extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -extern int fat_file_fsync(struct file *file, struct dentry *dentry, - int datasync); +extern int fat_file_fsync(struct file *file, int datasync); /* fat/inode.c */ extern void fat_attach(struct inode *inode, loff_t i_pos); diff --git a/fs/fat/file.c b/fs/fat/file.c index a14c2f6..990dfae 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -149,12 +149,12 @@ static int fat_file_release(struct inode *inode, struct file *filp) return 0; } -int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync) +int fat_file_fsync(struct file *filp, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; int res, err; - res = simple_fsync(filp, dentry, datasync); + res = generic_file_fsync(filp, datasync); err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); return res ? res : err; @@ -283,7 +283,7 @@ static int fat_free(struct inode *inode, int skip) return fat_free_clusters(inode, free_start); } -void fat_truncate(struct inode *inode) +void fat_truncate_blocks(struct inode *inode, loff_t offset) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); const unsigned int cluster_size = sbi->cluster_size; @@ -293,10 +293,10 @@ void fat_truncate(struct inode *inode) * This protects against truncating a file bigger than it was then * trying to write into the hole. */ - if (MSDOS_I(inode)->mmu_private > inode->i_size) - MSDOS_I(inode)->mmu_private = inode->i_size; + if (MSDOS_I(inode)->mmu_private > offset) + MSDOS_I(inode)->mmu_private = offset; - nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits; + nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits; fat_free(inode, nr_clusters); fat_flush_inodes(inode->i_sb, inode, NULL); @@ -364,6 +364,18 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) return 0; } +int fat_setsize(struct inode *inode, loff_t offset) +{ + int error; + + error = simple_setsize(inode, offset); + if (error) + return error; + fat_truncate_blocks(inode, offset); + + return error; +} + #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) /* valid file mode bits */ #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) @@ -378,7 +390,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) /* * Expand the file. Since inode_setattr() updates ->i_size * before calling the ->truncate(), but FAT needs to fill the - * hole before it. + * hole before it. XXX: this is no longer true with new truncate + * sequence. */ if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_size > inode->i_size) { @@ -427,15 +440,20 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid &= ~ATTR_MODE; } - if (attr->ia_valid) - error = inode_setattr(inode, attr); + if (attr->ia_valid & ATTR_SIZE) { + error = fat_setsize(inode, attr->ia_size); + if (error) + goto out; + } + + generic_setattr(inode, attr); + mark_inode_dirty(inode); out: return error; } EXPORT_SYMBOL_GPL(fat_setattr); const struct inode_operations fat_file_inode_operations = { - .truncate = fat_truncate, .setattr = fat_setattr, .getattr = fat_getattr, }; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ed33904..7bf45ae 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -142,14 +142,29 @@ static int fat_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, fat_get_block); } +static void fat_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + fat_truncate_blocks(inode, inode->i_size); + } +} + static int fat_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int err; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - fat_get_block, + err = cont_write_begin_newtrunc(file, mapping, pos, len, flags, + pagep, fsdata, fat_get_block, &MSDOS_I(mapping->host)->mmu_private); + if (err < 0) + fat_write_failed(mapping, pos + len); + return err; } static int fat_write_end(struct file *file, struct address_space *mapping, @@ -159,6 +174,8 @@ static int fat_write_end(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; int err; err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); + if (err < len) + fat_write_failed(mapping, pos + len); if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; MSDOS_I(inode)->i_attrs |= ATTR_ARCH; @@ -172,7 +189,9 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; if (rw == WRITE) { /* @@ -193,8 +212,12 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * FAT need to use the DIO_LOCKING for avoiding the race * condition of fat_get_block() and ->truncate(). */ - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, fat_get_block, NULL); + ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, + iov, offset, nr_segs, fat_get_block, NULL); + if (ret < 0 && (rw & WRITE)) + fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); + + return ret; } static sector_t _fat_bmap(struct address_space *mapping, sector_t block) @@ -429,7 +452,7 @@ static void fat_delete_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; - fat_truncate(inode); + fat_truncate_blocks(inode, 0); clear_inode(inode); } @@ -274,7 +274,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg) ret = copy_from_user(&owner, owner_p, sizeof(owner)); if (ret) - return ret; + return -EFAULT; switch (owner.type) { case F_OWNER_TID: @@ -332,8 +332,11 @@ static int f_getown_ex(struct file *filp, unsigned long arg) } read_unlock(&filp->f_owner.lock); - if (!ret) + if (!ret) { ret = copy_to_user(owner_p, &owner, sizeof(owner)); + if (ret) + ret = -EFAULT; + } return ret; } diff --git a/fs/file_table.c b/fs/file_table.c index 32d12b7..5c7d10e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -194,14 +194,6 @@ struct file *alloc_file(struct path *path, fmode_t mode, } EXPORT_SYMBOL(alloc_file); -void fput(struct file *file) -{ - if (atomic_long_dec_and_test(&file->f_count)) - __fput(file); -} - -EXPORT_SYMBOL(fput); - /** * drop_file_write_access - give up ability to write to a file * @file: the file to which we will stop writing @@ -227,10 +219,9 @@ void drop_file_write_access(struct file *file) } EXPORT_SYMBOL_GPL(drop_file_write_access); -/* __fput is called from task context when aio completion releases the last - * last use of a struct file *. Do not use otherwise. +/* the real guts of fput() - releasing the last reference to file */ -void __fput(struct file *file) +static void __fput(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct vfsmount *mnt = file->f_path.mnt; @@ -268,6 +259,14 @@ void __fput(struct file *file) mntput(mnt); } +void fput(struct file *file) +{ + if (atomic_long_dec_and_test(&file->f_count)) + __fput(file); +} + +EXPORT_SYMBOL(fput); + struct file *fget(unsigned int fd) { struct file *file; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ea8592b..1d1088f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -45,7 +45,6 @@ struct wb_writeback_args { unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; - unsigned int sb_pinned:1; }; /* @@ -193,8 +192,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) } static void bdi_alloc_queue_work(struct backing_dev_info *bdi, - struct wb_writeback_args *args, - int wait) + struct wb_writeback_args *args) { struct bdi_work *work; @@ -206,8 +204,6 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, if (work) { bdi_work_init(work, args); bdi_queue_work(bdi, work); - if (wait) - bdi_wait_on_work_clear(work); } else { struct bdi_writeback *wb = &bdi->wb; @@ -234,11 +230,6 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, - /* - * Setting sb_pinned is not necessary for WB_SYNC_ALL, but - * lets make it explicitly clear. - */ - .sb_pinned = 1, }; struct bdi_work work; @@ -254,23 +245,21 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, * @bdi: the backing device to write from * @sb: write inodes from this super_block * @nr_pages: the number of pages to write - * @sb_locked: caller already holds sb umount sem. * * Description: * This does WB_SYNC_NONE opportunistic writeback. The IO is only * started when this function returns, we make no guarentees on - * completion. Caller specifies whether sb umount sem is held already or not. + * completion. Caller need not hold sb s_umount semaphore. * */ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages, int sb_locked) + long nr_pages) { struct wb_writeback_args args = { .sb = sb, .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1, - .sb_pinned = sb_locked, }; /* @@ -282,7 +271,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, args.for_background = 1; } - bdi_alloc_queue_work(bdi, &args, sb_locked); + bdi_alloc_queue_work(bdi, &args); } /* @@ -595,7 +584,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, /* * Caller must already hold the ref for this */ - if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { + if (wbc->sync_mode == WB_SYNC_ALL) { WARN_ON(!rwsem_is_locked(&sb->s_umount)); return SB_NOT_PINNED; } @@ -769,7 +758,6 @@ static long wb_writeback(struct bdi_writeback *wb, .for_kupdate = args->for_kupdate, .for_background = args->for_background, .range_cyclic = args->range_cyclic, - .sb_pinned = args->sb_pinned, }; unsigned long oldest_jif; long wrote = 0; @@ -912,7 +900,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) while ((work = get_next_work_item(bdi, wb)) != NULL) { struct wb_writeback_args args = work->args; - int post_clear; /* * Override sync mode, in case we must wait for completion @@ -920,13 +907,11 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) if (force_wait) work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; - post_clear = WB_SYNC_ALL || args.sb_pinned; - /* * If this isn't a data integrity operation, just notify * that we have seen this work and we are now starting it. */ - if (!post_clear) + if (args.sync_mode == WB_SYNC_NONE) wb_clear_pending(wb, work); wrote += wb_writeback(wb, &args); @@ -935,7 +920,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) * This is a data integrity writeback, so only do the * notification when we have completed the work. */ - if (post_clear) + if (args.sync_mode == WB_SYNC_ALL) wb_clear_pending(wb, work); } @@ -1011,7 +996,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages) if (!bdi_has_dirty_io(bdi)) continue; - bdi_alloc_queue_work(bdi, &args, 0); + bdi_alloc_queue_work(bdi, &args); } rcu_read_unlock(); @@ -1220,18 +1205,6 @@ static void wait_sb_inodes(struct super_block *sb) iput(old_inode); } -static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) -{ - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - long nr_to_write; - - nr_to_write = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); - - bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); -} - /** * writeback_inodes_sb - writeback dirty inodes from given super_block * @sb: the superblock @@ -1243,21 +1216,16 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) */ void writeback_inodes_sb(struct super_block *sb) { - __writeback_inodes_sb(sb, 0); -} -EXPORT_SYMBOL(writeback_inodes_sb); + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); + long nr_to_write; -/** - * writeback_inodes_sb_locked - writeback dirty inodes from given super_block - * @sb: the superblock - * - * Like writeback_inodes_sb(), except the caller already holds the - * sb umount sem. - */ -void writeback_inodes_sb_locked(struct super_block *sb) -{ - __writeback_inodes_sb(sb, 1); + nr_to_write = nr_dirty + nr_unstable + + (inodes_stat.nr_inodes - inodes_stat.nr_unused); + + bdi_start_writeback(sb->s_bdi, sb, nr_to_write); } +EXPORT_SYMBOL(writeback_inodes_sb); /** * writeback_inodes_sb_if_idle - start writeback if none underway diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 47aefd3..723b889 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -710,30 +710,26 @@ static void fscache_write_op(struct fscache_operation *_op) goto superseded; } - if (page) { - radix_tree_tag_set(&cookie->stores, page->index, - FSCACHE_COOKIE_STORING_TAG); - radix_tree_tag_clear(&cookie->stores, page->index, - FSCACHE_COOKIE_PENDING_TAG); - } + radix_tree_tag_set(&cookie->stores, page->index, + FSCACHE_COOKIE_STORING_TAG); + radix_tree_tag_clear(&cookie->stores, page->index, + FSCACHE_COOKIE_PENDING_TAG); spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); - if (page) { - fscache_set_op_state(&op->op, "Store"); - fscache_stat(&fscache_n_store_pages); - fscache_stat(&fscache_n_cop_write_page); - ret = object->cache->ops->write_page(op, page); - fscache_stat_d(&fscache_n_cop_write_page); - fscache_set_op_state(&op->op, "EndWrite"); - fscache_end_page_write(object, page); - if (ret < 0) { - fscache_set_op_state(&op->op, "Abort"); - fscache_abort_object(object); - } else { - fscache_enqueue_operation(&op->op); - } + fscache_set_op_state(&op->op, "Store"); + fscache_stat(&fscache_n_store_pages); + fscache_stat(&fscache_n_cop_write_page); + ret = object->cache->ops->write_page(op, page); + fscache_stat_d(&fscache_n_cop_write_page); + fscache_set_op_state(&op->op, "EndWrite"); + fscache_end_page_write(object, page); + if (ret < 0) { + fscache_set_op_state(&op->op, "Abort"); + fscache_abort_object(object); + } else { + fscache_enqueue_operation(&op->op); } _leave(""); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e53df5e..9424796 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -16,6 +16,9 @@ #include <linux/pagemap.h> #include <linux/file.h> #include <linux/slab.h> +#include <linux/pipe_fs_i.h> +#include <linux/swap.h> +#include <linux/splice.h> MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); @@ -499,6 +502,9 @@ struct fuse_copy_state { int write; struct fuse_req *req; const struct iovec *iov; + struct pipe_buffer *pipebufs; + struct pipe_buffer *currbuf; + struct pipe_inode_info *pipe; unsigned long nr_segs; unsigned long seglen; unsigned long addr; @@ -506,16 +512,16 @@ struct fuse_copy_state { void *mapaddr; void *buf; unsigned len; + unsigned move_pages:1; }; static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, - int write, struct fuse_req *req, + int write, const struct iovec *iov, unsigned long nr_segs) { memset(cs, 0, sizeof(*cs)); cs->fc = fc; cs->write = write; - cs->req = req; cs->iov = iov; cs->nr_segs = nr_segs; } @@ -523,7 +529,18 @@ static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, /* Unmap and put previous page of userspace buffer */ static void fuse_copy_finish(struct fuse_copy_state *cs) { - if (cs->mapaddr) { + if (cs->currbuf) { + struct pipe_buffer *buf = cs->currbuf; + + if (!cs->write) { + buf->ops->unmap(cs->pipe, buf, cs->mapaddr); + } else { + kunmap_atomic(cs->mapaddr, KM_USER0); + buf->len = PAGE_SIZE - cs->len; + } + cs->currbuf = NULL; + cs->mapaddr = NULL; + } else if (cs->mapaddr) { kunmap_atomic(cs->mapaddr, KM_USER0); if (cs->write) { flush_dcache_page(cs->pg); @@ -545,26 +562,61 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) unlock_request(cs->fc, cs->req); fuse_copy_finish(cs); - if (!cs->seglen) { - BUG_ON(!cs->nr_segs); - cs->seglen = cs->iov[0].iov_len; - cs->addr = (unsigned long) cs->iov[0].iov_base; - cs->iov++; - cs->nr_segs--; + if (cs->pipebufs) { + struct pipe_buffer *buf = cs->pipebufs; + + if (!cs->write) { + err = buf->ops->confirm(cs->pipe, buf); + if (err) + return err; + + BUG_ON(!cs->nr_segs); + cs->currbuf = buf; + cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); + cs->len = buf->len; + cs->buf = cs->mapaddr + buf->offset; + cs->pipebufs++; + cs->nr_segs--; + } else { + struct page *page; + + if (cs->nr_segs == cs->pipe->buffers) + return -EIO; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return -ENOMEM; + + buf->page = page; + buf->offset = 0; + buf->len = 0; + + cs->currbuf = buf; + cs->mapaddr = kmap_atomic(page, KM_USER0); + cs->buf = cs->mapaddr; + cs->len = PAGE_SIZE; + cs->pipebufs++; + cs->nr_segs++; + } + } else { + if (!cs->seglen) { + BUG_ON(!cs->nr_segs); + cs->seglen = cs->iov[0].iov_len; + cs->addr = (unsigned long) cs->iov[0].iov_base; + cs->iov++; + cs->nr_segs--; + } + err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg); + if (err < 0) + return err; + BUG_ON(err != 1); + offset = cs->addr % PAGE_SIZE; + cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); + cs->buf = cs->mapaddr + offset; + cs->len = min(PAGE_SIZE - offset, cs->seglen); + cs->seglen -= cs->len; + cs->addr += cs->len; } - down_read(¤t->mm->mmap_sem); - err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0, - &cs->pg, NULL); - up_read(¤t->mm->mmap_sem); - if (err < 0) - return err; - BUG_ON(err != 1); - offset = cs->addr % PAGE_SIZE; - cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); - cs->buf = cs->mapaddr + offset; - cs->len = min(PAGE_SIZE - offset, cs->seglen); - cs->seglen -= cs->len; - cs->addr += cs->len; return lock_request(cs->fc, cs->req); } @@ -586,23 +638,178 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) return ncpy; } +static int fuse_check_page(struct page *page) +{ + if (page_mapcount(page) || + page->mapping != NULL || + page_count(page) != 1 || + (page->flags & PAGE_FLAGS_CHECK_AT_PREP & + ~(1 << PG_locked | + 1 << PG_referenced | + 1 << PG_uptodate | + 1 << PG_lru | + 1 << PG_active | + 1 << PG_reclaim))) { + printk(KERN_WARNING "fuse: trying to steal weird page\n"); + printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping); + return 1; + } + return 0; +} + +static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) +{ + int err; + struct page *oldpage = *pagep; + struct page *newpage; + struct pipe_buffer *buf = cs->pipebufs; + struct address_space *mapping; + pgoff_t index; + + unlock_request(cs->fc, cs->req); + fuse_copy_finish(cs); + + err = buf->ops->confirm(cs->pipe, buf); + if (err) + return err; + + BUG_ON(!cs->nr_segs); + cs->currbuf = buf; + cs->len = buf->len; + cs->pipebufs++; + cs->nr_segs--; + + if (cs->len != PAGE_SIZE) + goto out_fallback; + + if (buf->ops->steal(cs->pipe, buf) != 0) + goto out_fallback; + + newpage = buf->page; + + if (WARN_ON(!PageUptodate(newpage))) + return -EIO; + + ClearPageMappedToDisk(newpage); + + if (fuse_check_page(newpage) != 0) + goto out_fallback_unlock; + + mapping = oldpage->mapping; + index = oldpage->index; + + /* + * This is a new and locked page, it shouldn't be mapped or + * have any special flags on it + */ + if (WARN_ON(page_mapped(oldpage))) + goto out_fallback_unlock; + if (WARN_ON(page_has_private(oldpage))) + goto out_fallback_unlock; + if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage))) + goto out_fallback_unlock; + if (WARN_ON(PageMlocked(oldpage))) + goto out_fallback_unlock; + + remove_from_page_cache(oldpage); + page_cache_release(oldpage); + + err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL); + if (err) { + printk(KERN_WARNING "fuse_try_move_page: failed to add page"); + goto out_fallback_unlock; + } + page_cache_get(newpage); + + if (!(buf->flags & PIPE_BUF_FLAG_LRU)) + lru_cache_add_file(newpage); + + err = 0; + spin_lock(&cs->fc->lock); + if (cs->req->aborted) + err = -ENOENT; + else + *pagep = newpage; + spin_unlock(&cs->fc->lock); + + if (err) { + unlock_page(newpage); + page_cache_release(newpage); + return err; + } + + unlock_page(oldpage); + page_cache_release(oldpage); + cs->len = 0; + + return 0; + +out_fallback_unlock: + unlock_page(newpage); +out_fallback: + cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); + cs->buf = cs->mapaddr + buf->offset; + + err = lock_request(cs->fc, cs->req); + if (err) + return err; + + return 1; +} + +static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, + unsigned offset, unsigned count) +{ + struct pipe_buffer *buf; + + if (cs->nr_segs == cs->pipe->buffers) + return -EIO; + + unlock_request(cs->fc, cs->req); + fuse_copy_finish(cs); + + buf = cs->pipebufs; + page_cache_get(page); + buf->page = page; + buf->offset = offset; + buf->len = count; + + cs->pipebufs++; + cs->nr_segs++; + cs->len = 0; + + return 0; +} + /* * Copy a page in the request to/from the userspace buffer. Must be * done atomically */ -static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, +static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, unsigned offset, unsigned count, int zeroing) { + int err; + struct page *page = *pagep; + if (page && zeroing && count < PAGE_SIZE) { void *mapaddr = kmap_atomic(page, KM_USER1); memset(mapaddr, 0, PAGE_SIZE); kunmap_atomic(mapaddr, KM_USER1); } while (count) { - if (!cs->len) { - int err = fuse_copy_fill(cs); - if (err) - return err; + if (cs->write && cs->pipebufs && page) { + return fuse_ref_page(cs, page, offset, count); + } else if (!cs->len) { + if (cs->move_pages && page && + offset == 0 && count == PAGE_SIZE) { + err = fuse_try_move_page(cs, pagep); + if (err <= 0) + return err; + } else { + err = fuse_copy_fill(cs); + if (err) + return err; + } } if (page) { void *mapaddr = kmap_atomic(page, KM_USER1); @@ -627,8 +834,10 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { - struct page *page = req->pages[i]; - int err = fuse_copy_page(cs, page, offset, count, zeroing); + int err; + + err = fuse_copy_page(cs, &req->pages[i], offset, count, + zeroing); if (err) return err; @@ -705,11 +914,10 @@ __acquires(&fc->lock) * * Called with fc->lock held, releases it */ -static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, - const struct iovec *iov, unsigned long nr_segs) +static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, + size_t nbytes, struct fuse_req *req) __releases(&fc->lock) { - struct fuse_copy_state cs; struct fuse_in_header ih; struct fuse_interrupt_in arg; unsigned reqsize = sizeof(ih) + sizeof(arg); @@ -725,14 +933,13 @@ __releases(&fc->lock) arg.unique = req->in.h.unique; spin_unlock(&fc->lock); - if (iov_length(iov, nr_segs) < reqsize) + if (nbytes < reqsize) return -EINVAL; - fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs); - err = fuse_copy_one(&cs, &ih, sizeof(ih)); + err = fuse_copy_one(cs, &ih, sizeof(ih)); if (!err) - err = fuse_copy_one(&cs, &arg, sizeof(arg)); - fuse_copy_finish(&cs); + err = fuse_copy_one(cs, &arg, sizeof(arg)); + fuse_copy_finish(cs); return err ? err : reqsize; } @@ -746,18 +953,13 @@ __releases(&fc->lock) * request_end(). Otherwise add it to the processing list, and set * the 'sent' flag. */ -static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, + struct fuse_copy_state *cs, size_t nbytes) { int err; struct fuse_req *req; struct fuse_in *in; - struct fuse_copy_state cs; unsigned reqsize; - struct file *file = iocb->ki_filp; - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) - return -EPERM; restart: spin_lock(&fc->lock); @@ -777,7 +979,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, if (!list_empty(&fc->interrupts)) { req = list_entry(fc->interrupts.next, struct fuse_req, intr_entry); - return fuse_read_interrupt(fc, req, iov, nr_segs); + return fuse_read_interrupt(fc, cs, nbytes, req); } req = list_entry(fc->pending.next, struct fuse_req, list); @@ -787,7 +989,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, in = &req->in; reqsize = in->h.len; /* If request is too large, reply with an error and restart the read */ - if (iov_length(iov, nr_segs) < reqsize) { + if (nbytes < reqsize) { req->out.h.error = -EIO; /* SETXATTR is special, since it may contain too large data */ if (in->h.opcode == FUSE_SETXATTR) @@ -796,12 +998,12 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, goto restart; } spin_unlock(&fc->lock); - fuse_copy_init(&cs, fc, 1, req, iov, nr_segs); - err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); + cs->req = req; + err = fuse_copy_one(cs, &in->h, sizeof(in->h)); if (!err) - err = fuse_copy_args(&cs, in->numargs, in->argpages, + err = fuse_copy_args(cs, in->numargs, in->argpages, (struct fuse_arg *) in->args, 0); - fuse_copy_finish(&cs); + fuse_copy_finish(cs); spin_lock(&fc->lock); req->locked = 0; if (req->aborted) { @@ -829,6 +1031,110 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, return err; } +static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct fuse_copy_state cs; + struct file *file = iocb->ki_filp; + struct fuse_conn *fc = fuse_get_conn(file); + if (!fc) + return -EPERM; + + fuse_copy_init(&cs, fc, 1, iov, nr_segs); + + return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs)); +} + +static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return 1; +} + +static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = generic_pipe_buf_release, + .steal = fuse_dev_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + +static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) +{ + int ret; + int page_nr = 0; + int do_wakeup = 0; + struct pipe_buffer *bufs; + struct fuse_copy_state cs; + struct fuse_conn *fc = fuse_get_conn(in); + if (!fc) + return -EPERM; + + bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); + if (!bufs) + return -ENOMEM; + + fuse_copy_init(&cs, fc, 1, NULL, 0); + cs.pipebufs = bufs; + cs.pipe = pipe; + ret = fuse_dev_do_read(fc, in, &cs, len); + if (ret < 0) + goto out; + + ret = 0; + pipe_lock(pipe); + + if (!pipe->readers) { + send_sig(SIGPIPE, current, 0); + if (!ret) + ret = -EPIPE; + goto out_unlock; + } + + if (pipe->nrbufs + cs.nr_segs > pipe->buffers) { + ret = -EIO; + goto out_unlock; + } + + while (page_nr < cs.nr_segs) { + int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); + struct pipe_buffer *buf = pipe->bufs + newbuf; + + buf->page = bufs[page_nr].page; + buf->offset = bufs[page_nr].offset; + buf->len = bufs[page_nr].len; + buf->ops = &fuse_dev_pipe_buf_ops; + + pipe->nrbufs++; + page_nr++; + ret += buf->len; + + if (pipe->inode) + do_wakeup = 1; + } + +out_unlock: + pipe_unlock(pipe); + + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + } + +out: + for (; page_nr < cs.nr_segs; page_nr++) + page_cache_release(bufs[page_nr].page); + + kfree(bufs); + return ret; +} + static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { @@ -988,23 +1294,17 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, * it from the list and copy the rest of the buffer to the request. * The request is finished by calling request_end() */ -static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t fuse_dev_do_write(struct fuse_conn *fc, + struct fuse_copy_state *cs, size_t nbytes) { int err; - size_t nbytes = iov_length(iov, nr_segs); struct fuse_req *req; struct fuse_out_header oh; - struct fuse_copy_state cs; - struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); - if (!fc) - return -EPERM; - fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs); if (nbytes < sizeof(struct fuse_out_header)) return -EINVAL; - err = fuse_copy_one(&cs, &oh, sizeof(oh)); + err = fuse_copy_one(cs, &oh, sizeof(oh)); if (err) goto err_finish; @@ -1017,7 +1317,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, * and error contains notification code. */ if (!oh.unique) { - err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs); + err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs); return err ? err : nbytes; } @@ -1036,7 +1336,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, if (req->aborted) { spin_unlock(&fc->lock); - fuse_copy_finish(&cs); + fuse_copy_finish(cs); spin_lock(&fc->lock); request_end(fc, req); return -ENOENT; @@ -1053,7 +1353,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, queue_interrupt(fc, req); spin_unlock(&fc->lock); - fuse_copy_finish(&cs); + fuse_copy_finish(cs); return nbytes; } @@ -1061,11 +1361,13 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, list_move(&req->list, &fc->io); req->out.h = oh; req->locked = 1; - cs.req = req; + cs->req = req; + if (!req->out.page_replace) + cs->move_pages = 0; spin_unlock(&fc->lock); - err = copy_out_args(&cs, &req->out, nbytes); - fuse_copy_finish(&cs); + err = copy_out_args(cs, &req->out, nbytes); + fuse_copy_finish(cs); spin_lock(&fc->lock); req->locked = 0; @@ -1081,10 +1383,101 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, err_unlock: spin_unlock(&fc->lock); err_finish: - fuse_copy_finish(&cs); + fuse_copy_finish(cs); return err; } +static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct fuse_copy_state cs; + struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); + if (!fc) + return -EPERM; + + fuse_copy_init(&cs, fc, 0, iov, nr_segs); + + return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs)); +} + +static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, + size_t len, unsigned int flags) +{ + unsigned nbuf; + unsigned idx; + struct pipe_buffer *bufs; + struct fuse_copy_state cs; + struct fuse_conn *fc; + size_t rem; + ssize_t ret; + + fc = fuse_get_conn(out); + if (!fc) + return -EPERM; + + bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); + if (!bufs) + return -ENOMEM; + + pipe_lock(pipe); + nbuf = 0; + rem = 0; + for (idx = 0; idx < pipe->nrbufs && rem < len; idx++) + rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len; + + ret = -EINVAL; + if (rem < len) { + pipe_unlock(pipe); + goto out; + } + + rem = len; + while (rem) { + struct pipe_buffer *ibuf; + struct pipe_buffer *obuf; + + BUG_ON(nbuf >= pipe->buffers); + BUG_ON(!pipe->nrbufs); + ibuf = &pipe->bufs[pipe->curbuf]; + obuf = &bufs[nbuf]; + + if (rem >= ibuf->len) { + *obuf = *ibuf; + ibuf->ops = NULL; + pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); + pipe->nrbufs--; + } else { + ibuf->ops->get(pipe, ibuf); + *obuf = *ibuf; + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + obuf->len = rem; + ibuf->offset += obuf->len; + ibuf->len -= obuf->len; + } + nbuf++; + rem -= obuf->len; + } + pipe_unlock(pipe); + + fuse_copy_init(&cs, fc, 0, NULL, nbuf); + cs.pipebufs = bufs; + cs.pipe = pipe; + + if (flags & SPLICE_F_MOVE) + cs.move_pages = 1; + + ret = fuse_dev_do_write(fc, &cs, len); + + for (idx = 0; idx < nbuf; idx++) { + struct pipe_buffer *buf = &bufs[idx]; + buf->ops->release(pipe, buf); + } +out: + kfree(bufs); + return ret; +} + static unsigned fuse_dev_poll(struct file *file, poll_table *wait) { unsigned mask = POLLOUT | POLLWRNORM; @@ -1226,8 +1619,10 @@ const struct file_operations fuse_dev_operations = { .llseek = no_llseek, .read = do_sync_read, .aio_read = fuse_dev_read, + .splice_read = fuse_dev_splice_read, .write = do_sync_write, .aio_write = fuse_dev_write, + .splice_write = fuse_dev_splice_write, .poll = fuse_dev_poll, .release = fuse_dev_release, .fasync = fuse_dev_fasync, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4787ae6..3cdc5f7 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1156,10 +1156,9 @@ static int fuse_dir_release(struct inode *inode, struct file *file) return 0; } -static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) +static int fuse_dir_fsync(struct file *file, int datasync) { - /* nfsd can call this with no file */ - return file ? fuse_fsync_common(file, de, datasync, 1) : 0; + return fuse_fsync_common(file, datasync, 1); } static bool update_mtime(unsigned ivalid) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a9f5e13..ada0ade 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -351,10 +351,9 @@ static void fuse_sync_writes(struct inode *inode) fuse_release_nowrite(inode); } -int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, - int isdir) +int fuse_fsync_common(struct file *file, int datasync, int isdir) { - struct inode *inode = de->d_inode; + struct inode *inode = file->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; struct fuse_req *req; @@ -403,9 +402,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, return err; } -static int fuse_fsync(struct file *file, struct dentry *de, int datasync) +static int fuse_fsync(struct file *file, int datasync) { - return fuse_fsync_common(file, de, datasync, 0); + return fuse_fsync_common(file, datasync, 0); } void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, @@ -517,17 +516,26 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) int i; size_t count = req->misc.read.in.size; size_t num_read = req->out.args[0].size; - struct inode *inode = req->pages[0]->mapping->host; + struct address_space *mapping = NULL; - /* - * Short read means EOF. If file size is larger, truncate it - */ - if (!req->out.h.error && num_read < count) { - loff_t pos = page_offset(req->pages[0]) + num_read; - fuse_read_update_size(inode, pos, req->misc.read.attr_ver); - } + for (i = 0; mapping == NULL && i < req->num_pages; i++) + mapping = req->pages[i]->mapping; - fuse_invalidate_attr(inode); /* atime changed */ + if (mapping) { + struct inode *inode = mapping->host; + + /* + * Short read means EOF. If file size is larger, truncate it + */ + if (!req->out.h.error && num_read < count) { + loff_t pos; + + pos = page_offset(req->pages[0]) + num_read; + fuse_read_update_size(inode, pos, + req->misc.read.attr_ver); + } + fuse_invalidate_attr(inode); /* atime changed */ + } for (i = 0; i < req->num_pages; i++) { struct page *page = req->pages[i]; @@ -536,6 +544,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) else SetPageError(page); unlock_page(page); + page_cache_release(page); } if (req->ff) fuse_file_put(req->ff); @@ -550,6 +559,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file) req->out.argpages = 1; req->out.page_zeroing = 1; + req->out.page_replace = 1; fuse_read_fill(req, file, pos, count, FUSE_READ); req->misc.read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { @@ -589,6 +599,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) return PTR_ERR(req); } } + page_cache_get(page); req->pages[req->num_pages] = page; req->num_pages++; return 0; @@ -994,10 +1005,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); - down_read(¤t->mm->mmap_sem); - npages = get_user_pages(current, current->mm, user_addr, npages, !write, - 0, req->pages, NULL); - up_read(¤t->mm->mmap_sem); + npages = get_user_pages_fast(user_addr, npages, !write, req->pages); if (npages < 0) return npages; @@ -1580,9 +1588,9 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, while (iov_iter_count(&ii)) { struct page *page = pages[page_idx++]; size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii)); - void *kaddr, *map; + void *kaddr; - kaddr = map = kmap(page); + kaddr = kmap(page); while (todo) { char __user *uaddr = ii.iov->iov_base + ii.iov_offset; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 01cc462..8f309f0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -177,6 +177,9 @@ struct fuse_out { /** Zero partially or not copied pages */ unsigned page_zeroing:1; + /** Pages may be replaced with new ones */ + unsigned page_replace:1; + /** Number or arguments */ unsigned numargs; @@ -568,8 +571,7 @@ void fuse_release_common(struct file *file, int opcode); /** * Send FSYNC or FSYNCDIR request */ -int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, - int isdir); +int fuse_fsync_common(struct file *file, int datasync, int isdir); /** * Notify poll wakeup diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index a739a0a..9f8b525 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -700,8 +700,14 @@ out: return 0; page_cache_release(page); + + /* + * XXX(hch): the call below should probably be replaced with + * a call to the gfs2-specific truncate blocks helper to actually + * release disk blocks.. + */ if (pos + len > ip->i_inode.i_size) - vmtruncate(&ip->i_inode, ip->i_inode.i_size); + simple_setsize(&ip->i_inode, ip->i_inode.i_size); out_endtrans: gfs2_trans_end(sdp); out_trans_fail: diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index b20bfcc..ed9a94f 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -554,9 +554,9 @@ static int gfs2_close(struct inode *inode, struct file *file) * Returns: errno */ -static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) +static int gfs2_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); int ret = 0; diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 4e64352..98cdd05 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -1071,6 +1071,9 @@ int gfs2_permission(struct inode *inode, int mask) return error; } +/* + * XXX: should be changed to have proper ordering by opencoding simple_setsize + */ static int setattr_size(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); @@ -1081,7 +1084,7 @@ static int setattr_size(struct inode *inode, struct iattr *attr) error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); if (error) return error; - error = vmtruncate(inode, attr->ia_size); + error = simple_setsize(inode, attr->ia_size); gfs2_trans_end(sdp); if (error) return error; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 3a029d8..87ac189 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -411,9 +411,9 @@ int hostfs_file_open(struct inode *ino, struct file *file) return 0; } -int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) +int hostfs_fsync(struct file *file, int datasync) { - return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync); + return fsync_file(HOSTFS_I(file->f_mapping->host)->fd, datasync); } static const struct file_operations hostfs_file_fops = { diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 3efabff..a9ae9bf 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -19,9 +19,9 @@ static int hpfs_file_release(struct inode *inode, struct file *file) return 0; } -int hpfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) +int hpfs_file_fsync(struct file *file, int datasync) { - /*return file_fsync(file, dentry);*/ + /*return file_fsync(file, datasync);*/ return 0; /* Don't fsync :-) */ } diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 97bf738..75f9d43 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -268,7 +268,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *, /* file.c */ -int hpfs_file_fsync(struct file *, struct dentry *, int); +int hpfs_file_fsync(struct file *, int); extern const struct file_operations hpfs_file_ops; extern const struct inode_operations hpfs_file_iops; extern const struct address_space_operations hpfs_aops; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 2e4dfa8..826c3f9 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -587,7 +587,7 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) return err; } -static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) +static int hppfs_fsync(struct file *file, int datasync) { return 0; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a0bbd3d..a4e9a7e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -688,7 +688,7 @@ static void init_once(void *foo) const struct file_operations hugetlbfs_file_operations = { .read = hugetlbfs_read, .mmap = hugetlbfs_file_mmap, - .fsync = simple_sync_file, + .fsync = noop_fsync, .get_unmapped_area = hugetlb_get_unmapped_area, }; diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index a33aab6..54a92fd 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -234,8 +234,9 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) if (inode->i_mode != mode) { struct iattr attr; - attr.ia_valid = ATTR_MODE; + attr.ia_valid = ATTR_MODE | ATTR_CTIME; attr.ia_mode = mode; + attr.ia_ctime = CURRENT_TIME_SEC; rc = jffs2_do_setattr(inode, &attr); if (rc < 0) return rc; diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 7aa4417..166062a 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -222,15 +222,18 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime)); jffs2_free_raw_inode(ri); - d_instantiate(dentry, inode); D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->pino_nlink, inode->i_mapping->nrpages)); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; fail: make_bad_inode(inode); + unlock_new_inode(inode); iput(inode); jffs2_free_raw_inode(ri); return ret; @@ -360,8 +363,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char /* Eeek. Wave bye bye */ mutex_unlock(&f->sem); jffs2_complete_reservation(c); - jffs2_clear_inode(inode); - return PTR_ERR(fn); + ret = PTR_ERR(fn); + goto fail; } /* We use f->target field to store the target path. */ @@ -370,8 +373,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1); mutex_unlock(&f->sem); jffs2_complete_reservation(c); - jffs2_clear_inode(inode); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } memcpy(f->target, target, targetlen + 1); @@ -386,30 +389,24 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char jffs2_complete_reservation(c); ret = jffs2_init_security(inode, dir_i); - if (ret) { - jffs2_clear_inode(inode); - return ret; - } + if (ret) + goto fail; + ret = jffs2_init_acl_post(inode); - if (ret) { - jffs2_clear_inode(inode); - return ret; - } + if (ret) + goto fail; ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); - if (ret) { - /* Eep. */ - jffs2_clear_inode(inode); - return ret; - } + if (ret) + goto fail; rd = jffs2_alloc_raw_dirent(); if (!rd) { /* Argh. Now we treat it like a normal delete */ jffs2_complete_reservation(c); - jffs2_clear_inode(inode); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } dir_f = JFFS2_INODE_INFO(dir_i); @@ -437,8 +434,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char jffs2_complete_reservation(c); jffs2_free_raw_dirent(rd); mutex_unlock(&dir_f->sem); - jffs2_clear_inode(inode); - return PTR_ERR(fd); + ret = PTR_ERR(fd); + goto fail; } dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); @@ -453,7 +450,14 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char jffs2_complete_reservation(c); d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; + + fail: + make_bad_inode(inode); + unlock_new_inode(inode); + iput(inode); + return ret; } @@ -519,8 +523,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) /* Eeek. Wave bye bye */ mutex_unlock(&f->sem); jffs2_complete_reservation(c); - jffs2_clear_inode(inode); - return PTR_ERR(fn); + ret = PTR_ERR(fn); + goto fail; } /* No data here. Only a metadata node, which will be obsoleted by the first data write @@ -531,30 +535,24 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) jffs2_complete_reservation(c); ret = jffs2_init_security(inode, dir_i); - if (ret) { - jffs2_clear_inode(inode); - return ret; - } + if (ret) + goto fail; + ret = jffs2_init_acl_post(inode); - if (ret) { - jffs2_clear_inode(inode); - return ret; - } + if (ret) + goto fail; ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); - if (ret) { - /* Eep. */ - jffs2_clear_inode(inode); - return ret; - } + if (ret) + goto fail; rd = jffs2_alloc_raw_dirent(); if (!rd) { /* Argh. Now we treat it like a normal delete */ jffs2_complete_reservation(c); - jffs2_clear_inode(inode); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } dir_f = JFFS2_INODE_INFO(dir_i); @@ -582,8 +580,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) jffs2_complete_reservation(c); jffs2_free_raw_dirent(rd); mutex_unlock(&dir_f->sem); - jffs2_clear_inode(inode); - return PTR_ERR(fd); + ret = PTR_ERR(fd); + goto fail; } dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); @@ -599,7 +597,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) jffs2_complete_reservation(c); d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; + + fail: + make_bad_inode(inode); + unlock_new_inode(inode); + iput(inode); + return ret; } static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) @@ -693,8 +698,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de /* Eeek. Wave bye bye */ mutex_unlock(&f->sem); jffs2_complete_reservation(c); - jffs2_clear_inode(inode); - return PTR_ERR(fn); + ret = PTR_ERR(fn); + goto fail; } /* No data here. Only a metadata node, which will be obsoleted by the first data write @@ -705,30 +710,24 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de jffs2_complete_reservation(c); ret = jffs2_init_security(inode, dir_i); - if (ret) { - jffs2_clear_inode(inode); - return ret; - } + if (ret) + goto fail; + ret = jffs2_init_acl_post(inode); - if (ret) { - jffs2_clear_inode(inode); - return ret; - } + if (ret) + goto fail; ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); - if (ret) { - /* Eep. */ - jffs2_clear_inode(inode); - return ret; - } + if (ret) + goto fail; rd = jffs2_alloc_raw_dirent(); if (!rd) { /* Argh. Now we treat it like a normal delete */ jffs2_complete_reservation(c); - jffs2_clear_inode(inode); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } dir_f = JFFS2_INODE_INFO(dir_i); @@ -759,8 +758,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de jffs2_complete_reservation(c); jffs2_free_raw_dirent(rd); mutex_unlock(&dir_f->sem); - jffs2_clear_inode(inode); - return PTR_ERR(fd); + ret = PTR_ERR(fd); + goto fail; } dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); @@ -775,8 +774,14 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de jffs2_complete_reservation(c); d_instantiate(dentry, inode); - + unlock_new_inode(inode); return 0; + + fail: + make_bad_inode(inode); + unlock_new_inode(inode); + iput(inode); + return ret; } static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index e7291c1..8134970 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -26,9 +26,9 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page **pagep, void **fsdata); static int jffs2_readpage (struct file *filp, struct page *pg); -int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync) +int jffs2_fsync(struct file *filp, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); /* Trigger GC to flush any pending writes for this inode */ diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 86e0821..459d39d 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -169,13 +169,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) mutex_unlock(&f->sem); jffs2_complete_reservation(c); - /* We have to do the vmtruncate() without f->sem held, since + /* We have to do the simple_setsize() without f->sem held, since some pages may be locked and waiting for it in readpage(). We are protected from a simultaneous write() extending i_size back past iattr->ia_size, because do_truncate() holds the generic inode semaphore. */ if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) { - vmtruncate(inode, iattr->ia_size); + simple_setsize(inode, iattr->ia_size); inode->i_blocks = (inode->i_size + 511) >> 9; } @@ -465,7 +465,12 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i inode->i_blocks = 0; inode->i_size = 0; - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + make_bad_inode(inode); + unlock_new_inode(inode); + iput(inode); + return ERR_PTR(-EINVAL); + } return inode; } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 035a767..4791aac 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -158,7 +158,7 @@ extern const struct inode_operations jffs2_dir_inode_operations; extern const struct file_operations jffs2_file_operations; extern const struct inode_operations jffs2_file_inode_operations; extern const struct address_space_operations jffs2_file_address_operations; -int jffs2_fsync(struct file *, struct dentry *, int); +int jffs2_fsync(struct file *, int); int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); /* ioctl.c */ diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 85d9ec6..127263c 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -27,9 +27,9 @@ #include "jfs_acl.h" #include "jfs_debug.h" -int jfs_fsync(struct file *file, struct dentry *dentry, int datasync) +int jfs_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; int rc = 0; if (!(inode->i_state & I_DIRTY) || diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 9e6bda3..11042b1 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -21,7 +21,7 @@ struct fid; extern struct inode *ialloc(struct inode *, umode_t); -extern int jfs_fsync(struct file *, struct dentry *, int); +extern int jfs_fsync(struct file *, int); extern long jfs_ioctl(struct file *, unsigned int, unsigned long); extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); extern struct inode *jfs_iget(struct super_block *, unsigned long); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b66832a..b38f96b 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -179,6 +179,8 @@ static void jfs_put_super(struct super_block *sb) jfs_info("In jfs_put_super"); + dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + lock_kernel(); rc = jfs_umount(sb); @@ -396,10 +398,20 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) JFS_SBI(sb)->flag = flag; ret = jfs_mount_rw(sb, 1); + + /* mark the fs r/w for quota activity */ + sb->s_flags &= ~MS_RDONLY; + unlock_kernel(); + dquot_resume(sb, -1); return ret; } if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { + rc = dquot_suspend(sb, -1); + if (rc < 0) { + unlock_kernel(); + return rc; + } rc = jfs_umount_rw(sb); JFS_SBI(sb)->flag = flag; unlock_kernel(); @@ -469,6 +481,10 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) */ sb->s_op = &jfs_super_operations; sb->s_export_op = &jfs_export_operations; +#ifdef CONFIG_QUOTA + sb->dq_op = &dquot_operations; + sb->s_qcop = &dquot_quotactl_ops; +#endif /* * Initialize direct-mapping inode/address-space @@ -8,6 +8,7 @@ #include <linux/slab.h> #include <linux/mount.h> #include <linux/vfs.h> +#include <linux/quotaops.h> #include <linux/mutex.h> #include <linux/exportfs.h> #include <linux/writeback.h> @@ -58,11 +59,6 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na return NULL; } -int simple_sync_file(struct file * file, struct dentry *dentry, int datasync) -{ - return 0; -} - int dcache_dir_open(struct inode *inode, struct file *file) { static struct qstr cursor_name = {.len = 1, .name = "."}; @@ -190,7 +186,7 @@ const struct file_operations simple_dir_operations = { .llseek = dcache_dir_lseek, .read = generic_read_dir, .readdir = dcache_readdir, - .fsync = simple_sync_file, + .fsync = noop_fsync, }; const struct inode_operations simple_dir_inode_operations = { @@ -330,6 +326,81 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; } +/** + * simple_setsize - handle core mm and vfs requirements for file size change + * @inode: inode + * @newsize: new file size + * + * Returns 0 on success, -error on failure. + * + * simple_setsize must be called with inode_mutex held. + * + * simple_setsize will check that the requested new size is OK (see + * inode_newsize_ok), and then will perform the necessary i_size update + * and pagecache truncation (if necessary). It will be typically be called + * from the filesystem's setattr function when ATTR_SIZE is passed in. + * + * The inode itself must have correct permissions and attributes to allow + * i_size to be changed, this function then just checks that the new size + * requested is valid. + * + * In the case of simple in-memory filesystems with inodes stored solely + * in the inode cache, and file data in the pagecache, nothing more needs + * to be done to satisfy a truncate request. Filesystems with on-disk + * blocks for example will need to free them in the case of truncate, in + * that case it may be easier not to use simple_setsize (but each of its + * components will likely be required at some point to update pagecache + * and inode etc). + */ +int simple_setsize(struct inode *inode, loff_t newsize) +{ + loff_t oldsize; + int error; + + error = inode_newsize_ok(inode, newsize); + if (error) + return error; + + oldsize = inode->i_size; + i_size_write(inode, newsize); + truncate_pagecache(inode, oldsize, newsize); + + return error; +} +EXPORT_SYMBOL(simple_setsize); + +/** + * simple_setattr - setattr for simple in-memory filesystem + * @dentry: dentry + * @iattr: iattr structure + * + * Returns 0 on success, -error on failure. + * + * simple_setattr implements setattr for an in-memory filesystem which + * does not store its own file data or metadata (eg. uses the page cache + * and inode cache as its data store). + */ +int simple_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = inode_change_ok(inode, iattr); + if (error) + return error; + + if (iattr->ia_valid & ATTR_SIZE) { + error = simple_setsize(inode, iattr->ia_size); + if (error) + return error; + } + + generic_setattr(inode, iattr); + + return error; +} +EXPORT_SYMBOL(simple_setattr); + int simple_readpage(struct file *file, struct page *page) { clear_highpage(page); @@ -418,7 +489,8 @@ int simple_write_end(struct file *file, struct address_space *mapping, * unique inode values later for this filesystem, then you must take care * to pass it an appropriate max_reserved value to avoid collisions. */ -int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files) +int simple_fill_super(struct super_block *s, unsigned long magic, + struct tree_descr *files) { struct inode *inode; struct dentry *root; @@ -851,13 +923,22 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, } EXPORT_SYMBOL_GPL(generic_fh_to_parent); -int simple_fsync(struct file *file, struct dentry *dentry, int datasync) +/** + * generic_file_fsync - generic fsync implementation for simple filesystems + * @file: file to synchronize + * @datasync: only synchronize essential metadata if true + * + * This is a generic implementation of the fsync method for simple + * filesystems which track all non-inode metadata in the buffers list + * hanging off the address_space structure. + */ +int generic_file_fsync(struct file *file, int datasync) { struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 0, /* metadata-only; caller takes care of data */ }; - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; int err; int ret; @@ -872,7 +953,15 @@ int simple_fsync(struct file *file, struct dentry *dentry, int datasync) ret = err; return ret; } -EXPORT_SYMBOL(simple_fsync); +EXPORT_SYMBOL(generic_file_fsync); + +/* + * No-op implementation of ->fsync for in-memory filesystems. + */ +int noop_fsync(struct file *file, int datasync) +{ + return 0; +} EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); @@ -895,7 +984,7 @@ EXPORT_SYMBOL(simple_release_fs); EXPORT_SYMBOL(simple_rename); EXPORT_SYMBOL(simple_rmdir); EXPORT_SYMBOL(simple_statfs); -EXPORT_SYMBOL(simple_sync_file); +EXPORT_SYMBOL(noop_fsync); EXPORT_SYMBOL(simple_unlink); EXPORT_SYMBOL(simple_read_from_buffer); EXPORT_SYMBOL(simple_write_to_buffer); diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 0de5240..abe1caf 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -219,9 +219,9 @@ int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, } } -int logfs_fsync(struct file *file, struct dentry *dentry, int datasync) +int logfs_fsync(struct file *file, int datasync) { - struct super_block *sb = dentry->d_inode->i_sb; + struct super_block *sb = file->f_mapping->host->i_sb; logfs_write_anchor(sb); return 0; diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 1a9db84..c838c4d 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -506,7 +506,7 @@ extern const struct address_space_operations logfs_reg_aops; int logfs_readpage(struct file *file, struct page *page); int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); -int logfs_fsync(struct file *file, struct dentry *dentry, int datasync); +int logfs_fsync(struct file *file, int datasync); /* gc.c */ u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec); diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 6198731..1dbf921 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -22,7 +22,7 @@ const struct file_operations minix_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = minix_readdir, - .fsync = simple_fsync, + .fsync = generic_file_fsync, }; static inline void dir_put_page(struct page *page) @@ -72,16 +72,9 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n) { struct address_space *mapping = dir->i_mapping; struct page *page = read_mapping_page(mapping, n, NULL); - if (!IS_ERR(page)) { + if (!IS_ERR(page)) kmap(page); - if (!PageUptodate(page)) - goto fail; - } return page; - -fail: - dir_put_page(page); - return ERR_PTR(-EIO); } static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi) diff --git a/fs/minix/file.c b/fs/minix/file.c index 3eec3e6..d5320ff 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -19,7 +19,7 @@ const struct file_operations minix_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = simple_fsync, + .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, }; diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index f230109..13487ad 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -20,6 +20,9 @@ static inline block_t *i_data(struct inode *inode) return (block_t *)minix_i(inode)->u.i2_data; } +#define DIRCOUNT 7 +#define INDIRCOUNT(sb) (1 << ((sb)->s_blocksize_bits - 2)) + static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) { int n = 0; @@ -34,21 +37,21 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) printk("MINIX-fs: block_to_path: " "block %ld too big on dev %s\n", block, bdevname(sb->s_bdev, b)); - } else if (block < 7) { + } else if (block < DIRCOUNT) { offsets[n++] = block; - } else if ((block -= 7) < 256) { - offsets[n++] = 7; + } else if ((block -= DIRCOUNT) < INDIRCOUNT(sb)) { + offsets[n++] = DIRCOUNT; offsets[n++] = block; - } else if ((block -= 256) < 256*256) { - offsets[n++] = 8; - offsets[n++] = block>>8; - offsets[n++] = block & 255; + } else if ((block -= INDIRCOUNT(sb)) < INDIRCOUNT(sb) * INDIRCOUNT(sb)) { + offsets[n++] = DIRCOUNT + 1; + offsets[n++] = block / INDIRCOUNT(sb); + offsets[n++] = block % INDIRCOUNT(sb); } else { - block -= 256*256; - offsets[n++] = 9; - offsets[n++] = block>>16; - offsets[n++] = (block>>8) & 255; - offsets[n++] = block & 255; + block -= INDIRCOUNT(sb) * INDIRCOUNT(sb); + offsets[n++] = DIRCOUNT + 2; + offsets[n++] = (block / INDIRCOUNT(sb)) / INDIRCOUNT(sb); + offsets[n++] = (block / INDIRCOUNT(sb)) % INDIRCOUNT(sb); + offsets[n++] = block % INDIRCOUNT(sb); } return n; } @@ -1621,6 +1621,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, case LAST_DOTDOT: follow_dotdot(nd); dir = nd->path.dentry; + case LAST_DOT: if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { if (!dir->d_op->d_revalidate(dir, nd)) { error = -ESTALE; @@ -1628,7 +1629,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } } /* fallthrough */ - case LAST_DOT: case LAST_ROOT: if (open_flag & O_CREAT) goto exit; diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index b938708..3639cc5 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -22,7 +22,7 @@ #include <linux/ncp_fs.h> #include "ncplib_kernel.h" -static int ncp_fsync(struct file *file, struct dentry *dentry, int datasync) +static int ncp_fsync(struct file *file, int datasync) { return 0; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index db64854..782b431 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -53,7 +53,7 @@ static int nfs_link(struct dentry *, struct inode *, struct dentry *); static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -static int nfs_fsync_dir(struct file *, struct dentry *, int); +static int nfs_fsync_dir(struct file *, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); const struct file_operations nfs_dir_operations = { @@ -641,8 +641,10 @@ out: * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. */ -static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) +static int nfs_fsync_dir(struct file *filp, int datasync) { + struct dentry *dentry = filp->f_path.dentry; + dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, datasync); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cac96bc..36a5e74 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -53,7 +53,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); -static int nfs_file_fsync(struct file *, struct dentry *dentry, int datasync); +static int nfs_file_fsync(struct file *, int datasync); static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); @@ -322,8 +322,9 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * whether any write errors occurred for this process. */ static int -nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) +nfs_file_fsync(struct file *file, int datasync) { + struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 12f7109..4a27347 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4122,8 +4122,8 @@ nfs4_state_shutdown(void) nfs4_lock_state(); nfs4_release_reclaim(); __nfs4_state_shutdown(); - nfsd4_destroy_callback_queue(); nfs4_unlock_state(); + nfsd4_destroy_callback_queue(); } /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ebbf3b6..3c11112 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -443,8 +443,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, if (size_change) put_write_access(inode); if (!err) - if (EX_ISSYNC(fhp->fh_export)) - write_inode_now(inode, 1); + commit_metadata(fhp); out: return err; diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index af638d5..43c8c5b 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -75,8 +75,6 @@ struct nilfs_btree_path { extern struct kmem_cache *nilfs_btree_path_cache; -int nilfs_btree_path_cache_init(void); -void nilfs_btree_path_cache_destroy(void); int nilfs_btree_init(struct nilfs_bmap *); int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, const __u64 *, const __u64 *, int); diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 30292df..c9a30d7 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -27,7 +27,7 @@ #include "nilfs.h" #include "segment.h" -int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) +int nilfs_sync_file(struct file *file, int datasync) { /* * Called from fsync() system call @@ -37,7 +37,7 @@ int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) * This function should be implemented when the writeback function * will be implemented. */ - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; int err; if (!nilfs_inode_dirty(inode)) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 8723e5b..47d6d79 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -228,7 +228,7 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, struct page *, struct inode *); /* file.c */ -extern int nilfs_sync_file(struct file *, struct dentry *, int); +extern int nilfs_sync_file(struct file *, int); /* ioctl.c */ long nilfs_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index fdf1c3b..85fbb66 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -127,8 +127,6 @@ struct nilfs_segment_buffer { extern struct kmem_cache *nilfs_segbuf_cachep; -int __init nilfs_init_segbuf_cache(void); -void nilfs_destroy_segbuf_cache(void); struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); void nilfs_segbuf_free(struct nilfs_segment_buffer *); void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index dca1423..01e20db 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -221,8 +221,6 @@ enum { extern struct kmem_cache *nilfs_transaction_cachep; /* segment.c */ -extern int nilfs_init_transaction_cache(void); -extern void nilfs_destroy_transaction_cache(void); extern void nilfs_relax_pressure_in_lock(struct super_block *); extern int nilfs_construct_segment(struct super_block *); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 03b34b7..414ef68 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1130,13 +1130,13 @@ static void nilfs_segbuf_init_once(void *obj) static void nilfs_destroy_cachep(void) { - if (nilfs_inode_cachep) + if (nilfs_inode_cachep) kmem_cache_destroy(nilfs_inode_cachep); - if (nilfs_transaction_cachep) + if (nilfs_transaction_cachep) kmem_cache_destroy(nilfs_transaction_cachep); - if (nilfs_segbuf_cachep) + if (nilfs_segbuf_cachep) kmem_cache_destroy(nilfs_segbuf_cachep); - if (nilfs_btree_path_cache) + if (nilfs_btree_path_cache) kmem_cache_destroy(nilfs_btree_path_cache); } diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index fe44d3f..0f48e7c 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1527,10 +1527,9 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp) * this problem for now. We do write the $BITMAP attribute if it is present * which is the important one for a directory so things are not too bad. */ -static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int ntfs_dir_fsync(struct file *filp, int datasync) { - struct inode *bmp_vi, *vi = dentry->d_inode; + struct inode *bmp_vi, *vi = filp->f_mapping->host; int err, ret; ntfs_attr na; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index a1924a0..113ebd9 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2133,7 +2133,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, /** * ntfs_file_fsync - sync a file to disk * @filp: file to be synced - * @dentry: dentry describing the file to sync * @datasync: if non-zero only flush user data and not metadata * * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync @@ -2149,19 +2148,15 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, * Also, if @datasync is true, we do not wait on the inode to be written out * but we always wait on the page cache pages to be written out. * - * Note: In the past @filp could be NULL so we ignore it as we don't need it - * anyway. - * * Locking: Caller must hold i_mutex on the inode. * * TODO: We should probably also write all attribute/index inodes associated * with this inode but since we have no simple way of getting to them we ignore * this problem for now. */ -static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int ntfs_file_fsync(struct file *filp, int datasync) { - struct inode *vi = dentry->d_inode; + struct inode *vi = filp->f_mapping->host; int err, ret = 0; ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 97e54b9..6a13ea6 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -175,13 +175,12 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file) return 0; } -static int ocfs2_sync_file(struct file *file, - struct dentry *dentry, - int datasync) +static int ocfs2_sync_file(struct file *file, int datasync) { int err = 0; journal_t *journal; - struct inode *inode = dentry->d_inode; + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = file->f_mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, @@ -1053,7 +1052,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } /* - * This will intentionally not wind up calling vmtruncate(), + * This will intentionally not wind up calling simple_setsize(), * since all the work for a size change has been done above. * Otherwise, we could get into problems with truncate as * ip_alloc_sem is used there to protect against i_size @@ -2119,9 +2118,13 @@ relock: * direct write may have instantiated a few * blocks outside i_size. Trim these off again. * Don't need i_size_read because we hold i_mutex. + * + * XXX(hch): this looks buggy because ocfs2 did not + * actually implement ->truncate. Take a look at + * the new truncate sequence and update this accordingly */ if (*ppos + count > inode->i_size) - vmtruncate(inode, inode->i_size); + simple_setsize(inode, inode->i_size); ret = written; goto out_dio; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2c26ce2..0eaa929 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -879,13 +879,15 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend) if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) continue; if (unsuspend) - status = vfs_quota_enable( - sb_dqopt(sb)->files[type], - type, QFMT_OCFS2, - DQUOT_SUSPENDED); - else - status = vfs_quota_disable(sb, type, - DQUOT_SUSPENDED); + status = dquot_resume(sb, type); + else { + struct ocfs2_mem_dqinfo *oinfo; + + /* Cancel periodic syncing before suspending */ + oinfo = sb_dqinfo(sb, type)->dqi_priv; + cancel_delayed_work_sync(&oinfo->dqi_sync_work); + status = dquot_suspend(sb, type); + } if (status < 0) break; } @@ -916,8 +918,8 @@ static int ocfs2_enable_quotas(struct ocfs2_super *osb) status = -ENOENT; goto out_quota_off; } - status = vfs_quota_enable(inode[type], type, QFMT_OCFS2, - DQUOT_USAGE_ENABLED); + status = dquot_enable(inode[type], type, QFMT_OCFS2, + DQUOT_USAGE_ENABLED); if (status < 0) goto out_quota_off; } @@ -952,8 +954,8 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) /* Turn off quotas. This will remove all dquot structures from * memory and so they will be automatically synced to global * quota files */ - vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED | - DQUOT_LIMITS_ENABLED); + dquot_disable(sb, type, DQUOT_USAGE_ENABLED | + DQUOT_LIMITS_ENABLED); if (!inode) continue; iput(inode); @@ -962,7 +964,7 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) /* Handle quota on quotactl */ static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, - char *path, int remount) + char *path) { unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; @@ -970,30 +972,24 @@ static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) return -EINVAL; - if (remount) - return 0; /* Just ignore it has been handled in - * ocfs2_remount() */ - return vfs_quota_enable(sb_dqopt(sb)->files[type], type, - format_id, DQUOT_LIMITS_ENABLED); + return dquot_enable(sb_dqopt(sb)->files[type], type, + format_id, DQUOT_LIMITS_ENABLED); } /* Handle quota off quotactl */ -static int ocfs2_quota_off(struct super_block *sb, int type, int remount) +static int ocfs2_quota_off(struct super_block *sb, int type) { - if (remount) - return 0; /* Ignore now and handle later in - * ocfs2_remount() */ - return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); + return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); } static const struct quotactl_ops ocfs2_quotactl_ops = { .quota_on = ocfs2_quota_on, .quota_off = ocfs2_quota_off, - .quota_sync = vfs_quota_sync, - .get_info = vfs_get_dqinfo, - .set_info = vfs_set_dqinfo, - .get_dqblk = vfs_get_dqblk, - .set_dqblk = vfs_set_dqblk, + .quota_sync = dquot_quota_sync, + .get_info = dquot_get_dqinfo, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, }; static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 399487c..6e7a329 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -329,7 +329,7 @@ const struct file_operations omfs_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = simple_fsync, + .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, }; @@ -26,9 +26,14 @@ /* * The max size that a non-root user is allowed to grow the pipe. Can - * be set by root in /proc/sys/fs/pipe-max-pages + * be set by root in /proc/sys/fs/pipe-max-size */ -unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; +unsigned int pipe_max_size = 1048576; + +/* + * Minimum pipe size, as required by POSIX + */ +unsigned int pipe_min_size = PAGE_SIZE; /* * We use a start+len construction, which provides full use of the @@ -230,6 +235,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe, return kmap(buf->page); } +EXPORT_SYMBOL(generic_pipe_buf_map); /** * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer @@ -249,6 +255,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, } else kunmap(buf->page); } +EXPORT_SYMBOL(generic_pipe_buf_unmap); /** * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer @@ -279,6 +286,7 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, return 1; } +EXPORT_SYMBOL(generic_pipe_buf_steal); /** * generic_pipe_buf_get - get a reference to a &struct pipe_buffer @@ -294,6 +302,7 @@ void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { page_cache_get(buf->page); } +EXPORT_SYMBOL(generic_pipe_buf_get); /** * generic_pipe_buf_confirm - verify contents of the pipe buffer @@ -309,6 +318,7 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info, { return 0; } +EXPORT_SYMBOL(generic_pipe_buf_confirm); /** * generic_pipe_buf_release - put a reference to a &struct pipe_buffer @@ -323,6 +333,7 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe, { page_cache_release(buf->page); } +EXPORT_SYMBOL(generic_pipe_buf_release); static const struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, @@ -1112,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) * Allocate a new array of pipe buffers and copy the info over. Returns the * pipe size if successful, or return -ERROR on error. */ -static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) +static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) { struct pipe_buffer *bufs; /* - * Must be a power-of-2 currently - */ - if (!is_power_of_2(arg)) - return -EINVAL; - - /* * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't * expect a lot of shrink+grow operations, just free and allocate * again like we would do for growing. If the pipe currently * contains more buffers than arg, then return busy. */ - if (arg < pipe->nrbufs) + if (nr_pages < pipe->nrbufs) return -EBUSY; - bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); + bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL); if (unlikely(!bufs)) return -ENOMEM; @@ -1140,20 +1145,56 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) * and adjust the indexes. */ if (pipe->nrbufs) { - const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1); - const unsigned int head = pipe->nrbufs - tail; + unsigned int tail; + unsigned int head; + tail = pipe->curbuf + pipe->nrbufs; + if (tail < pipe->buffers) + tail = 0; + else + tail &= (pipe->buffers - 1); + + head = pipe->nrbufs - tail; if (head) memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); if (tail) - memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer)); + memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer)); } pipe->curbuf = 0; kfree(pipe->bufs); pipe->bufs = bufs; - pipe->buffers = arg; - return arg; + pipe->buffers = nr_pages; + return nr_pages * PAGE_SIZE; +} + +/* + * Currently we rely on the pipe array holding a power-of-2 number + * of pages. + */ +static inline unsigned int round_pipe_size(unsigned int size) +{ + unsigned long nr_pages; + + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; +} + +/* + * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax + * will return an error. + */ +int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, + size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); + if (ret < 0 || !write) + return ret; + + pipe_max_size = round_pipe_size(pipe_max_size); + return ret; } long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) @@ -1168,25 +1209,32 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) mutex_lock(&pipe->inode->i_mutex); switch (cmd) { - case F_SETPIPE_SZ: - if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) - return -EINVAL; - /* - * The pipe needs to be at least 2 pages large to - * guarantee POSIX behaviour. - */ - if (arg < 2) - return -EINVAL; - ret = pipe_set_size(pipe, arg); + case F_SETPIPE_SZ: { + unsigned int size, nr_pages; + + size = round_pipe_size(arg); + nr_pages = size >> PAGE_SHIFT; + + ret = -EINVAL; + if (!nr_pages) + goto out; + + if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { + ret = -EPERM; + goto out; + } + ret = pipe_set_size(pipe, nr_pages); break; + } case F_GETPIPE_SZ: - ret = pipe->buffers; + ret = pipe->buffers * PAGE_SIZE; break; default: ret = -EINVAL; break; } +out: mutex_unlock(&pipe->inode->i_mutex); return ret; } diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 3d3fd46..6e8fc62 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -80,7 +80,7 @@ const struct file_operations qnx4_dir_operations = .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = qnx4_readdir, - .fsync = simple_fsync, + .fsync = generic_file_fsync, }; const struct inode_operations qnx4_dir_inode_operations = diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 1ad8bf0..12c233d 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -228,10 +228,6 @@ static struct hlist_head *dquot_hash; struct dqstats dqstats; EXPORT_SYMBOL(dqstats); -#ifdef CONFIG_SMP -struct dqstats *dqstats_pcpu; -EXPORT_SYMBOL(dqstats_pcpu); -#endif static qsize_t inode_get_rsv_space(struct inode *inode); static void __dquot_initialize(struct inode *inode, int type); @@ -584,7 +580,7 @@ out: } EXPORT_SYMBOL(dquot_scan_active); -int vfs_quota_sync(struct super_block *sb, int type, int wait) +int dquot_quota_sync(struct super_block *sb, int type, int wait) { struct list_head *dirty; struct dquot *dquot; @@ -656,7 +652,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) return 0; } -EXPORT_SYMBOL(vfs_quota_sync); +EXPORT_SYMBOL(dquot_quota_sync); /* Free unused dquots from cache */ static void prune_dqcache(int count) @@ -676,27 +672,10 @@ static void prune_dqcache(int count) } } -static int dqstats_read(unsigned int type) -{ - int count = 0; -#ifdef CONFIG_SMP - int cpu; - for_each_possible_cpu(cpu) - count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type]; - /* Statistics reading is racy, but absolute accuracy isn't required */ - if (count < 0) - count = 0; -#else - count = dqstats.stat[type]; -#endif - return count; -} - /* * This is called from kswapd when we think we need some * more memory */ - static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) { if (nr) { @@ -704,7 +683,9 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) prune_dqcache(nr); spin_unlock(&dq_list_lock); } - return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure; + return ((unsigned) + percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]) + /100) * sysctl_vfs_cache_pressure; } static struct shrinker dqcache_shrinker = { @@ -1815,7 +1796,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA); if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) - transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_uid, GRPQUOTA); + transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA); ret = __dquot_transfer(inode, transfer_to); dqput_all(transfer_to); @@ -1850,6 +1831,7 @@ const struct dquot_operations dquot_operations = { .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, }; +EXPORT_SYMBOL(dquot_operations); /* * Generic helper for ->open on filesystems supporting disk quotas. @@ -1868,7 +1850,7 @@ EXPORT_SYMBOL(dquot_file_open); /* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ -int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags) +int dquot_disable(struct super_block *sb, int type, unsigned int flags) { int cnt, ret = 0; struct quota_info *dqopt = sb_dqopt(sb); @@ -1998,14 +1980,15 @@ put_inodes: } return ret; } -EXPORT_SYMBOL(vfs_quota_disable); +EXPORT_SYMBOL(dquot_disable); -int vfs_quota_off(struct super_block *sb, int type, int remount) +int dquot_quota_off(struct super_block *sb, int type) { - return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED : - (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED)); + return dquot_disable(sb, type, + DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); } -EXPORT_SYMBOL(vfs_quota_off); +EXPORT_SYMBOL(dquot_quota_off); + /* * Turn quotas on on a device */ @@ -2123,36 +2106,43 @@ out_fmt: } /* Reenable quotas on remount RW */ -static int vfs_quota_on_remount(struct super_block *sb, int type) +int dquot_resume(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); struct inode *inode; - int ret; + int ret = 0, cnt; unsigned int flags; - mutex_lock(&dqopt->dqonoff_mutex); - if (!sb_has_quota_suspended(sb, type)) { + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && cnt != type) + continue; + + mutex_lock(&dqopt->dqonoff_mutex); + if (!sb_has_quota_suspended(sb, cnt)) { + mutex_unlock(&dqopt->dqonoff_mutex); + continue; + } + inode = dqopt->files[cnt]; + dqopt->files[cnt] = NULL; + spin_lock(&dq_state_lock); + flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | + DQUOT_LIMITS_ENABLED, + cnt); + dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt); + spin_unlock(&dq_state_lock); mutex_unlock(&dqopt->dqonoff_mutex); - return 0; - } - inode = dqopt->files[type]; - dqopt->files[type] = NULL; - spin_lock(&dq_state_lock); - flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | - DQUOT_LIMITS_ENABLED, type); - dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type); - spin_unlock(&dq_state_lock); - mutex_unlock(&dqopt->dqonoff_mutex); - flags = dquot_generic_flag(flags, type); - ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id, - flags); - iput(inode); + flags = dquot_generic_flag(flags, cnt); + ret = vfs_load_quota_inode(inode, cnt, + dqopt->info[cnt].dqi_fmt_id, flags); + iput(inode); + } return ret; } +EXPORT_SYMBOL(dquot_resume); -int vfs_quota_on_path(struct super_block *sb, int type, int format_id, +int dquot_quota_on_path(struct super_block *sb, int type, int format_id, struct path *path) { int error = security_quota_on(path->dentry); @@ -2167,40 +2157,36 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id, DQUOT_LIMITS_ENABLED); return error; } -EXPORT_SYMBOL(vfs_quota_on_path); +EXPORT_SYMBOL(dquot_quota_on_path); -int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name, - int remount) +int dquot_quota_on(struct super_block *sb, int type, int format_id, char *name) { struct path path; int error; - if (remount) - return vfs_quota_on_remount(sb, type); - error = kern_path(name, LOOKUP_FOLLOW, &path); if (!error) { - error = vfs_quota_on_path(sb, type, format_id, &path); + error = dquot_quota_on_path(sb, type, format_id, &path); path_put(&path); } return error; } -EXPORT_SYMBOL(vfs_quota_on); +EXPORT_SYMBOL(dquot_quota_on); /* * More powerful function for turning on quotas allowing setting * of individual quota flags */ -int vfs_quota_enable(struct inode *inode, int type, int format_id, - unsigned int flags) +int dquot_enable(struct inode *inode, int type, int format_id, + unsigned int flags) { int ret = 0; struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); /* Just unsuspend quotas? */ - if (flags & DQUOT_SUSPENDED) - return vfs_quota_on_remount(sb, type); + BUG_ON(flags & DQUOT_SUSPENDED); + if (!flags) return 0; /* Just updating flags needed? */ @@ -2232,13 +2218,13 @@ out_lock: load_quota: return vfs_load_quota_inode(inode, type, format_id, flags); } -EXPORT_SYMBOL(vfs_quota_enable); +EXPORT_SYMBOL(dquot_enable); /* * This function is used when filesystem needs to initialize quotas * during mount time. */ -int vfs_quota_on_mount(struct super_block *sb, char *qf_name, +int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type) { struct dentry *dentry; @@ -2264,24 +2250,7 @@ out: dput(dentry); return error; } -EXPORT_SYMBOL(vfs_quota_on_mount); - -/* Wrapper to turn on quotas when remounting rw */ -int vfs_dq_quota_on_remount(struct super_block *sb) -{ - int cnt; - int ret = 0, err; - - if (!sb->s_qcop || !sb->s_qcop->quota_on) - return -ENOSYS; - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1); - if (err < 0 && !ret) - ret = err; - } - return ret; -} -EXPORT_SYMBOL(vfs_dq_quota_on_remount); +EXPORT_SYMBOL(dquot_quota_on_mount); static inline qsize_t qbtos(qsize_t blocks) { @@ -2316,8 +2285,8 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) spin_unlock(&dq_data_lock); } -int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, - struct fs_disk_quota *di) +int dquot_get_dqblk(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *di) { struct dquot *dquot; @@ -2329,7 +2298,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, return 0; } -EXPORT_SYMBOL(vfs_get_dqblk); +EXPORT_SYMBOL(dquot_get_dqblk); #define VFS_FS_DQ_MASK \ (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \ @@ -2428,7 +2397,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) return 0; } -int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, +int dquot_set_dqblk(struct super_block *sb, int type, qid_t id, struct fs_disk_quota *di) { struct dquot *dquot; @@ -2444,10 +2413,10 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, out: return rc; } -EXPORT_SYMBOL(vfs_set_dqblk); +EXPORT_SYMBOL(dquot_set_dqblk); /* Generic routine for getting common part of quota file information */ -int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) +int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) { struct mem_dqinfo *mi; @@ -2466,10 +2435,10 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return 0; } -EXPORT_SYMBOL(vfs_get_dqinfo); +EXPORT_SYMBOL(dquot_get_dqinfo); /* Generic routine for setting common part of quota file information */ -int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) +int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) { struct mem_dqinfo *mi; int err = 0; @@ -2496,27 +2465,27 @@ out: mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return err; } -EXPORT_SYMBOL(vfs_set_dqinfo); +EXPORT_SYMBOL(dquot_set_dqinfo); -const struct quotactl_ops vfs_quotactl_ops = { - .quota_on = vfs_quota_on, - .quota_off = vfs_quota_off, - .quota_sync = vfs_quota_sync, - .get_info = vfs_get_dqinfo, - .set_info = vfs_set_dqinfo, - .get_dqblk = vfs_get_dqblk, - .set_dqblk = vfs_set_dqblk +const struct quotactl_ops dquot_quotactl_ops = { + .quota_on = dquot_quota_on, + .quota_off = dquot_quota_off, + .quota_sync = dquot_quota_sync, + .get_info = dquot_get_dqinfo, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk }; - +EXPORT_SYMBOL(dquot_quotactl_ops); static int do_proc_dqstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { -#ifdef CONFIG_SMP - /* Update global table */ unsigned int type = (int *)table->data - dqstats.stat; - dqstats.stat[type] = dqstats_read(type); -#endif + + /* Update global table */ + dqstats.stat[type] = + percpu_counter_sum_positive(&dqstats.counter[type]); return proc_dointvec(table, write, buffer, lenp, ppos); } @@ -2609,7 +2578,7 @@ static ctl_table sys_table[] = { static int __init dquot_init(void) { - int i; + int i, ret; unsigned long nr_hash, order; printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); @@ -2627,12 +2596,11 @@ static int __init dquot_init(void) if (!dquot_hash) panic("Cannot create dquot hash table"); -#ifdef CONFIG_SMP - dqstats_pcpu = alloc_percpu(struct dqstats); - if (!dqstats_pcpu) - panic("Cannot create dquot stats table"); -#endif - memset(&dqstats, 0, sizeof(struct dqstats)); + for (i = 0; i < _DQST_DQSTAT_LAST; i++) { + ret = percpu_counter_init(&dqstats.counter[i], 0); + if (ret) + panic("Cannot create dquot stat counters"); + } /* Find power-of-two hlist_heads which can fit into allocation */ nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index ce3dfd0..b299961 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -73,7 +73,7 @@ static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, if (IS_ERR(pathname)) return PTR_ERR(pathname); if (sb->s_qcop->quota_on) - ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0); + ret = sb->s_qcop->quota_on(sb, type, id, pathname); putname(pathname); return ret; } @@ -260,7 +260,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_QUOTAOFF: if (!sb->s_qcop->quota_off) return -ENOSYS; - return sb->s_qcop->quota_off(sb, type, 0); + return sb->s_qcop->quota_off(sb, type); case Q_GETFMT: return quota_getfmt(sb, type, addr); case Q_GETINFO: diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 78f613c..4884ac5 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -43,12 +43,13 @@ const struct file_operations ramfs_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = simple_sync_file, + .fsync = noop_fsync, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .llseek = generic_file_llseek, }; const struct inode_operations ramfs_file_inode_operations = { + .setattr = simple_setattr, .getattr = simple_getattr, }; diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 5ea4ad8..d532c20 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = { .aio_read = generic_file_aio_read, .write = do_sync_write, .aio_write = generic_file_aio_write, - .fsync = simple_sync_file, + .fsync = noop_fsync, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .llseek = generic_file_llseek, @@ -146,7 +146,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) return ret; } - ret = vmtruncate(inode, newsize); + ret = simple_setsize(inode, newsize); return ret; } @@ -169,7 +169,8 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) /* pick out size-changing events */ if (ia->ia_valid & ATTR_SIZE) { - loff_t size = i_size_read(inode); + loff_t size = inode->i_size; + if (ia->ia_size != size) { ret = ramfs_nommu_resize(inode, ia->ia_size, size); if (ret < 0 || ia->ia_valid == ATTR_SIZE) @@ -182,7 +183,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) } } - ret = inode_setattr(inode, ia); + generic_setattr(inode, ia); out: ia->ia_valid = old_ia_valid; return ret; diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 4455fbe..198dabf 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -14,8 +14,7 @@ extern const struct reiserfs_key MIN_KEY; static int reiserfs_readdir(struct file *, void *, filldir_t); -static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, - int datasync); +static int reiserfs_dir_fsync(struct file *filp, int datasync); const struct file_operations reiserfs_dir_operations = { .llseek = generic_file_llseek, @@ -28,10 +27,9 @@ const struct file_operations reiserfs_dir_operations = { #endif }; -static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int reiserfs_dir_fsync(struct file *filp, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; int err; reiserfs_write_lock(inode->i_sb); err = reiserfs_commit_for_inode(inode); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 9977df9..b82cdd8 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -134,10 +134,9 @@ static void reiserfs_vfs_truncate_file(struct inode *inode) * be removed... */ -static int reiserfs_sync_file(struct file *filp, - struct dentry *dentry, int datasync) +static int reiserfs_sync_file(struct file *filp, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; int err; int barrier_done; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 59125fb..9822fa1 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -158,6 +158,7 @@ static int finish_unfinished(struct super_block *s) #ifdef CONFIG_QUOTA int i; int ms_active_set; + int quota_enabled[MAXQUOTAS]; #endif /* compose key to look for "save" links */ @@ -179,8 +180,15 @@ static int finish_unfinished(struct super_block *s) } /* Turn on quotas so that they are updated correctly */ for (i = 0; i < MAXQUOTAS; i++) { + quota_enabled[i] = 1; if (REISERFS_SB(s)->s_qf_names[i]) { - int ret = reiserfs_quota_on_mount(s, i); + int ret; + + if (sb_has_quota_active(s, i)) { + quota_enabled[i] = 0; + continue; + } + ret = reiserfs_quota_on_mount(s, i); if (ret < 0) reiserfs_warning(s, "reiserfs-2500", "cannot turn on journaled " @@ -304,8 +312,8 @@ static int finish_unfinished(struct super_block *s) #ifdef CONFIG_QUOTA /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { - if (sb_dqopt(s)->files[i]) - vfs_quota_off(s, i, 0); + if (sb_dqopt(s)->files[i] && quota_enabled[i]) + dquot_quota_off(s, i); } if (ms_active_set) /* Restore the flag back */ @@ -466,6 +474,8 @@ static void reiserfs_put_super(struct super_block *s) struct reiserfs_transaction_handle th; th.t_trans_id = 0; + dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + reiserfs_write_lock(s); if (s->s_dirt) @@ -620,7 +630,7 @@ static int reiserfs_acquire_dquot(struct dquot *); static int reiserfs_release_dquot(struct dquot *); static int reiserfs_mark_dquot_dirty(struct dquot *); static int reiserfs_write_info(struct super_block *, int); -static int reiserfs_quota_on(struct super_block *, int, int, char *, int); +static int reiserfs_quota_on(struct super_block *, int, int, char *); static const struct dquot_operations reiserfs_quota_operations = { .write_dquot = reiserfs_write_dquot, @@ -634,12 +644,12 @@ static const struct dquot_operations reiserfs_quota_operations = { static const struct quotactl_ops reiserfs_qctl_operations = { .quota_on = reiserfs_quota_on, - .quota_off = vfs_quota_off, - .quota_sync = vfs_quota_sync, - .get_info = vfs_get_dqinfo, - .set_info = vfs_set_dqinfo, - .get_dqblk = vfs_get_dqblk, - .set_dqblk = vfs_set_dqblk, + .quota_off = dquot_quota_off, + .quota_sync = dquot_quota_sync, + .get_info = dquot_get_dqinfo, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, }; #endif @@ -1242,6 +1252,11 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) if (s->s_flags & MS_RDONLY) /* it is read-only already */ goto out_ok; + + err = dquot_suspend(s, -1); + if (err < 0) + goto out_err; + /* try to remount file system with read-only permissions */ if (sb_umount_state(rs) == REISERFS_VALID_FS || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { @@ -1295,6 +1310,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) s->s_dirt = 0; if (!(*mount_flags & MS_RDONLY)) { + dquot_resume(s, -1); finish_unfinished(s); reiserfs_xattr_init(s, *mount_flags); } @@ -2022,15 +2038,15 @@ static int reiserfs_write_info(struct super_block *sb, int type) */ static int reiserfs_quota_on_mount(struct super_block *sb, int type) { - return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], - REISERFS_SB(sb)->s_jquota_fmt, type); + return dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], + REISERFS_SB(sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, - char *name, int remount) + char *name) { int err; struct path path; @@ -2039,9 +2055,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) return -EINVAL; - /* No more checks needed? Path and format_id are bogus anyway... */ - if (remount) - return vfs_quota_on(sb, type, format_id, name, 1); + err = kern_path(name, LOOKUP_FOLLOW, &path); if (err) return err; @@ -2085,7 +2099,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, if (err) goto out; } - err = vfs_quota_on_path(sb, type, format_id, &path); + err = dquot_quota_on_path(sb, type, format_id, &path); out: path_put(&path); return err; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 84ecf0e..8e187a0 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -28,8 +28,9 @@ #include "proto.h" static int -smb_fsync(struct file *file, struct dentry * dentry, int datasync) +smb_fsync(struct file *file, int datasync) { + struct dentry *dentry = file->f_path.dentry; struct smb_sb_info *server = server_from_dentry(dentry); int result; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index dfa1d67..9551cb6 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -714,7 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr) error = server->ops->truncate(inode, attr->ia_size); if (error) goto out; - error = vmtruncate(inode, attr->ia_size); + error = simple_setsize(inode, attr->ia_size); if (error) goto out; refresh = 1; diff --git a/fs/splice.c b/fs/splice.c index ac22b00..740e6b9 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; error = add_to_page_cache_lru(page, mapping, index, - mapping_gfp_mask(mapping)); + GFP_KERNEL); if (unlikely(error)) { page_cache_release(page); if (error == -EEXIST) @@ -24,7 +24,6 @@ #include <linux/slab.h> #include <linux/acct.h> #include <linux/blkdev.h> -#include <linux/quotaops.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/writeback.h> /* for the emergency remount stuff */ @@ -94,8 +93,6 @@ static struct super_block *alloc_super(struct file_system_type *type) init_rwsem(&s->s_dquot.dqptr_sem); init_waitqueue_head(&s->s_wait_unfrozen); s->s_maxbytes = MAX_NON_LFS; - s->dq_op = sb_dquot_ops; - s->s_qcop = sb_quotactl_ops; s->s_op = &default_op; s->s_time_gran = 1000000000; } @@ -160,7 +157,6 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { - vfs_dq_off(s, 0); fs->kill_sb(s); put_filesystem(fs); put_super(s); @@ -524,7 +520,7 @@ rescan: int do_remount_sb(struct super_block *sb, int flags, void *data, int force) { int retval; - int remount_rw, remount_ro; + int remount_ro; if (sb->s_frozen != SB_UNFROZEN) return -EBUSY; @@ -540,7 +536,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) sync_filesystem(sb); remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); - remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ @@ -549,9 +544,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) mark_files_ro(sb); else if (!fs_may_remount_ro(sb)) return -EBUSY; - retval = vfs_dq_off(sb, 1); - if (retval < 0 && retval != -ENOSYS) - return -EBUSY; } if (sb->s_op->remount_fs) { @@ -560,8 +552,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) return retval; } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); - if (remount_rw) - vfs_dq_quota_on_remount(sb); + /* * Some filesystems modify their metadata via some other path than the * bdev buffer cache (eg. use a private mapping, or directories in @@ -946,8 +937,8 @@ out: EXPORT_SYMBOL_GPL(vfs_kern_mount); /** - * freeze_super -- lock the filesystem and force it into a consistent state - * @super: the super to lock + * freeze_super - lock the filesystem and force it into a consistent state + * @sb: the super to lock * * Syncs the super to make sure the filesystem is consistent and calls the fs's * freeze_fs. Subsequent calls to this without first thawing the fs will return @@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) if (wait) sync_inodes_sb(sb); else - writeback_inodes_sb_locked(sb); + writeback_inodes_sb(sb); if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); @@ -130,12 +130,10 @@ void emergency_sync(void) /* * Generic function to fsync a file. - * - * filp may be NULL if called via the msync of a vma. */ -int file_fsync(struct file *filp, struct dentry *dentry, int datasync) +int file_fsync(struct file *filp, int datasync) { - struct inode * inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; struct super_block * sb; int ret, err; @@ -183,7 +181,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) * livelocks in fsync_buffers_list(). */ mutex_lock(&mapping->host->i_mutex); - err = file->f_op->fsync(file, file->f_path.dentry, datasync); + err = file->f_op->fsync(file, datasync); if (!ret) ret = err; mutex_unlock(&mapping->host->i_mutex); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index bbd77e9..0835a3b 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -117,13 +117,13 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr) if (error) goto out; - iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */ - - error = inode_setattr(inode, iattr); + error = sysfs_sd_setattr(sd, iattr); if (error) goto out; - error = sysfs_sd_setattr(sd, iattr); + /* this ignores size changes */ + generic_setattr(inode, iattr); + out: mutex_unlock(&sysfs_mutex); return error; diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 1dabed2..79941e4 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -24,7 +24,7 @@ const struct file_operations sysv_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = sysv_readdir, - .fsync = simple_fsync, + .fsync = generic_file_fsync, }; static inline void dir_put_page(struct page *page) diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 96340c0..750cc22 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = simple_fsync, + .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, }; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 4573734..d4a5380 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -43,6 +43,7 @@ static int sysv_sync_fs(struct super_block *sb, int wait) * then attach current time stamp. * But if the filesystem was marked clean, keep it clean. */ + sb->s_dirt = 0; old_time = fs32_to_cpu(sbi, *sbi->s_sb_time); if (sbi->s_type == FSTYPE_SYSV4) { if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time)) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5692cf7..12f445c 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -967,12 +967,15 @@ static int do_writepage(struct page *page, int len) * the page locked, and it locks @ui_mutex. However, write-back does take inode * @i_mutex, which means other VFS operations may be run on this inode at the * same time. And the problematic one is truncation to smaller size, from where - * we have to call 'vmtruncate()', which first changes @inode->i_size, then + * we have to call 'simple_setsize()', which first changes @inode->i_size, then * drops the truncated pages. And while dropping the pages, it takes the page - * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with + * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This * means that @inode->i_size is changed while @ui_mutex is unlocked. * + * XXX: with the new truncate the above is not true anymore, the simple_setsize + * calls can be replaced with the individual components. + * * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond * inode size. How do we do this if @inode->i_size may became smaller while we * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the @@ -1125,7 +1128,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, budgeted = 0; } - err = vmtruncate(inode, new_size); + err = simple_setsize(inode, new_size); if (err) goto out_budg; @@ -1214,7 +1217,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (attr->ia_valid & ATTR_SIZE) { dbg_gen("size %lld -> %lld", inode->i_size, new_size); - err = vmtruncate(inode, new_size); + err = simple_setsize(inode, new_size); if (err) goto out; } @@ -1223,7 +1226,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (attr->ia_valid & ATTR_SIZE) { /* Truncation changes inode [mc]time */ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); - /* 'vmtruncate()' changed @i_size, update @ui_size */ + /* 'simple_setsize()' changed @i_size, update @ui_size */ ui->ui_size = inode->i_size; } @@ -1304,9 +1307,9 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) +int ubifs_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; int err; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index bd2542d..2eef553 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb { * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot * make sure @inode->i_size is always changed under @ui_mutex, because it - * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock + * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock * with 'ubifs_writepage()' (see file.c). All the other inode fields are * changed under @ui_mutex, so they do not need "shadow" fields. Note, one * could consider to rework locking and base it on "shadow" fields. @@ -1678,7 +1678,7 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ -int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +int ubifs_fsync(struct file *file, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); /* dir.c */ diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 9a9378b..b608efa 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -21,7 +21,6 @@ #include "udfdecl.h" -#include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/bitops.h> @@ -159,8 +158,6 @@ static void udf_bitmap_free_blocks(struct super_block *sb, udf_debug("byte=%2x\n", ((char *)bh->b_data)[(bit + i) >> 3]); } else { - if (inode) - dquot_free_block(inode, 1); udf_add_free_space(sb, sbi->s_partition, 1); } } @@ -210,15 +207,8 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb, bit = block % (sb->s_blocksize << 3); while (bit < (sb->s_blocksize << 3) && block_count > 0) { - if (!udf_test_bit(bit, bh->b_data)) + if (!udf_clear_bit(bit, bh->b_data)) goto out; - else if (dquot_prealloc_block(inode, 1)) - goto out; - else if (!udf_clear_bit(bit, bh->b_data)) { - udf_debug("bit already cleared for block %d\n", bit); - dquot_free_block(inode, 1); - goto out; - } block_count--; alloc_count++; bit++; @@ -338,20 +328,6 @@ search_back: } got_block: - - /* - * Check quota for allocation of this block. - */ - if (inode) { - int ret = dquot_alloc_block(inode, 1); - - if (ret) { - mutex_unlock(&sbi->s_alloc_mutex); - *err = ret; - return 0; - } - } - newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) - (sizeof(struct spaceBitmapDesc) << 3); @@ -401,10 +377,6 @@ static void udf_table_free_blocks(struct super_block *sb, } iinfo = UDF_I(table); - /* We do this up front - There are some error conditions that - could occure, but.. oh well */ - if (inode) - dquot_free_block(inode, count); udf_add_free_space(sb, sbi->s_partition, count); start = bloc->logicalBlockNum + offset; @@ -649,10 +621,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, epos.offset -= adsize; alloc_count = (elen >> sb->s_blocksize_bits); - if (inode && dquot_prealloc_block(inode, - alloc_count > block_count ? block_count : alloc_count)) - alloc_count = 0; - else if (alloc_count > block_count) { + if (alloc_count > block_count) { alloc_count = block_count; eloc.logicalBlockNum += alloc_count; elen -= (alloc_count << sb->s_blocksize_bits); @@ -752,14 +721,6 @@ static int udf_table_new_block(struct super_block *sb, newblock = goal_eloc.logicalBlockNum; goal_eloc.logicalBlockNum++; goal_elen -= sb->s_blocksize; - if (inode) { - *err = dquot_alloc_block(inode, 1); - if (*err) { - brelse(goal_epos.bh); - mutex_unlock(&sbi->s_alloc_mutex); - return 0; - } - } if (goal_elen) udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1); diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 1660c81..51552bf 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -211,5 +211,5 @@ const struct file_operations udf_dir_operations = { .read = generic_read_dir, .readdir = udf_readdir, .unlocked_ioctl = udf_ioctl, - .fsync = simple_fsync, + .fsync = generic_file_fsync, }; diff --git a/fs/udf/file.c b/fs/udf/file.c index baae3a7..94e06d6 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -34,7 +34,6 @@ #include <linux/errno.h> #include <linux/smp_lock.h> #include <linux/pagemap.h> -#include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/aio.h> #include <linux/smp_lock.h> @@ -219,39 +218,16 @@ const struct file_operations udf_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .unlocked_ioctl = udf_ioctl, - .open = dquot_file_open, + .open = generic_file_open, .mmap = generic_file_mmap, .write = do_sync_write, .aio_write = udf_file_aio_write, .release = udf_release_file, - .fsync = simple_fsync, + .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, .llseek = generic_file_llseek, }; -int udf_setattr(struct dentry *dentry, struct iattr *iattr) -{ - struct inode *inode = dentry->d_inode; - int error; - - error = inode_change_ok(inode, iattr); - if (error) - return error; - - if (is_quota_modification(inode, iattr)) - dquot_initialize(inode); - - if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { - error = dquot_transfer(inode, iattr); - if (error) - return error; - } - - return inode_setattr(inode, iattr); -} - const struct inode_operations udf_file_inode_operations = { .truncate = udf_truncate, - .setattr = udf_setattr, }; diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 2b5586c..18cd711 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -20,7 +20,6 @@ #include "udfdecl.h" #include <linux/fs.h> -#include <linux/quotaops.h> #include <linux/sched.h> #include <linux/slab.h> @@ -32,13 +31,6 @@ void udf_free_inode(struct inode *inode) struct super_block *sb = inode->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); - /* - * Note: we must free any quota before locking the superblock, - * as writing the quota to disk may need the lock as well. - */ - dquot_free_inode(inode); - dquot_drop(inode); - clear_inode(inode); mutex_lock(&sbi->s_alloc_mutex); @@ -61,7 +53,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) struct super_block *sb = dir->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); struct inode *inode; - int block, ret; + int block; uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; struct udf_inode_info *iinfo; struct udf_inode_info *dinfo = UDF_I(dir); @@ -146,17 +138,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) insert_inode_hash(inode); mark_inode_dirty(inode); - dquot_initialize(inode); - ret = dquot_alloc_inode(inode); - if (ret) { - dquot_drop(inode); - inode->i_flags |= S_NOQUOTA; - inode->i_nlink = 0; - iput(inode); - *err = ret; - return NULL; - } - *err = 0; return inode; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8a3fbd1..124852b 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -36,7 +36,6 @@ #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/writeback.h> -#include <linux/quotaops.h> #include <linux/slab.h> #include <linux/crc-itu-t.h> @@ -71,9 +70,6 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); void udf_delete_inode(struct inode *inode) { - if (!is_bad_inode(inode)) - dquot_initialize(inode); - truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) @@ -113,7 +109,6 @@ void udf_clear_inode(struct inode *inode) (unsigned long long)iinfo->i_lenExtents); } - dquot_drop(inode); kfree(iinfo->i_ext.i_data); iinfo->i_ext.i_data = NULL; } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 585f733..bf5fc67 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -27,7 +27,6 @@ #include <linux/errno.h> #include <linux/mm.h> #include <linux/slab.h> -#include <linux/quotaops.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/sched.h> @@ -563,8 +562,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, int err; struct udf_inode_info *iinfo; - dquot_initialize(dir); - lock_kernel(); inode = udf_new_inode(dir, mode, &err); if (!inode) { @@ -617,8 +614,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, if (!old_valid_dev(rdev)) return -EINVAL; - dquot_initialize(dir); - lock_kernel(); err = -EIO; inode = udf_new_inode(dir, mode, &err); @@ -664,8 +659,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct udf_inode_info *dinfo = UDF_I(dir); struct udf_inode_info *iinfo; - dquot_initialize(dir); - lock_kernel(); err = -EMLINK; if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) @@ -800,8 +793,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) struct fileIdentDesc *fi, cfi; struct kernel_lb_addr tloc; - dquot_initialize(dir); - retval = -ENOENT; lock_kernel(); fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); @@ -848,8 +839,6 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) struct fileIdentDesc cfi; struct kernel_lb_addr tloc; - dquot_initialize(dir); - retval = -ENOENT; lock_kernel(); fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); @@ -904,8 +893,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, struct buffer_head *bh; struct udf_inode_info *iinfo; - dquot_initialize(dir); - lock_kernel(); inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); if (!inode) @@ -1075,8 +1062,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, int err; struct buffer_head *bh; - dquot_initialize(dir); - lock_kernel(); if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { unlock_kernel(); @@ -1139,9 +1124,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernel_lb_addr tloc; struct udf_inode_info *old_iinfo = UDF_I(old_inode); - dquot_initialize(old_dir); - dquot_initialize(new_dir); - lock_kernel(); ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); if (ofi) { @@ -1387,7 +1369,6 @@ const struct export_operations udf_export_ops = { const struct inode_operations udf_dir_inode_operations = { .lookup = udf_lookup, .create = udf_create, - .setattr = udf_setattr, .link = udf_link, .unlink = udf_unlink, .symlink = udf_symlink, @@ -1400,5 +1381,4 @@ const struct inode_operations udf_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, - .setattr = udf_setattr, }; diff --git a/fs/udf/super.c b/fs/udf/super.c index 1e4543c..612d1e2 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -557,6 +557,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) { struct udf_options uopt; struct udf_sb_info *sbi = UDF_SB(sb); + int error = 0; uopt.flags = sbi->s_flags; uopt.uid = sbi->s_uid; @@ -582,17 +583,17 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) *flags |= MS_RDONLY; } - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { - unlock_kernel(); - return 0; - } + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + goto out_unlock; + if (*flags & MS_RDONLY) udf_close_lvid(sb); else udf_open_lvid(sb); +out_unlock: unlock_kernel(); - return 0; + return error; } /* Check Volume Structure Descriptors (ECMA 167 2/9.1) */ @@ -1939,7 +1940,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* Fill in the rest of the superblock */ sb->s_op = &udf_sb_ops; sb->s_export_op = &udf_export_ops; - sb->dq_op = NULL; + sb->s_dirt = 0; sb->s_magic = UDF_SUPER_MAGIC; sb->s_time_gran = 1000; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 9079ff7..2bac035 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -131,7 +131,6 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, /* file.c */ extern long udf_ioctl(struct file *, unsigned int, unsigned long); -extern int udf_setattr(struct dentry *dentry, struct iattr *iattr); /* inode.c */ extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); extern int udf_sync_inode(struct inode *); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 5cfa4d8..048484f 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -12,7 +12,6 @@ #include <linux/stat.h> #include <linux/time.h> #include <linux/string.h> -#include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/capability.h> #include <linux/bitops.h> @@ -85,9 +84,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) "bit already cleared for fragment %u", i); } - dquot_free_block(inode, count); - - fs32_add(sb, &ucg->cg_cs.cs_nffree, count); uspi->cs_total.cs_nffree += count; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); @@ -195,7 +191,6 @@ do_more: ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, 1); - dquot_free_block(inode, uspi->s_fpb); fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); uspi->cs_total.cs_nbfree++; @@ -511,7 +506,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, struct ufs_cg_private_info * ucpi; struct ufs_cylinder_group * ucg; unsigned cgno, fragno, fragoff, count, fragsize, i; - int ret; UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n", (unsigned long long)fragment, oldcount, newcount); @@ -557,11 +551,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1); for (i = oldcount; i < newcount; i++) ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i); - ret = dquot_alloc_block(inode, count); - if (ret) { - *err = ret; - return 0; - } fs32_sub(sb, &ucg->cg_cs.cs_nffree, count); fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); @@ -598,7 +587,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno, struct ufs_cylinder_group * ucg; unsigned oldcg, i, j, k, allocsize; u64 result; - int ret; UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n", inode->i_ino, cgno, (unsigned long long)goal, count); @@ -667,7 +655,6 @@ cg_found: for (i = count; i < uspi->s_fpb; i++) ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); i = uspi->s_fpb - count; - dquot_free_block(inode, i); fs32_add(sb, &ucg->cg_cs.cs_nffree, i); uspi->cs_total.cs_nffree += i; @@ -679,11 +666,6 @@ cg_found: result = ufs_bitmap_search (sb, ucpi, goal, allocsize); if (result == INVBLOCK) return 0; - ret = dquot_alloc_block(inode, count); - if (ret) { - *err = ret; - return 0; - } for (i = 0; i < count; i++) ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i); @@ -718,7 +700,6 @@ static u64 ufs_alloccg_block(struct inode *inode, struct ufs_super_block_first * usb1; struct ufs_cylinder_group * ucg; u64 result, blkno; - int ret; UFSD("ENTER, goal %llu\n", (unsigned long long)goal); @@ -752,11 +733,6 @@ gotit: ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, -1); - ret = dquot_alloc_block(inode, uspi->s_fpb); - if (ret) { - *err = ret; - return INVBLOCK; - } fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1); uspi->cs_total.cs_nbfree--; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 317a0d4..ec78475 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -666,6 +666,6 @@ not_empty: const struct file_operations ufs_dir_operations = { .read = generic_read_dir, .readdir = ufs_readdir, - .fsync = simple_fsync, + .fsync = generic_file_fsync, .llseek = generic_file_llseek, }; diff --git a/fs/ufs/file.c b/fs/ufs/file.c index a8962ce..33afa20 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -24,7 +24,6 @@ */ #include <linux/fs.h> -#include <linux/quotaops.h> #include "ufs_fs.h" #include "ufs.h" @@ -41,7 +40,7 @@ const struct file_operations ufs_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .open = dquot_file_open, - .fsync = simple_fsync, + .open = generic_file_open, + .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, }; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 3a959d5..594480e 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -27,7 +27,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/sched.h> #include <linux/bitops.h> @@ -95,9 +94,6 @@ void ufs_free_inode (struct inode * inode) is_directory = S_ISDIR(inode->i_mode); - dquot_free_inode(inode); - dquot_drop(inode); - clear_inode (inode); if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) @@ -347,21 +343,12 @@ cg_found: unlock_super (sb); - dquot_initialize(inode); - err = dquot_alloc_inode(inode); - if (err) { - dquot_drop(inode); - goto fail_without_unlock; - } - UFSD("allocating inode %lu\n", inode->i_ino); UFSD("EXIT\n"); return inode; fail_remove_inode: unlock_super(sb); -fail_without_unlock: - inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); UFSD("EXIT (FAILED): err %d\n", err); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index cffa756..73fe773 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -37,7 +37,6 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/writeback.h> -#include <linux/quotaops.h> #include "ufs_fs.h" #include "ufs.h" @@ -910,9 +909,6 @@ void ufs_delete_inode (struct inode * inode) { loff_t old_i_size; - if (!is_bad_inode(inode)) - dquot_initialize(inode); - truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) goto no_delete; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index eabc02e..b056f02 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -30,7 +30,6 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/smp_lock.h> -#include <linux/quotaops.h> #include "ufs_fs.h" #include "ufs.h" @@ -86,8 +85,6 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, UFSD("BEGIN\n"); - dquot_initialize(dir); - inode = ufs_new_inode(dir, mode); err = PTR_ERR(inode); @@ -112,8 +109,6 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t if (!old_valid_dev(rdev)) return -EINVAL; - dquot_initialize(dir); - inode = ufs_new_inode(dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { @@ -138,8 +133,6 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; - dquot_initialize(dir); - lock_kernel(); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); @@ -185,8 +178,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, return -EMLINK; } - dquot_initialize(dir); - inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); atomic_inc(&inode->i_count); @@ -204,8 +195,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= UFS_LINK_MAX) goto out; - dquot_initialize(dir); - lock_kernel(); inode_inc_link_count(dir); @@ -250,8 +239,6 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry) struct page *page; int err = -ENOENT; - dquot_initialize(dir); - de = ufs_find_entry(dir, &dentry->d_name, &page); if (!de) goto out; @@ -296,9 +283,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ufs_dir_entry *old_de; int err = -ENOENT; - dquot_initialize(old_dir); - dquot_initialize(new_dir); - old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index ad9bc1e..3ec5a9e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -77,7 +77,6 @@ #include <linux/errno.h> #include <linux/fs.h> -#include <linux/quotaops.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/stat.h> @@ -1047,7 +1046,7 @@ magic_found: */ sb->s_op = &ufs_super_ops; sb->s_export_op = &ufs_export_ops; - sb->dq_op = NULL; /***/ + sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic); uspi->s_sblkno = fs32_to_cpu(sb, usb1->fs_sblkno); @@ -1437,126 +1436,19 @@ static void destroy_inodecache(void) kmem_cache_destroy(ufs_inode_cachep); } -static void ufs_clear_inode(struct inode *inode) -{ - dquot_drop(inode); -} - -#ifdef CONFIG_QUOTA -static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t); -static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t); -#endif - static const struct super_operations ufs_super_ops = { .alloc_inode = ufs_alloc_inode, .destroy_inode = ufs_destroy_inode, .write_inode = ufs_write_inode, .delete_inode = ufs_delete_inode, - .clear_inode = ufs_clear_inode, .put_super = ufs_put_super, .write_super = ufs_write_super, .sync_fs = ufs_sync_fs, .statfs = ufs_statfs, .remount_fs = ufs_remount, .show_options = ufs_show_options, -#ifdef CONFIG_QUOTA - .quota_read = ufs_quota_read, - .quota_write = ufs_quota_write, -#endif }; -#ifdef CONFIG_QUOTA - -/* Read data from quotafile - avoid pagecache and such because we cannot afford - * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) - * we don't have to be afraid of races */ -static ssize_t ufs_quota_read(struct super_block *sb, int type, char *data, - size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> sb->s_blocksize_bits; - int err = 0; - int offset = off & (sb->s_blocksize - 1); - int tocopy; - size_t toread; - struct buffer_head *bh; - loff_t i_size = i_size_read(inode); - - if (off > i_size) - return 0; - if (off+len > i_size) - len = i_size-off; - toread = len; - while (toread > 0) { - tocopy = sb->s_blocksize - offset < toread ? - sb->s_blocksize - offset : toread; - - bh = ufs_bread(inode, blk, 0, &err); - if (err) - return err; - if (!bh) /* A hole? */ - memset(data, 0, tocopy); - else { - memcpy(data, bh->b_data+offset, tocopy); - brelse(bh); - } - offset = 0; - toread -= tocopy; - data += tocopy; - blk++; - } - return len; -} - -/* Write to quotafile */ -static ssize_t ufs_quota_write(struct super_block *sb, int type, - const char *data, size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> sb->s_blocksize_bits; - int err = 0; - int offset = off & (sb->s_blocksize - 1); - int tocopy; - size_t towrite = len; - struct buffer_head *bh; - - mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); - while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? - sb->s_blocksize - offset : towrite; - - bh = ufs_bread(inode, blk, 1, &err); - if (!bh) - goto out; - lock_buffer(bh); - memcpy(bh->b_data+offset, data, tocopy); - flush_dcache_page(bh->b_page); - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - unlock_buffer(bh); - brelse(bh); - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; - } -out: - if (len == towrite) { - mutex_unlock(&inode->i_mutex); - return err; - } - if (inode->i_size < off+len-towrite) - i_size_write(inode, off+len-towrite); - inode->i_version++; - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - mutex_unlock(&inode->i_mutex); - return len - towrite; -} - -#endif - static int ufs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index f294c44..589e01a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -44,7 +44,6 @@ #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/sched.h> -#include <linux/quotaops.h> #include "ufs_fs.h" #include "ufs.h" @@ -501,12 +500,10 @@ out: return err; } - /* - * We don't define our `inode->i_op->truncate', and call it here, - * because of: - * - there is no way to know old size - * - there is no way inform user about error, if it happens in `truncate' + * TODO: + * - truncate case should use proper ordering instead of using + * simple_setsize */ int ufs_setattr(struct dentry *dentry, struct iattr *attr) { @@ -518,19 +515,10 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if (is_quota_modification(inode, attr)) - dquot_initialize(inode); - - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - error = dquot_transfer(inode, attr); - if (error) - return error; - } if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { loff_t old_i_size = inode->i_size; - error = vmtruncate(inode, attr->ia_size); + error = simple_setsize(inode, attr->ia_size); if (error) return error; error = ufs_truncate(inode, old_i_size); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 089eaca..34640d6 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1333,6 +1333,21 @@ xfs_vm_writepage( trace_xfs_writepage(inode, page, 0); /* + * Refuse to write the page out if we are called from reclaim context. + * + * This is primarily to avoid stack overflows when called from deep + * used stacks in random callers for direct reclaim, but disabling + * reclaim for kswap is a nice side-effect as kswapd causes rather + * suboptimal I/O patters, too. + * + * This should really be done by the core VM, but until that happens + * filesystems like XFS, btrfs and ext4 have to take care of this + * by themselves. + */ + if (current->flags & PF_MEMALLOC) + goto out_fail; + + /* * We need a transaction if: * 1. There are delalloc buffers on the page * 2. The page is uptodate and we have unmapped buffers @@ -1366,14 +1381,6 @@ xfs_vm_writepage( if (!page_has_buffers(page)) create_empty_buffers(page, 1 << inode->i_blkbits, 0); - - /* - * VM calculation for nr_to_write seems off. Bump it way - * up, this gets simple streaming writes zippy again. - * To be reviewed again after Jens' writeback changes. - */ - wbc->nr_to_write *= 4; - /* * Convert delayed allocate, unwritten or unmapped space * to real space and flush out to disk. diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index d8fb1b5..257a56b 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -100,10 +100,10 @@ xfs_iozero( STATIC int xfs_file_fsync( struct file *file, - struct dentry *dentry, int datasync) { - struct xfs_inode *ip = XFS_I(dentry->d_inode); + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); struct xfs_trans *tp; int error = 0; int log_flushed = 0; @@ -140,8 +140,8 @@ xfs_file_fsync( * might gets cleared when the inode gets written out via the AIL * or xfs_iflush_cluster. */ - if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) || - ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) && + if (((inode->i_state & I_DIRTY_DATASYNC) || + ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && ip->i_update_core) { /* * Kick off a transaction to log the inode core to get the @@ -868,7 +868,7 @@ write_retry: mutex_lock(&inode->i_mutex); xfs_ilock(ip, iolock); - error2 = -xfs_file_fsync(file, file->f_path.dentry, + error2 = -xfs_file_fsync(file, (file->f_flags & __O_SYNC) ? 0 : 1); if (!error) error = error2; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 9c8019c..44f0b2d 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -585,11 +585,20 @@ xfs_vn_fallocate( bf.l_len = len; xfs_ilock(ip, XFS_IOLOCK_EXCL); + + /* check the new inode size is valid before allocating */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; + error = inode_newsize_ok(inode, new_size); + if (error) + goto out_unlock; + } + error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 0, XFS_ATTR_NOLOCK); - if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) - new_size = offset + len; + if (error) + goto out_unlock; /* Change file size if needed */ if (new_size) { @@ -600,6 +609,7 @@ xfs_vn_fallocate( error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } +out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); out_error: return error; diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 9ac8aea..067cafb 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -23,7 +23,6 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_quota.h" -#include "xfs_log.h" #include "xfs_trans.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 3884e20..ef7f021 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -164,10 +164,6 @@ xfs_inode_ag_iterator( struct xfs_perag *pag; pag = xfs_perag_get(mp, ag); - if (!pag->pag_ici_init) { - xfs_perag_put(pag); - continue; - } error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, exclusive, &nr); xfs_perag_put(pag); @@ -867,12 +863,7 @@ xfs_reclaim_inode_shrink( down_read(&xfs_mount_list_lock); list_for_each_entry(mp, &xfs_mount_list, m_mplist) { for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - pag = xfs_perag_get(mp, ag); - if (!pag->pag_ici_init) { - xfs_perag_put(pag); - continue; - } reclaimable += pag->pag_ici_reclaimable; xfs_perag_put(pag); } diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c index 207fa77..d12be84 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -50,7 +50,6 @@ #include "quota/xfs_dquot_item.h" #include "quota/xfs_dquot.h" #include "xfs_log_recover.h" -#include "xfs_buf_item.h" #include "xfs_inode_item.h" /* diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index ff6bc79..73d5aa1 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -82,33 +82,6 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class, ) ) -#define DEFINE_PERAG_REF_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ - unsigned long caller_ip), \ - TP_ARGS(mp, agno, refcount, caller_ip), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_agnumber_t, agno) \ - __field(int, refcount) \ - __field(unsigned long, caller_ip) \ - ), \ - TP_fast_assign( \ - __entry->dev = mp->m_super->s_dev; \ - __entry->agno = agno; \ - __entry->refcount = refcount; \ - __entry->caller_ip = caller_ip; \ - ), \ - TP_printk("dev %d:%d agno %u refcount %d caller %pf", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->agno, \ - __entry->refcount, \ - (char *)__entry->caller_ip) \ -); - -DEFINE_PERAG_REF_EVENT(xfs_perag_get) -DEFINE_PERAG_REF_EVENT(xfs_perag_put) - #define DEFINE_ATTR_LIST_EVENT(name) \ DEFINE_EVENT(xfs_attr_list_class, name, \ TP_PROTO(struct xfs_attr_list_context *ctx), \ @@ -122,6 +95,37 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); +DECLARE_EVENT_CLASS(xfs_perag_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, + unsigned long caller_ip), + TP_ARGS(mp, agno, refcount, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, refcount) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->refcount = refcount; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno %u refcount %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->refcount, + (char *)__entry->caller_ip) +); + +#define DEFINE_PERAG_REF_EVENT(name) \ +DEFINE_EVENT(xfs_perag_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agno, refcount, caller_ip)) +DEFINE_PERAG_REF_EVENT(xfs_perag_get); +DEFINE_PERAG_REF_EVENT(xfs_perag_put); + TRACE_EVENT(xfs_attr_list_node_descend, TP_PROTO(struct xfs_attr_list_context *ctx, struct xfs_da_node_entry *btree), @@ -775,165 +779,181 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); -#define DEFINE_RW_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ - TP_ARGS(ip, count, offset, flags), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(xfs_fsize_t, size) \ - __field(xfs_fsize_t, new_size) \ - __field(loff_t, offset) \ - __field(size_t, count) \ - __field(int, flags) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->size = ip->i_d.di_size; \ - __entry->new_size = ip->i_new_size; \ - __entry->offset = offset; \ - __entry->count = count; \ - __entry->flags = flags; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ - "offset 0x%llx count 0x%zx ioflags %s", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->size, \ - __entry->new_size, \ - __entry->offset, \ - __entry->count, \ - __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \ +DECLARE_EVENT_CLASS(xfs_file_class, + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), + TP_ARGS(ip, count, offset, flags), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + __entry->flags = flags; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count 0x%zx ioflags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count, + __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) ) + +#define DEFINE_RW_EVENT(name) \ +DEFINE_EVENT(xfs_file_class, name, \ + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ + TP_ARGS(ip, count, offset, flags)) DEFINE_RW_EVENT(xfs_file_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_splice_read); DEFINE_RW_EVENT(xfs_file_splice_write); - -#define DEFINE_PAGE_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ - TP_ARGS(inode, page, off), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(pgoff_t, pgoff) \ - __field(loff_t, size) \ - __field(unsigned long, offset) \ - __field(int, delalloc) \ - __field(int, unmapped) \ - __field(int, unwritten) \ - ), \ - TP_fast_assign( \ - int delalloc = -1, unmapped = -1, unwritten = -1; \ - \ - if (page_has_buffers(page)) \ - xfs_count_page_state(page, &delalloc, \ - &unmapped, &unwritten); \ - __entry->dev = inode->i_sb->s_dev; \ - __entry->ino = XFS_I(inode)->i_ino; \ - __entry->pgoff = page_offset(page); \ - __entry->size = i_size_read(inode); \ - __entry->offset = off; \ - __entry->delalloc = delalloc; \ - __entry->unmapped = unmapped; \ - __entry->unwritten = unwritten; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \ - "delalloc %d unmapped %d unwritten %d", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->pgoff, \ - __entry->size, \ - __entry->offset, \ - __entry->delalloc, \ - __entry->unmapped, \ - __entry->unwritten) \ +DECLARE_EVENT_CLASS(xfs_page_class, + TP_PROTO(struct inode *inode, struct page *page, unsigned long off), + TP_ARGS(inode, page, off), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(pgoff_t, pgoff) + __field(loff_t, size) + __field(unsigned long, offset) + __field(int, delalloc) + __field(int, unmapped) + __field(int, unwritten) + ), + TP_fast_assign( + int delalloc = -1, unmapped = -1, unwritten = -1; + + if (page_has_buffers(page)) + xfs_count_page_state(page, &delalloc, + &unmapped, &unwritten); + __entry->dev = inode->i_sb->s_dev; + __entry->ino = XFS_I(inode)->i_ino; + __entry->pgoff = page_offset(page); + __entry->size = i_size_read(inode); + __entry->offset = off; + __entry->delalloc = delalloc; + __entry->unmapped = unmapped; + __entry->unwritten = unwritten; + ), + TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " + "delalloc %d unmapped %d unwritten %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pgoff, + __entry->size, + __entry->offset, + __entry->delalloc, + __entry->unmapped, + __entry->unwritten) ) + +#define DEFINE_PAGE_EVENT(name) \ +DEFINE_EVENT(xfs_page_class, name, \ + TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ + TP_ARGS(inode, page, off)) DEFINE_PAGE_EVENT(xfs_writepage); DEFINE_PAGE_EVENT(xfs_releasepage); DEFINE_PAGE_EVENT(xfs_invalidatepage); -#define DEFINE_IOMAP_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ - int flags, struct xfs_bmbt_irec *irec), \ - TP_ARGS(ip, offset, count, flags, irec), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(loff_t, size) \ - __field(loff_t, new_size) \ - __field(loff_t, offset) \ - __field(size_t, count) \ - __field(int, flags) \ - __field(xfs_fileoff_t, startoff) \ - __field(xfs_fsblock_t, startblock) \ - __field(xfs_filblks_t, blockcount) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->size = ip->i_d.di_size; \ - __entry->new_size = ip->i_new_size; \ - __entry->offset = offset; \ - __entry->count = count; \ - __entry->flags = flags; \ - __entry->startoff = irec ? irec->br_startoff : 0; \ - __entry->startblock = irec ? irec->br_startblock : 0; \ - __entry->blockcount = irec ? irec->br_blockcount : 0; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ - "offset 0x%llx count %zd flags %s " \ - "startoff 0x%llx startblock %lld blockcount 0x%llx", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->size, \ - __entry->new_size, \ - __entry->offset, \ - __entry->count, \ - __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ - __entry->startoff, \ - (__int64_t)__entry->startblock, \ - __entry->blockcount) \ +DECLARE_EVENT_CLASS(xfs_iomap_class, + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, + int flags, struct xfs_bmbt_irec *irec), + TP_ARGS(ip, offset, count, flags, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(loff_t, size) + __field(loff_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + __field(int, flags) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + __entry->flags = flags; + __entry->startoff = irec ? irec->br_startoff : 0; + __entry->startblock = irec ? irec->br_startblock : 0; + __entry->blockcount = irec ? irec->br_blockcount : 0; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count %zd flags %s " + "startoff 0x%llx startblock %lld blockcount 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count, + __print_flags(__entry->flags, "|", BMAPI_FLAGS), + __entry->startoff, + (__int64_t)__entry->startblock, + __entry->blockcount) ) + +#define DEFINE_IOMAP_EVENT(name) \ +DEFINE_EVENT(xfs_iomap_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ + int flags, struct xfs_bmbt_irec *irec), \ + TP_ARGS(ip, offset, count, flags, irec)) DEFINE_IOMAP_EVENT(xfs_iomap_enter); DEFINE_IOMAP_EVENT(xfs_iomap_found); DEFINE_IOMAP_EVENT(xfs_iomap_alloc); -#define DEFINE_SIMPLE_IO_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ - TP_ARGS(ip, offset, count), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(loff_t, size) \ - __field(loff_t, new_size) \ - __field(loff_t, offset) \ - __field(size_t, count) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->size = ip->i_d.di_size; \ - __entry->new_size = ip->i_new_size; \ - __entry->offset = offset; \ - __entry->count = count; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ - "offset 0x%llx count %zd", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->size, \ - __entry->new_size, \ - __entry->offset, \ - __entry->count) \ +DECLARE_EVENT_CLASS(xfs_simple_io_class, + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), + TP_ARGS(ip, offset, count), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(loff_t, size) + __field(loff_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count %zd", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count) ); + +#define DEFINE_SIMPLE_IO_EVENT(name) \ +DEFINE_EVENT(xfs_simple_io_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ + TP_ARGS(ip, offset, count)) DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 38e7641..2d8b7bc 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -249,8 +249,10 @@ xfs_qm_hold_quotafs_ref( if (!xfs_Gqm) { xfs_Gqm = xfs_Gqm_init(); - if (!xfs_Gqm) + if (!xfs_Gqm) { + mutex_unlock(&xfs_Gqm_lock); return ENOMEM; + } } /* diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 401f364..4917d4e 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -227,7 +227,6 @@ typedef struct xfs_perag { atomic_t pagf_fstrms; /* # of filestreams active in this AG */ - int pag_ici_init; /* incore inode cache initialised */ rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ int pag_ici_reclaimable; /* reclaimable inodes */ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 6845db9..75df75f 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -382,9 +382,6 @@ xfs_iget( /* get the perag structure and ensure that it's inode capable */ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); - if (!pag->pagi_inodeok) - return EINVAL; - ASSERT(pag->pag_ici_init); agino = XFS_INO_TO_AGINO(mp, ino); again: @@ -744,30 +741,24 @@ xfs_ilock_demote( } #ifdef DEBUG -/* - * Debug-only routine, without additional rw_semaphore APIs, we can - * now only answer requests regarding whether we hold the lock for write - * (reader state is outside our visibility, we only track writer state). - * - * Note: this means !xfs_isilocked would give false positives, so don't do that. - */ int xfs_isilocked( xfs_inode_t *ip, uint lock_flags) { - if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) == - XFS_ILOCK_EXCL) { - if (!ip->i_lock.mr_writer) - return 0; + if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { + if (!(lock_flags & XFS_ILOCK_SHARED)) + return !!ip->i_lock.mr_writer; + return rwsem_is_locked(&ip->i_lock.mr_lock); } - if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) == - XFS_IOLOCK_EXCL) { - if (!ip->i_iolock.mr_writer) - return 0; + if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { + if (!(lock_flags & XFS_IOLOCK_SHARED)) + return !!ip->i_iolock.mr_writer; + return rwsem_is_locked(&ip->i_iolock.mr_lock); } - return 1; + ASSERT(0); + return 0; } #endif diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8cd6e8d..d53c39d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1940,10 +1940,10 @@ xfs_ifree_cluster( int blks_per_cluster; int nbufs; int ninodes; - int i, j, found, pre_flushed; + int i, j; xfs_daddr_t blkno; xfs_buf_t *bp; - xfs_inode_t *ip, **ip_found; + xfs_inode_t *ip; xfs_inode_log_item_t *iip; xfs_log_item_t *lip; struct xfs_perag *pag; @@ -1960,114 +1960,97 @@ xfs_ifree_cluster( nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; } - ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS); - for (j = 0; j < nbufs; j++, inum += ninodes) { + int found = 0; + blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), XFS_INO_TO_AGBNO(mp, inum)); + /* + * We obtain and lock the backing buffer first in the process + * here, as we have to ensure that any dirty inode that we + * can't get the flush lock on is attached to the buffer. + * If we scan the in-memory inodes first, then buffer IO can + * complete before we get a lock on it, and hence we may fail + * to mark all the active inodes on the buffer stale. + */ + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, + mp->m_bsize * blks_per_cluster, + XBF_LOCK); + + /* + * Walk the inodes already attached to the buffer and mark them + * stale. These will all have the flush locks held, so an + * in-memory inode walk can't lock them. + */ + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + while (lip) { + if (lip->li_type == XFS_LI_INODE) { + iip = (xfs_inode_log_item_t *)lip; + ASSERT(iip->ili_logged == 1); + lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; + xfs_trans_ail_copy_lsn(mp->m_ail, + &iip->ili_flush_lsn, + &iip->ili_item.li_lsn); + xfs_iflags_set(iip->ili_inode, XFS_ISTALE); + found++; + } + lip = lip->li_bio_list; + } /* - * Look for each inode in memory and attempt to lock it, - * we can be racing with flush and tail pushing here. - * any inode we get the locks on, add to an array of - * inode items to process later. + * For each inode in memory attempt to add it to the inode + * buffer and set it up for being staled on buffer IO + * completion. This is safe as we've locked out tail pushing + * and flushing by locking the buffer. * - * The get the buffer lock, we could beat a flush - * or tail pushing thread to the lock here, in which - * case they will go looking for the inode buffer - * and fail, we need some other form of interlock - * here. + * We have already marked every inode that was part of a + * transaction stale above, which means there is no point in + * even trying to lock them. */ - found = 0; for (i = 0; i < ninodes; i++) { read_lock(&pag->pag_ici_lock); ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, (inum + i))); - /* Inode not in memory or we found it already, - * nothing to do - */ + /* Inode not in memory or stale, nothing to do */ if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { read_unlock(&pag->pag_ici_lock); continue; } - if (xfs_inode_clean(ip)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - - /* If we can get the locks then add it to the - * list, otherwise by the time we get the bp lock - * below it will already be attached to the - * inode buffer. - */ - - /* This inode will already be locked - by us, lets - * keep it that way. - */ - - if (ip == free_ip) { - if (xfs_iflock_nowait(ip)) { - xfs_iflags_set(ip, XFS_ISTALE); - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - } else { - ip_found[found++] = ip; - } - } + /* don't try to lock/unlock the current inode */ + if (ip != free_ip && + !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { read_unlock(&pag->pag_ici_lock); continue; } + read_unlock(&pag->pag_ici_lock); - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { - if (xfs_iflock_nowait(ip)) { - xfs_iflags_set(ip, XFS_ISTALE); - - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } else { - ip_found[found++] = ip; - } - } else { + if (!xfs_iflock_nowait(ip)) { + if (ip != free_ip) xfs_iunlock(ip, XFS_ILOCK_EXCL); - } + continue; } - read_unlock(&pag->pag_ici_lock); - } - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, - mp->m_bsize * blks_per_cluster, - XBF_LOCK); - - pre_flushed = 0; - lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); - while (lip) { - if (lip->li_type == XFS_LI_INODE) { - iip = (xfs_inode_log_item_t *)lip; - ASSERT(iip->ili_logged == 1); - lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; - xfs_trans_ail_copy_lsn(mp->m_ail, - &iip->ili_flush_lsn, - &iip->ili_item.li_lsn); - xfs_iflags_set(iip->ili_inode, XFS_ISTALE); - pre_flushed++; + xfs_iflags_set(ip, XFS_ISTALE); + if (xfs_inode_clean(ip)) { + ASSERT(ip != free_ip); + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; } - lip = lip->li_bio_list; - } - for (i = 0; i < found; i++) { - ip = ip_found[i]; iip = ip->i_itemp; - if (!iip) { + /* inode with unlogged changes only */ + ASSERT(ip != free_ip); ip->i_update_core = 0; xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); continue; } + found++; iip->ili_last_fields = iip->ili_format.ilf_fields; iip->ili_format.ilf_fields = 0; @@ -2078,17 +2061,16 @@ xfs_ifree_cluster( xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done, (xfs_log_item_t *)iip); - if (ip != free_ip) { + + if (ip != free_ip) xfs_iunlock(ip, XFS_ILOCK_EXCL); - } } - if (found || pre_flushed) + if (found) xfs_trans_stale_inode_buf(tp, bp); xfs_trans_binval(tp, bp); } - kmem_free(ip_found); xfs_perag_put(pag); } @@ -2649,8 +2631,6 @@ xfs_iflush_cluster( int i; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - ASSERT(pag->pagi_inodeok); - ASSERT(pag->pag_ici_init); inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 14a69ae..ed0684c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -132,15 +132,10 @@ xlog_align( int nbblks, xfs_buf_t *bp) { - xfs_daddr_t offset; - xfs_caddr_t ptr; + xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); - offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1); - ptr = XFS_BUF_PTR(bp) + BBTOB(offset); - - ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp)); - - return ptr; + ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp)); + return XFS_BUF_PTR(bp) + BBTOB(offset); } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d7bf38c..d59f4e8 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -268,10 +268,10 @@ xfs_sb_validate_fsb_count( #if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) - return E2BIG; + return EFBIG; #else /* Limited by UINT_MAX of sectors */ if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX) - return E2BIG; + return EFBIG; #endif return 0; } @@ -393,7 +393,7 @@ xfs_mount_validate_sb( xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { xfs_fs_mount_cmn_err(flags, "file system too large to be mounted on this system."); - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); } if (unlikely(sbp->sb_inprogress)) { @@ -413,17 +413,6 @@ xfs_mount_validate_sb( return 0; } -STATIC void -xfs_initialize_perag_icache( - xfs_perag_t *pag) -{ - if (!pag->pag_ici_init) { - rwlock_init(&pag->pag_ici_lock); - INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); - pag->pag_ici_init = 1; - } -} - int xfs_initialize_perag( xfs_mount_t *mp, @@ -436,13 +425,8 @@ xfs_initialize_perag( xfs_agino_t agino; xfs_ino_t ino; xfs_sb_t *sbp = &mp->m_sb; - xfs_ino_t max_inum = XFS_MAXINUMBER_32; int error = -ENOMEM; - /* Check to see if the filesystem can overflow 32 bit inodes */ - agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); - ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); - /* * Walk the current per-ag tree so we don't try to initialise AGs * that already exist (growfs case). Allocate and insert all the @@ -456,11 +440,18 @@ xfs_initialize_perag( } if (!first_initialised) first_initialised = index; + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); if (!pag) goto out_unwind; + pag->pag_agno = index; + pag->pag_mount = mp; + rwlock_init(&pag->pag_ici_lock); + INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + if (radix_tree_preload(GFP_NOFS)) goto out_unwind; + spin_lock(&mp->m_perag_lock); if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { BUG(); @@ -469,25 +460,26 @@ xfs_initialize_perag( error = -EEXIST; goto out_unwind; } - pag->pag_agno = index; - pag->pag_mount = mp; spin_unlock(&mp->m_perag_lock); radix_tree_preload_end(); } - /* Clear the mount flag if no inode can overflow 32 bits - * on this filesystem, or if specifically requested.. + /* + * If we mount with the inode64 option, or no inode overflows + * the legacy 32-bit address space clear the inode32 option. */ - if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) { + agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); + ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); + + if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32) mp->m_flags |= XFS_MOUNT_32BITINODES; - } else { + else mp->m_flags &= ~XFS_MOUNT_32BITINODES; - } - /* If we can overflow then setup the ag headers accordingly */ if (mp->m_flags & XFS_MOUNT_32BITINODES) { - /* Calculate how much should be reserved for inodes to - * meet the max inode percentage. + /* + * Calculate how much should be reserved for inodes to meet + * the max inode percentage. */ if (mp->m_maxicount) { __uint64_t icount; @@ -500,30 +492,28 @@ xfs_initialize_perag( } else { max_metadata = agcount; } + for (index = 0; index < agcount; index++) { ino = XFS_AGINO_TO_INO(mp, index, agino); - if (ino > max_inum) { + if (ino > XFS_MAXINUMBER_32) { index++; break; } - /* This ag is preferred for inodes */ pag = xfs_perag_get(mp, index); pag->pagi_inodeok = 1; if (index < max_metadata) pag->pagf_metadata = 1; - xfs_initialize_perag_icache(pag); xfs_perag_put(pag); } } else { - /* Setup default behavior for smaller filesystems */ for (index = 0; index < agcount; index++) { pag = xfs_perag_get(mp, index); pag->pagi_inodeok = 1; - xfs_initialize_perag_icache(pag); xfs_perag_put(pag); } } + if (maxagi) *maxagi = index; return 0; @@ -1009,7 +999,7 @@ xfs_check_sizes(xfs_mount_t *mp) d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { cmn_err(CE_WARN, "XFS: size check 1 failed"); - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); } error = xfs_read_buf(mp, mp->m_ddev_targp, d - XFS_FSS_TO_BB(mp, 1), @@ -1019,7 +1009,7 @@ xfs_check_sizes(xfs_mount_t *mp) } else { cmn_err(CE_WARN, "XFS: size check 2 failed"); if (error == ENOSPC) - error = XFS_ERROR(E2BIG); + error = XFS_ERROR(EFBIG); return error; } @@ -1027,7 +1017,7 @@ xfs_check_sizes(xfs_mount_t *mp) d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { cmn_err(CE_WARN, "XFS: size check 3 failed"); - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); } error = xfs_read_buf(mp, mp->m_logdev_targp, d - XFS_FSB_TO_BB(mp, 1), @@ -1037,7 +1027,7 @@ xfs_check_sizes(xfs_mount_t *mp) } else { cmn_err(CE_WARN, "XFS: size check 3 failed"); if (error == ENOSPC) - error = XFS_ERROR(E2BIG); + error = XFS_ERROR(EFBIG); return error; } } @@ -1254,7 +1244,7 @@ xfs_mountfs( * Allocate and initialize the per-ag data. */ spin_lock_init(&mp->m_perag_lock); - INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS); + INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); if (error) { cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6be05f7..1644551 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -2247,7 +2247,7 @@ xfs_rtmount_init( cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", (unsigned long long) XFS_BB_TO_FSB(mp, d), (unsigned long long) mp->m_sb.sb_rblocks); - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); } error = xfs_read_buf(mp, mp->m_rtdev_targp, d - XFS_FSB_TO_BB(mp, 1), @@ -2256,7 +2256,7 @@ xfs_rtmount_init( cmn_err(CE_WARN, "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); if (error == ENOSPC) - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); return error; } xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index b2d67ad..ff614c2 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -147,7 +147,16 @@ xfs_growfs_rt( # define xfs_rtfree_extent(t,b,l) (ENOSYS) # define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) # define xfs_growfs_rt(mp,in) (ENOSYS) -# define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) +static inline int /* error */ +xfs_rtmount_init( + xfs_mount_t *mp) /* file system mount structure */ +{ + if (mp->m_sb.sb_rblocks == 0) + return 0; + + cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT"); + return ENOSYS; +} # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) # define xfs_rtunmount_inodes(m) #endif /* CONFIG_XFS_RT */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ce558ef..28547df 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -48,134 +48,489 @@ kmem_zone_t *xfs_trans_zone; + /* - * Reservation functions here avoid a huge stack in xfs_trans_init - * due to register overflow from temporaries in the calculations. + * Various log reservation values. + * + * These are based on the size of the file system block because that is what + * most transactions manipulate. Each adds in an additional 128 bytes per + * item logged to try to account for the overhead of the transaction mechanism. + * + * Note: Most of the reservations underestimate the number of allocation + * groups into which they could free extents in the xfs_bmap_finish() call. + * This is because the number in the worst case is quite high and quite + * unusual. In order to fix this we need to change xfs_bmap_finish() to free + * extents in only a single AG at a time. This will require changes to the + * EFI code as well, however, so that the EFI for the extents not freed is + * logged again in each transaction. See SGI PV #261917. + * + * Reservation functions here avoid a huge stack in xfs_trans_init due to + * register overflow from temporaries in the calculations. + */ + + +/* + * In a write transaction we can allocate a maximum of 2 + * extents. This gives: + * the inode getting the new extents: inode size + * the inode's bmap btree: max depth * block size + * the agfs of the ags from which the extents are allocated: 2 * sector + * the superblock free block counter: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + * And the bmap_finish transaction can free bmap blocks in a join: + * the agfs of the ags containing the blocks: 2 * sector size + * the agfls of the ags containing the blocks: 2 * sector size + * the super block free block counter: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size */ STATIC uint -xfs_calc_write_reservation(xfs_mount_t *mp) +xfs_calc_write_reservation( + struct xfs_mount *mp) { - return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + + 2 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 2) + + 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + + XFS_ALLOCFREE_LOG_COUNT(mp, 2))), + (2 * mp->m_sb.sb_sectsize + + 2 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 2) + + 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); } +/* + * In truncating a file we free up to two extents at once. We can modify: + * the inode being truncated: inode size + * the inode's bmap btree: (max depth + 1) * block size + * And the bmap_finish transaction can free the blocks and bmap blocks: + * the agf for each of the ags: 4 * sector size + * the agfl for each of the ags: 4 * sector size + * the super block to reflect the freed blocks: sector size + * worst case split in allocation btrees per extent assuming 4 extents: + * 4 exts * 2 trees * (2 * max depth - 1) * block size + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + */ STATIC uint -xfs_calc_itruncate_reservation(xfs_mount_t *mp) +xfs_calc_itruncate_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + + 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), + (4 * mp->m_sb.sb_sectsize + + 4 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 4) + + 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) + + 128 * 5 + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); } +/* + * In renaming a files we can modify: + * the four inodes involved: 4 * inode size + * the two directory btrees: 2 * (max depth + v2) * dir block size + * the two directory bmap btrees: 2 * max depth * block size + * And the bmap_finish transaction can free dir and bmap blocks (two sets + * of bmap blocks) giving: + * the agf for the ags in which the blocks live: 3 * sector size + * the agfl for the ags in which the blocks live: 3 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_rename_reservation(xfs_mount_t *mp) +xfs_calc_rename_reservation( + struct xfs_mount *mp) { - return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((4 * mp->m_sb.sb_inodesize + + 2 * XFS_DIROP_LOG_RES(mp) + + 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))), + (3 * mp->m_sb.sb_sectsize + + 3 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 3) + + 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3)))); } +/* + * For creating a link to an inode: + * the parent directory inode: inode size + * the linked inode: inode size + * the directory btree could split: (max depth + v2) * dir block size + * the directory bmap btree could join or split: (max depth + v2) * blocksize + * And the bmap_finish transaction can free some bmap blocks giving: + * the agf for the ag in which the blocks live: sector size + * the agfl for the ag in which the blocks live: sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_link_reservation(xfs_mount_t *mp) +xfs_calc_link_reservation( + struct xfs_mount *mp) { - return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + mp->m_sb.sb_inodesize + + XFS_DIROP_LOG_RES(mp) + + 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), + (mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); } +/* + * For removing a directory entry we can modify: + * the parent directory inode: inode size + * the removed inode: inode size + * the directory btree could join: (max depth + v2) * dir block size + * the directory bmap btree could join or split: (max depth + v2) * blocksize + * And the bmap_finish transaction can free the dir and bmap blocks giving: + * the agf for the ag in which the blocks live: 2 * sector size + * the agfl for the ag in which the blocks live: 2 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_remove_reservation(xfs_mount_t *mp) +xfs_calc_remove_reservation( + struct xfs_mount *mp) { - return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + mp->m_sb.sb_inodesize + + XFS_DIROP_LOG_RES(mp) + + 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), + (2 * mp->m_sb.sb_sectsize + + 2 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 2) + + 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); } +/* + * For symlink we can modify: + * the parent directory inode: inode size + * the new inode: inode size + * the inode btree entry: 1 block + * the directory btree: (max depth + v2) * dir block size + * the directory inode's bmap btree: (max depth + v2) * block size + * the blocks for the symlink: 1 kB + * Or in the first xact we allocate some inodes giving: + * the agi and agf of the ag getting the new inodes: 2 * sectorsize + * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_symlink_reservation(xfs_mount_t *mp) +xfs_calc_symlink_reservation( + struct xfs_mount *mp) { - return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, 1) + + XFS_DIROP_LOG_RES(mp) + + 1024 + + 128 * (4 + XFS_DIROP_LOG_COUNT(mp))), + (2 * mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + + XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); } +/* + * For create we can modify: + * the parent directory inode: inode size + * the new inode: inode size + * the inode btree entry: block size + * the superblock for the nlink flag: sector size + * the directory btree: (max depth + v2) * dir block size + * the directory inode's bmap btree: (max depth + v2) * block size + * Or in the first xact we allocate some inodes giving: + * the agi and agf of the ag getting the new inodes: 2 * sectorsize + * the superblock for the nlink flag: sector size + * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + */ STATIC uint -xfs_calc_create_reservation(xfs_mount_t *mp) +xfs_calc_create_reservation( + struct xfs_mount *mp) { - return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, 1) + + XFS_DIROP_LOG_RES(mp) + + 128 * (3 + XFS_DIROP_LOG_COUNT(mp))), + (3 * mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + + XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); } +/* + * Making a new directory is the same as creating a new file. + */ STATIC uint -xfs_calc_mkdir_reservation(xfs_mount_t *mp) +xfs_calc_mkdir_reservation( + struct xfs_mount *mp) { - return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return xfs_calc_create_reservation(mp); } +/* + * In freeing an inode we can modify: + * the inode being freed: inode size + * the super block free inode counter: sector size + * the agi hash list and counters: sector size + * the inode btree entry: block size + * the on disk inode before ours in the agi hash list: inode cluster size + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + */ STATIC uint -xfs_calc_ifree_reservation(xfs_mount_t *mp) +xfs_calc_ifree_reservation( + struct xfs_mount *mp) { - return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, 1) + + MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), + XFS_INODE_CLUSTER_SIZE(mp)) + + 128 * 5 + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); } +/* + * When only changing the inode we log the inode and possibly the superblock + * We also add a bit of slop for the transaction stuff. + */ STATIC uint -xfs_calc_ichange_reservation(xfs_mount_t *mp) +xfs_calc_ichange_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize + + 512; + } +/* + * Growing the data section of the filesystem. + * superblock + * agi and agf + * allocation btrees + */ STATIC uint -xfs_calc_growdata_reservation(xfs_mount_t *mp) +xfs_calc_growdata_reservation( + struct xfs_mount *mp) { - return XFS_CALC_GROWDATA_LOG_RES(mp); + return mp->m_sb.sb_sectsize * 3 + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); } +/* + * Growing the rt section of the filesystem. + * In the first set of transactions (ALLOC) we allocate space to the + * bitmap or summary files. + * superblock: sector size + * agf of the ag from which the extent is allocated: sector size + * bmap btree for bitmap/summary inode: max depth * blocksize + * bitmap/summary inode: inode size + * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize + */ STATIC uint -xfs_calc_growrtalloc_reservation(xfs_mount_t *mp) +xfs_calc_growrtalloc_reservation( + struct xfs_mount *mp) { - return XFS_CALC_GROWRTALLOC_LOG_RES(mp); + return 2 * mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + + mp->m_sb.sb_inodesize + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); } +/* + * Growing the rt section of the filesystem. + * In the second set of transactions (ZERO) we zero the new metadata blocks. + * one bitmap/summary block: blocksize + */ STATIC uint -xfs_calc_growrtzero_reservation(xfs_mount_t *mp) +xfs_calc_growrtzero_reservation( + struct xfs_mount *mp) { - return XFS_CALC_GROWRTZERO_LOG_RES(mp); + return mp->m_sb.sb_blocksize + 128; } +/* + * Growing the rt section of the filesystem. + * In the third set of transactions (FREE) we update metadata without + * allocating any new blocks. + * superblock: sector size + * bitmap inode: inode size + * summary inode: inode size + * one bitmap block: blocksize + * summary blocks: new summary size + */ STATIC uint -xfs_calc_growrtfree_reservation(xfs_mount_t *mp) +xfs_calc_growrtfree_reservation( + struct xfs_mount *mp) { - return XFS_CALC_GROWRTFREE_LOG_RES(mp); + return mp->m_sb.sb_sectsize + + 2 * mp->m_sb.sb_inodesize + + mp->m_sb.sb_blocksize + + mp->m_rsumsize + + 128 * 5; } +/* + * Logging the inode modification timestamp on a synchronous write. + * inode + */ STATIC uint -xfs_calc_swrite_reservation(xfs_mount_t *mp) +xfs_calc_swrite_reservation( + struct xfs_mount *mp) { - return XFS_CALC_SWRITE_LOG_RES(mp); + return mp->m_sb.sb_inodesize + 128; } +/* + * Logging the inode mode bits when writing a setuid/setgid file + * inode + */ STATIC uint xfs_calc_writeid_reservation(xfs_mount_t *mp) { - return XFS_CALC_WRITEID_LOG_RES(mp); + return mp->m_sb.sb_inodesize + 128; } +/* + * Converting the inode from non-attributed to attributed. + * the inode being converted: inode size + * agf block and superblock (for block allocation) + * the new block (directory sized) + * bmap blocks for the new directory block + * allocation btrees + */ STATIC uint -xfs_calc_addafork_reservation(xfs_mount_t *mp) +xfs_calc_addafork_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize * 2 + + mp->m_dirblksize + + XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); } +/* + * Removing the attribute fork of a file + * the inode being truncated: inode size + * the inode's bmap btree: max depth * block size + * And the bmap_finish transaction can free the blocks and bmap blocks: + * the agf for each of the ags: 4 * sector size + * the agfl for each of the ags: 4 * sector size + * the super block to reflect the freed blocks: sector size + * worst case split in allocation btrees per extent assuming 4 extents: + * 4 exts * 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_attrinval_reservation(xfs_mount_t *mp) +xfs_calc_attrinval_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ATTRINVAL_LOG_RES(mp); + return MAX((mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + + 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))), + (4 * mp->m_sb.sb_sectsize + + 4 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 4) + + 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)))); } +/* + * Setting an attribute. + * the inode getting the attribute + * the superblock for allocations + * the agfs extents are allocated from + * the attribute btree * max depth + * the inode allocation btree + * Since attribute transaction space is dependent on the size of the attribute, + * the calculation is done partially at mount time and partially at runtime. + */ STATIC uint -xfs_calc_attrset_reservation(xfs_mount_t *mp) +xfs_calc_attrset_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + + 128 * (2 + XFS_DA_NODE_MAXDEPTH); } +/* + * Removing an attribute. + * the inode: inode size + * the attribute btree could join: max depth * block size + * the inode bmap btree could join or split: max depth * block size + * And the bmap_finish transaction can free the attr blocks freed giving: + * the agf for the ag in which the blocks live: 2 * sector size + * the agfl for the ag in which the blocks live: 2 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_attrrm_reservation(xfs_mount_t *mp) +xfs_calc_attrrm_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + + 128 * (1 + XFS_DA_NODE_MAXDEPTH + + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), + (2 * mp->m_sb.sb_sectsize + + 2 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 2) + + 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); } +/* + * Clearing a bad agino number in an agi hash bucket. + */ STATIC uint -xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp) +xfs_calc_clear_agi_bucket_reservation( + struct xfs_mount *mp) { - return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp); + return mp->m_sb.sb_sectsize + 128; } /* @@ -184,11 +539,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp) */ void xfs_trans_init( - xfs_mount_t *mp) + struct xfs_mount *mp) { - xfs_trans_reservations_t *resp; + struct xfs_trans_reservations *resp = &mp->m_reservations; - resp = &(mp->m_reservations); resp->tr_write = xfs_calc_write_reservation(mp); resp->tr_itruncate = xfs_calc_itruncate_reservation(mp); resp->tr_rename = xfs_calc_rename_reservation(mp); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 8c69e78..e639e8e 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -300,24 +300,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) /* - * Various log reservation values. - * These are based on the size of the file system block - * because that is what most transactions manipulate. - * Each adds in an additional 128 bytes per item logged to - * try to account for the overhead of the transaction mechanism. - * - * Note: - * Most of the reservations underestimate the number of allocation - * groups into which they could free extents in the xfs_bmap_finish() - * call. This is because the number in the worst case is quite high - * and quite unusual. In order to fix this we need to change - * xfs_bmap_finish() to free extents in only a single AG at a time. - * This will require changes to the EFI code as well, however, so that - * the EFI for the extents not freed is logged again in each transaction. - * See bug 261917. - */ - -/* * Per-extent log reservation for the allocation btree changes * involved in freeing or allocating an extent. * 2 trees * (2 blocks/level * max depth - 1) * block size @@ -341,429 +323,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) -/* - * In a write transaction we can allocate a maximum of 2 - * extents. This gives: - * the inode getting the new extents: inode size - * the inode's bmap btree: max depth * block size - * the agfs of the ags from which the extents are allocated: 2 * sector - * the superblock free block counter: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - * And the bmap_finish transaction can free bmap blocks in a join: - * the agfs of the ags containing the blocks: 2 * sector size - * the agfls of the ags containing the blocks: 2 * sector size - * the super block free block counter: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_WRITE_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \ - (2 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 2) + \ - (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\ - ((2 * (mp)->m_sb.sb_sectsize) + \ - (2 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 2) + \ - (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))))) #define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write) - -/* - * In truncating a file we free up to two extents at once. We can modify: - * the inode being truncated: inode size - * the inode's bmap btree: (max depth + 1) * block size - * And the bmap_finish transaction can free the blocks and bmap blocks: - * the agf for each of the ags: 4 * sector size - * the agfl for each of the ags: 4 * sector size - * the super block to reflect the freed blocks: sector size - * worst case split in allocation btrees per extent assuming 4 extents: - * 4 exts * 2 trees * (2 * max depth - 1) * block size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -#define XFS_CALC_ITRUNCATE_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \ - (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \ - ((4 * (mp)->m_sb.sb_sectsize) + \ - (4 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 4) + \ - (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \ - (128 * 5) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) - #define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate) - -/* - * In renaming a files we can modify: - * the four inodes involved: 4 * inode size - * the two directory btrees: 2 * (max depth + v2) * dir block size - * the two directory bmap btrees: 2 * max depth * block size - * And the bmap_finish transaction can free dir and bmap blocks (two sets - * of bmap blocks) giving: - * the agf for the ags in which the blocks live: 3 * sector size - * the agfl for the ags in which the blocks live: 3 * sector size - * the superblock for the free block count: sector size - * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_RENAME_LOG_RES(mp) \ - (MAX( \ - ((4 * (mp)->m_sb.sb_inodesize) + \ - (2 * XFS_DIROP_LOG_RES(mp)) + \ - (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \ - ((3 * (mp)->m_sb.sb_sectsize) + \ - (3 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 3) + \ - (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3)))))) - #define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename) - -/* - * For creating a link to an inode: - * the parent directory inode: inode size - * the linked inode: inode size - * the directory btree could split: (max depth + v2) * dir block size - * the directory bmap btree could join or split: (max depth + v2) * blocksize - * And the bmap_finish transaction can free some bmap blocks giving: - * the agf for the ag in which the blocks live: sector size - * the agfl for the ag in which the blocks live: sector size - * the superblock for the free block count: sector size - * the allocation btrees: 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_LINK_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_inodesize + \ - XFS_DIROP_LOG_RES(mp) + \ - (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \ - ((mp)->m_sb.sb_sectsize + \ - (mp)->m_sb.sb_sectsize + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) - #define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link) - -/* - * For removing a directory entry we can modify: - * the parent directory inode: inode size - * the removed inode: inode size - * the directory btree could join: (max depth + v2) * dir block size - * the directory bmap btree could join or split: (max depth + v2) * blocksize - * And the bmap_finish transaction can free the dir and bmap blocks giving: - * the agf for the ag in which the blocks live: 2 * sector size - * the agfl for the ag in which the blocks live: 2 * sector size - * the superblock for the free block count: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_REMOVE_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_inodesize + \ - XFS_DIROP_LOG_RES(mp) + \ - (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \ - ((2 * (mp)->m_sb.sb_sectsize) + \ - (2 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 2) + \ - (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))))) - #define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove) - -/* - * For symlink we can modify: - * the parent directory inode: inode size - * the new inode: inode size - * the inode btree entry: 1 block - * the directory btree: (max depth + v2) * dir block size - * the directory inode's bmap btree: (max depth + v2) * block size - * the blocks for the symlink: 1 kB - * Or in the first xact we allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_SYMLINK_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B(mp, 1) + \ - XFS_DIROP_LOG_RES(mp) + \ - 1024 + \ - (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \ - (2 * (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ - XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) - #define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink) - -/* - * For create we can modify: - * the parent directory inode: inode size - * the new inode: inode size - * the inode btree entry: block size - * the superblock for the nlink flag: sector size - * the directory btree: (max depth + v2) * dir block size - * the directory inode's bmap btree: (max depth + v2) * block size - * Or in the first xact we allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the superblock for the nlink flag: sector size - * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -#define XFS_CALC_CREATE_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B(mp, 1) + \ - XFS_DIROP_LOG_RES(mp) + \ - (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \ - (3 * (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ - XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) - #define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create) - -/* - * Making a new directory is the same as creating a new file. - */ -#define XFS_CALC_MKDIR_LOG_RES(mp) XFS_CALC_CREATE_LOG_RES(mp) - #define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir) - -/* - * In freeing an inode we can modify: - * the inode being freed: inode size - * the super block free inode counter: sector size - * the agi hash list and counters: sector size - * the inode btree entry: block size - * the on disk inode before ours in the agi hash list: inode cluster size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -#define XFS_CALC_IFREE_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize + \ - (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), 1) + \ - MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \ - (128 * 5) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) - - #define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree) - -/* - * When only changing the inode we log the inode and possibly the superblock - * We also add a bit of slop for the transaction stuff. - */ -#define XFS_CALC_ICHANGE_LOG_RES(mp) ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize + 512) - #define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange) - -/* - * Growing the data section of the filesystem. - * superblock - * agi and agf - * allocation btrees - */ -#define XFS_CALC_GROWDATA_LOG_RES(mp) \ - ((mp)->m_sb.sb_sectsize * 3 + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) - #define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata) - -/* - * Growing the rt section of the filesystem. - * In the first set of transactions (ALLOC) we allocate space to the - * bitmap or summary files. - * superblock: sector size - * agf of the ag from which the extent is allocated: sector size - * bmap btree for bitmap/summary inode: max depth * blocksize - * bitmap/summary inode: inode size - * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize - */ -#define XFS_CALC_GROWRTALLOC_LOG_RES(mp) \ - (2 * (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \ - (mp)->m_sb.sb_inodesize + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * \ - (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) - #define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc) - -/* - * Growing the rt section of the filesystem. - * In the second set of transactions (ZERO) we zero the new metadata blocks. - * one bitmap/summary block: blocksize - */ -#define XFS_CALC_GROWRTZERO_LOG_RES(mp) \ - ((mp)->m_sb.sb_blocksize + 128) - #define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero) - -/* - * Growing the rt section of the filesystem. - * In the third set of transactions (FREE) we update metadata without - * allocating any new blocks. - * superblock: sector size - * bitmap inode: inode size - * summary inode: inode size - * one bitmap block: blocksize - * summary blocks: new summary size - */ -#define XFS_CALC_GROWRTFREE_LOG_RES(mp) \ - ((mp)->m_sb.sb_sectsize + \ - 2 * (mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_blocksize + \ - (mp)->m_rsumsize + \ - (128 * 5)) - #define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree) - -/* - * Logging the inode modification timestamp on a synchronous write. - * inode - */ -#define XFS_CALC_SWRITE_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + 128) - #define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) - /* * Logging the inode timestamps on an fsync -- same as SWRITE * as long as SWRITE logs the entire inode core */ #define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) - -/* - * Logging the inode mode bits when writing a setuid/setgid file - * inode - */ -#define XFS_CALC_WRITEID_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + 128) - #define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) - -/* - * Converting the inode from non-attributed to attributed. - * the inode being converted: inode size - * agf block and superblock (for block allocation) - * the new block (directory sized) - * bmap blocks for the new directory block - * allocation btrees - */ -#define XFS_CALC_ADDAFORK_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize * 2 + \ - (mp)->m_dirblksize + \ - XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) - #define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) - -/* - * Removing the attribute fork of a file - * the inode being truncated: inode size - * the inode's bmap btree: max depth * block size - * And the bmap_finish transaction can free the blocks and bmap blocks: - * the agf for each of the ags: 4 * sector size - * the agfl for each of the ags: 4 * sector size - * the super block to reflect the freed blocks: sector size - * worst case split in allocation btrees per extent assuming 4 extents: - * 4 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_ATTRINVAL_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \ - (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \ - ((4 * (mp)->m_sb.sb_sectsize) + \ - (4 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 4) + \ - (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)))))) - #define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) - -/* - * Setting an attribute. - * the inode getting the attribute - * the superblock for allocations - * the agfs extents are allocated from - * the attribute btree * max depth - * the inode allocation btree - * Since attribute transaction space is dependent on the size of the attribute, - * the calculation is done partially at mount time and partially at runtime. - */ -#define XFS_CALC_ATTRSET_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \ - (128 * (2 + XFS_DA_NODE_MAXDEPTH))) - #define XFS_ATTRSET_LOG_RES(mp, ext) \ ((mp)->m_reservations.tr_attrset + \ (ext * (mp)->m_sb.sb_sectsize) + \ (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \ (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))))) - -/* - * Removing an attribute. - * the inode: inode size - * the attribute btree could join: max depth * block size - * the inode bmap btree could join or split: max depth * block size - * And the bmap_finish transaction can free the attr blocks freed giving: - * the agf for the ag in which the blocks live: 2 * sector size - * the agfl for the ag in which the blocks live: 2 * sector size - * the superblock for the free block count: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_ATTRRM_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \ - (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \ - ((2 * (mp)->m_sb.sb_sectsize) + \ - (2 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 2) + \ - (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))))) - #define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm) - -/* - * Clearing a bad agino number in an agi hash bucket. - */ -#define XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \ - ((mp)->m_sb.sb_sectsize + 128) - #define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 9d376be..a06bd62 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -267,7 +267,7 @@ xfs_setattr( if (code) { ASSERT(tp == NULL); lock_flags &= ~XFS_ILOCK_EXCL; - ASSERT(lock_flags == XFS_IOLOCK_EXCL); + ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock); goto error_return; } tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); |