diff options
Diffstat (limited to 'fs')
52 files changed, 436 insertions, 269 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 14a8644..69357c0 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -188,7 +188,8 @@ static void v9fs_kill_super(struct super_block *s) P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); - v9fs_dentry_release(s->s_root); /* clunk root */ + if (s->s_root) + v9fs_dentry_release(s->s_root); /* clunk root */ kill_anon_super(s); diff --git a/fs/affs/affs.h b/fs/affs/affs.h index e511dc6..0e40caa 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -106,8 +106,8 @@ struct affs_sb_info { u32 s_last_bmap; struct buffer_head *s_bmap_bh; char *s_prefix; /* Prefix for volumes and assigns. */ - int s_prefix_len; /* Length of prefix. */ char s_volume[32]; /* Volume prefix for absolute symlinks. */ + spinlock_t symlink_lock; /* protects the previous two */ }; #define SF_INTL 0x0001 /* International filesystem. */ diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 960d336..d70bbba 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -341,10 +341,13 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) p = (char *)AFFS_HEAD(bh)->table; lc = '/'; if (*symname == '/') { + struct affs_sb_info *sbi = AFFS_SB(sb); while (*symname == '/') symname++; - while (AFFS_SB(sb)->s_volume[i]) /* Cannot overflow */ - *p++ = AFFS_SB(sb)->s_volume[i++]; + spin_lock(&sbi->symlink_lock); + while (sbi->s_volume[i]) /* Cannot overflow */ + *p++ = sbi->s_volume[i++]; + spin_unlock(&sbi->symlink_lock); } while (i < maxlen && (c = *symname++)) { if (c == '.' && lc == '/' && *symname == '.' && symname[1] == '/') { diff --git a/fs/affs/super.c b/fs/affs/super.c index 104fdcb..d41e967 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -203,7 +203,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s switch (token) { case Opt_bs: if (match_int(&args[0], &n)) - return -EINVAL; + return 0; if (n != 512 && n != 1024 && n != 2048 && n != 4096) { printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n"); @@ -213,7 +213,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s break; case Opt_mode: if (match_octal(&args[0], &option)) - return 1; + return 0; *mode = option & 0777; *mount_opts |= SF_SETMODE; break; @@ -221,8 +221,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s *mount_opts |= SF_MUFS; break; case Opt_prefix: - /* Free any previous prefix */ - kfree(*prefix); *prefix = match_strdup(&args[0]); if (!*prefix) return 0; @@ -233,21 +231,21 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s break; case Opt_reserved: if (match_int(&args[0], reserved)) - return 1; + return 0; break; case Opt_root: if (match_int(&args[0], root)) - return 1; + return 0; break; case Opt_setgid: if (match_int(&args[0], &option)) - return 1; + return 0; *gid = option; *mount_opts |= SF_SETGID; break; case Opt_setuid: if (match_int(&args[0], &option)) - return -EINVAL; + return 0; *uid = option; *mount_opts |= SF_SETUID; break; @@ -311,11 +309,14 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; sb->s_fs_info = sbi; mutex_init(&sbi->s_bmlock); + spin_lock_init(&sbi->symlink_lock); if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, &blocksize,&sbi->s_prefix, sbi->s_volume, &mount_flags)) { printk(KERN_ERR "AFFS: Error parsing options\n"); + kfree(sbi->s_prefix); + kfree(sbi); return -EINVAL; } /* N.B. after this point s_prefix must be released */ @@ -516,14 +517,18 @@ affs_remount(struct super_block *sb, int *flags, char *data) unsigned long mount_flags; int res = 0; char *new_opts = kstrdup(data, GFP_KERNEL); + char volume[32]; + char *prefix = NULL; pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); *flags |= MS_NODIRATIME; + memcpy(volume, sbi->s_volume, 32); if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block, - &blocksize, &sbi->s_prefix, sbi->s_volume, + &blocksize, &prefix, volume, &mount_flags)) { + kfree(prefix); kfree(new_opts); return -EINVAL; } @@ -534,6 +539,14 @@ affs_remount(struct super_block *sb, int *flags, char *data) sbi->s_mode = mode; sbi->s_uid = uid; sbi->s_gid = gid; + /* protect against readers */ + spin_lock(&sbi->symlink_lock); + if (prefix) { + kfree(sbi->s_prefix); + sbi->s_prefix = prefix; + } + memcpy(sbi->s_volume, volume, 32); + spin_unlock(&sbi->symlink_lock); if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { unlock_kernel(); diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c index 4178253..ee00f08 100644 --- a/fs/affs/symlink.c +++ b/fs/affs/symlink.c @@ -20,7 +20,6 @@ static int affs_symlink_readpage(struct file *file, struct page *page) int i, j; char c; char lc; - char *pf; pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino); @@ -32,11 +31,15 @@ static int affs_symlink_readpage(struct file *file, struct page *page) j = 0; lf = (struct slink_front *)bh->b_data; lc = 0; - pf = AFFS_SB(inode->i_sb)->s_prefix ? AFFS_SB(inode->i_sb)->s_prefix : "/"; if (strchr(lf->symname,':')) { /* Handle assign or volume name */ + struct affs_sb_info *sbi = AFFS_SB(inode->i_sb); + char *pf; + spin_lock(&sbi->symlink_lock); + pf = sbi->s_prefix ? sbi->s_prefix : "/"; while (i < 1023 && (c = pf[i])) link[i++] = c; + spin_unlock(&sbi->symlink_lock); while (i < 1023 && lf->symname[j] != ':') link[i++] = lf->symname[j++]; if (i < 1023) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 6f60336..8f3d9fd 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -353,35 +353,35 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) struct inode *inode; unsigned i, imap_len; struct bfs_sb_info *info; - long ret = -EINVAL; + int ret = -EINVAL; unsigned long i_sblock, i_eblock, i_eoff, s_size; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; + mutex_init(&info->bfs_lock); s->s_fs_info = info; sb_set_blocksize(s, BFS_BSIZE); - bh = sb_bread(s, 0); - if(!bh) + info->si_sbh = sb_bread(s, 0); + if (!info->si_sbh) goto out; - bfs_sb = (struct bfs_super_block *)bh->b_data; + bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data; if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { if (!silent) printf("No BFS filesystem on %s (magic=%08x)\n", s->s_id, le32_to_cpu(bfs_sb->s_magic)); - goto out; + goto out1; } if (BFS_UNCLEAN(bfs_sb, s) && !silent) printf("%s is unclean, continuing\n", s->s_id); s->s_magic = BFS_MAGIC; - info->si_sbh = bh; if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { printf("Superblock is corrupted\n"); - goto out; + goto out1; } info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / @@ -390,7 +390,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) imap_len = (info->si_lasti / 8) + 1; info->si_imap = kzalloc(imap_len, GFP_KERNEL); if (!info->si_imap) - goto out; + goto out1; for (i = 0; i < BFS_ROOT_INO; i++) set_bit(i, info->si_imap); @@ -398,15 +398,13 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) inode = bfs_iget(s, BFS_ROOT_INO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); - kfree(info->si_imap); - goto out; + goto out2; } s->s_root = d_alloc_root(inode); if (!s->s_root) { iput(inode); ret = -ENOMEM; - kfree(info->si_imap); - goto out; + goto out2; } info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS; @@ -419,10 +417,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) bh = sb_bread(s, info->si_blocks - 1); if (!bh) { printf("Last block not available: %lu\n", info->si_blocks - 1); - iput(inode); ret = -EIO; - kfree(info->si_imap); - goto out; + goto out3; } brelse(bh); @@ -459,11 +455,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) printf("Inode 0x%08x corrupted\n", i); brelse(bh); - s->s_root = NULL; - kfree(info->si_imap); - kfree(info); - s->s_fs_info = NULL; - return -EIO; + ret = -EIO; + goto out3; } if (!di->i_ino) { @@ -483,11 +476,17 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_dirt = 1; } dump_imap("read_super", s); - mutex_init(&info->bfs_lock); return 0; +out3: + dput(s->s_root); + s->s_root = NULL; +out2: + kfree(info->si_imap); +out1: + brelse(info->si_sbh); out: - brelse(bh); + mutex_destroy(&info->bfs_lock); kfree(info); s->s_fs_info = NULL; return ret; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 346b694..fdd3970 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) #else set_personality(PER_LINUX); #endif + setup_new_exec(bprm); current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index edd90c4..fd5b2ea 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; - /* - * The early SET_PERSONALITY here is so that the lookup - * for the interpreter happens in the namespace of the - * to-be-execed image. SET_PERSONALITY can select an - * alternate root. - * - * However, SET_PERSONALITY is NOT allowed to switch - * this task into the new images's memory mapping - * policy - that is, TASK_SIZE must still evaluate to - * that which is appropriate to the execing application. - * This is because exit_mmap() needs to have TASK_SIZE - * evaluate to the size of the old image. - * - * So if (say) a 64-bit application is execing a 32-bit - * application it is the architecture's responsibility - * to defer changing the value of TASK_SIZE until the - * switch really is going to happen - do this in - * flush_thread(). - akpm - */ - SET_PERSONALITY(loc->elf_ex); - interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) @@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Verify the interpreter has a valid arch */ if (!elf_check_arch(&loc->interp_elf_ex)) goto out_free_dentry; - } else { - /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex); } /* Flush all traces of the currently running executable */ @@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; - arch_pick_mmap_layout(current->mm); + + setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c57d9ce..18d7729 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, set_personality(PER_LINUX_FDPIC); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; + + setup_new_exec(bprm); + set_binfmt(&elf_fdpic_format); current->mm->start_code = 0; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index d4a00ea..42c6b4a 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* OK, This is the point of no return */ set_personality(PER_LINUX_32BIT); + setup_new_exec(bprm); } /* diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 2a9b533..cc8560f 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* OK, This is the point of no return */ current->flags &= ~PF_FORKNOEXEC; current->personality = PER_HPUX; + setup_new_exec(bprm); /* Set the task size for HP-UX processes such that * the gateway page is outside the address space. diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 49a34e7..a16f29e 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -61,7 +61,7 @@ static inline unsigned int vecs_to_idx(unsigned int nr) static inline int use_bip_pool(unsigned int idx) { - if (idx == BIOVEC_NR_POOLS) + if (idx == BIOVEC_MAX_IDX) return 1; return 0; @@ -95,6 +95,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, /* Use mempool if lower order alloc failed or max vecs were requested */ if (bip == NULL) { + idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */ bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); if (unlikely(bip == NULL)) { @@ -542,13 +542,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page if (page == prev->bv_page && offset == prev->bv_offset + prev->bv_len) { + unsigned int prev_bv_len = prev->bv_len; prev->bv_len += len; if (q->merge_bvec_fn) { struct bvec_merge_data bvm = { + /* prev_bvec is already charged in + bi_size, discharge it in order to + simulate merging updated prev_bvec + as new bvec. */ .bi_bdev = bio->bi_bdev, .bi_sector = bio->bi_sector, - .bi_size = bio->bi_size, + .bi_size = bio->bi_size - prev_bv_len, .bi_rw = bio->bi_rw, }; diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 54f4798..6df6d6e 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -244,6 +244,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, ACL_TYPE_ACCESS); } } + posix_acl_release(clone); } failed: posix_acl_release(acl); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9f806dd..2aa8ec6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1161,6 +1161,7 @@ struct btrfs_root { #define BTRFS_MOUNT_SSD_SPREAD (1 << 8) #define BTRFS_MOUNT_NOSSD (1 << 9) #define BTRFS_MOUNT_DISCARD (1 << 10) +#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 009e3bd..2b59201 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1982,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_recover_relocation(tree_root); - BUG_ON(ret); + if (ret < 0) { + printk(KERN_WARNING + "btrfs: failed to recover relocation\n"); + err = -EINVAL; + goto fail_trans_kthread; + } } location.objectid = BTRFS_FS_TREE_OBJECTID; @@ -1993,6 +1998,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->fs_root) goto fail_trans_kthread; + if (!(sb->s_flags & MS_RDONLY)) { + down_read(&fs_info->cleanup_work_sem); + btrfs_orphan_cleanup(fs_info->fs_root); + up_read(&fs_info->cleanup_work_sem); + } + return tree_root; fail_trans_kthread: diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 432a2da..559f724 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5402,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, int ret; while (level >= 0) { - if (path->slots[level] >= - btrfs_header_nritems(path->nodes[level])) - break; - ret = walk_down_proc(trans, root, path, wc, lookup_info); if (ret > 0) break; @@ -5413,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (level == 0) break; + if (path->slots[level] >= + btrfs_header_nritems(path->nodes[level])) + break; + ret = do_walk_down(trans, root, path, wc, &lookup_info); if (ret > 0) { path->slots[level]++; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 96577e8..b177ed3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, spin_unlock(&tree->buffer_lock); goto free_eb; } - spin_unlock(&tree->buffer_lock); - /* add one reference for the tree */ atomic_inc(&eb->refs); + spin_unlock(&tree->buffer_lock); return eb; free_eb: diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 46bea0f..428fcac 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -155,20 +155,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, return NULL; } -/* - * look for an offset in the tree, and if it can't be found, return - * the first offset we can find smaller than 'offset'. - */ -static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) -{ - struct rb_node *prev; - struct rb_node *ret; - ret = __tree_search(root, offset, &prev, NULL); - if (!ret) - return prev; - return ret; -} - /* check to see if two extent_map structs are adjacent and safe to merge */ static int mergable_maps(struct extent_map *prev, struct extent_map *next) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c020335..9d08096 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1133,7 +1133,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_lock(&dentry->d_inode->i_mutex); out: - return ret > 0 ? EIO : ret; + return ret > 0 ? -EIO : ret; } static const struct vm_operations_struct btrfs_file_vm_ops = { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b330e27c..4deb280 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -483,7 +483,8 @@ again: nr_pages_ret = 0; /* flag the file so we don't compress in the future */ - BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + if (!btrfs_test_opt(root, FORCE_COMPRESS)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; } if (will_compress) { *num_added += 1; @@ -1680,24 +1681,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * before we start the transaction. It limits the amount of btree * reads required while inside the transaction. */ -static noinline void reada_csum(struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_ordered_extent *ordered_extent) -{ - struct btrfs_ordered_sum *sum; - u64 bytenr; - - sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, - list); - bytenr = sum->sums[0].bytenr; - - /* - * we don't care about the results, the point of this search is - * just to get the btree leaves into ram - */ - btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); -} - /* as ordered data IO finishes, this gets called so we can finish * an ordered extent if the range of bytes in the file it covers are * fully written. @@ -1708,7 +1691,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_path *path; int compressed = 0; int ret; @@ -1716,32 +1698,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) if (!ret) return 0; - /* - * before we join the transaction, try to do some of our IO. - * This will limit the amount of IO that we have to do with - * the transaction running. We're unlikely to need to do any - * IO if the file extents are new, the disk_i_size checks - * covers the most common case. - */ - if (start < BTRFS_I(inode)->disk_i_size) { - path = btrfs_alloc_path(); - if (path) { - ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, - start, 0); - ordered_extent = btrfs_lookup_ordered_extent(inode, - start); - if (!list_empty(&ordered_extent->list)) { - btrfs_release_path(root, path); - reada_csum(root, path, ordered_extent); - } - btrfs_free_path(path); - } - } - - if (!ordered_extent) - ordered_extent = btrfs_lookup_ordered_extent(inode, start); + ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); @@ -3796,12 +3755,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (location.type == BTRFS_INODE_ITEM_KEY) { inode = btrfs_iget(dir->i_sb, &location, root); - if (unlikely(root->clean_orphans) && - !(inode->i_sb->s_flags & MS_RDONLY)) { - down_read(&root->fs_info->cleanup_work_sem); - btrfs_orphan_cleanup(root); - up_read(&root->fs_info->cleanup_work_sem); - } return inode; } @@ -5799,7 +5752,7 @@ out_fail: } static int prealloc_file_range(struct inode *inode, u64 start, u64 end, - u64 alloc_hint, int mode) + u64 alloc_hint, int mode, loff_t actual_len) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -5808,6 +5761,7 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, u64 cur_offset = start; u64 num_bytes = end - start; int ret = 0; + u64 i_size; while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); @@ -5845,9 +5799,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && - cur_offset > inode->i_size) { - i_size_write(inode, cur_offset); - btrfs_ordered_update_i_size(inode, cur_offset, NULL); + (actual_len > inode->i_size) && + (cur_offset > inode->i_size)) { + + if (cur_offset > actual_len) + i_size = actual_len; + else + i_size = cur_offset; + i_size_write(inode, i_size); + btrfs_ordered_update_i_size(inode, i_size, NULL); } ret = btrfs_update_inode(trans, root, inode); @@ -5940,7 +5900,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { ret = prealloc_file_range(inode, cur_offset, last_byte, - alloc_hint, mode); + alloc_hint, mode, offset+len); if (ret < 0) { free_extent_map(em); break; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ed3e4a2..ab7ab53 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3764,7 +3764,8 @@ out: BTRFS_DATA_RELOC_TREE_OBJECTID); if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); - btrfs_orphan_cleanup(fs_root); + else + btrfs_orphan_cleanup(fs_root); } return err; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3f9b457..8a1ea6e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -66,7 +66,8 @@ enum { Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, - Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, + Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, + Opt_flushoncommit, Opt_discard, Opt_err, }; @@ -82,6 +83,7 @@ static match_table_t tokens = { {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, {Opt_compress, "compress"}, + {Opt_compress_force, "compress-force"}, {Opt_ssd, "ssd"}, {Opt_ssd_spread, "ssd_spread"}, {Opt_nossd, "nossd"}, @@ -173,6 +175,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: use compression\n"); btrfs_set_opt(info->mount_opt, COMPRESS); break; + case Opt_compress_force: + printk(KERN_INFO "btrfs: forcing compression\n"); + btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); + btrfs_set_opt(info->mount_opt, COMPRESS); + break; case Opt_ssd: printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); btrfs_set_opt(info->mount_opt, SSD); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 220dad5..41ecbb2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->avail_metadata_alloc_bits; if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && - root->fs_info->fs_devices->rw_devices <= 4) { + root->fs_info->fs_devices->num_devices <= 4) { printk(KERN_ERR "btrfs: unable to go below four devices " "on raid10\n"); ret = -EINVAL; @@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && - root->fs_info->fs_devices->rw_devices <= 2) { + root->fs_info->fs_devices->num_devices <= 2) { printk(KERN_ERR "btrfs: unable to go below two " "devices on raid1\n"); ret = -EINVAL; @@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) return -EINVAL; bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); - if (!bdev) - return -EIO; + if (IS_ERR(bdev)) + return PTR_ERR(bdev); if (root->fs_info->fs_devices->seeding) { seeding_dev = 1; @@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) if (!em) return 1; + if (btrfs_test_opt(root, DEGRADED)) { + free_extent_map(em); + return 0; + } + map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { if (!map->stripes[i].dev->writeable) { diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b486169..274ac86 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -160,15 +160,8 @@ static int debugfs_create_by_name(const char *name, mode_t mode, * block. A pointer to that is in the struct vfsmount that we * have around. */ - if (!parent) { - if (debugfs_mount && debugfs_mount->mnt_sb) { - parent = debugfs_mount->mnt_sb->s_root; - } - } - if (!parent) { - pr_debug("debugfs: Ah! can not find a parent!\n"); - return -EFAULT; - } + if (!parent) + parent = debugfs_mount->mnt_sb->s_root; *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); diff --git a/fs/eventfd.c b/fs/eventfd.c index d26402f..7758cc3 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait) return events; } -static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +{ + *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; + ctx->count -= *cnt; +} + +/** + * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. + * @ctx: [in] Pointer to eventfd context. + * @wait: [in] Wait queue to be removed. + * @cnt: [out] Pointer to the 64bit conter value. + * + * Returns zero if successful, or the following error codes: + * + * -EAGAIN : The operation would have blocked. + * + * This is used to atomically remove a wait queue entry from the eventfd wait + * queue head, and read/reset the counter value. + */ +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, + __u64 *cnt) +{ + unsigned long flags; + + spin_lock_irqsave(&ctx->wqh.lock, flags); + eventfd_ctx_do_read(ctx, cnt); + __remove_wait_queue(&ctx->wqh, wait); + if (*cnt != 0 && waitqueue_active(&ctx->wqh)) + wake_up_locked_poll(&ctx->wqh, POLLOUT); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return *cnt != 0 ? 0 : -EAGAIN; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); + +/** + * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. + * @ctx: [in] Pointer to eventfd context. + * @no_wait: [in] Different from zero if the operation should not block. + * @cnt: [out] Pointer to the 64bit conter value. + * + * Returns zero if successful, or the following error codes: + * + * -EAGAIN : The operation would have blocked but @no_wait was nonzero. + * -ERESTARTSYS : A signal interrupted the wait operation. + * + * If @no_wait is zero, the function might sleep until the eventfd internal + * counter becomes greater than zero. + */ +ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt) { - struct eventfd_ctx *ctx = file->private_data; ssize_t res; - __u64 ucnt = 0; DECLARE_WAITQUEUE(wait, current); - if (count < sizeof(ucnt)) - return -EINVAL; spin_lock_irq(&ctx->wqh.lock); + *cnt = 0; res = -EAGAIN; if (ctx->count > 0) - res = sizeof(ucnt); - else if (!(file->f_flags & O_NONBLOCK)) { + res = 0; + else if (!no_wait) { __add_wait_queue(&ctx->wqh, &wait); - for (res = 0;;) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (ctx->count > 0) { - res = sizeof(ucnt); + res = 0; break; } if (signal_pending(current)) { @@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (likely(res > 0)) { - ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; - ctx->count -= ucnt; + if (likely(res == 0)) { + eventfd_ctx_do_read(ctx, cnt); if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, POLLOUT); } spin_unlock_irq(&ctx->wqh.lock); - if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) - return -EFAULT; return res; } +EXPORT_SYMBOL_GPL(eventfd_ctx_read); + +static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct eventfd_ctx *ctx = file->private_data; + ssize_t res; + __u64 cnt; + + if (count < sizeof(cnt)) + return -EINVAL; + res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt); + if (res < 0) + return res; + + return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt); +} static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) @@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) int flush_old_exec(struct linux_binprm * bprm) { - char * name; - int i, ch, retval; - char tcomm[sizeof(current->comm)]; + int retval; /* * Make sure we have a private signal table and that @@ -964,6 +962,25 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ + current->flags &= ~PF_RANDOMIZE; + flush_thread(); + current->personality &= ~bprm->per_clear; + + return 0; + +out: + return retval; +} +EXPORT_SYMBOL(flush_old_exec); + +void setup_new_exec(struct linux_binprm * bprm) +{ + int i, ch; + char * name; + char tcomm[sizeof(current->comm)]; + + arch_pick_mmap_layout(current->mm); + /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -985,9 +1002,6 @@ int flush_old_exec(struct linux_binprm * bprm) tcomm[i] = '\0'; set_task_comm(current, tcomm); - current->flags &= ~PF_RANDOMIZE; - flush_thread(); - /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on * some architectures like powerpc @@ -1003,8 +1017,6 @@ int flush_old_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); } - current->personality &= ~bprm->per_clear; - /* * Flush performance counters when crossing a * security domain: @@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm) flush_signal_handlers(current, 0); flush_old_files(current->files); - - return 0; - -out: - return retval; } - -EXPORT_SYMBOL(flush_old_exec); +EXPORT_SYMBOL(setup_new_exec); /* * Prepare credentials and lock ->cred_guard_mutex. diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index af7b626..874d169 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -361,14 +361,11 @@ struct ext4_new_group_data { so set the magic i_delalloc_reserve_flag after taking the inode allocation semaphore for */ #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 - /* Call ext4_da_update_reserve_space() after successfully - allocating the blocks */ -#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 /* caller is from the direct IO path, request to creation of an unitialized extents if not allocated, split the uninitialized extent if blocks has been preallocated already*/ -#define EXT4_GET_BLOCKS_DIO 0x0010 -#define EXT4_GET_BLOCKS_CONVERT 0x0020 +#define EXT4_GET_BLOCKS_DIO 0x0008 +#define EXT4_GET_BLOCKS_CONVERT 0x0010 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) /* Convert extent to initialized after direct IO complete */ @@ -1443,6 +1440,8 @@ extern int ext4_block_truncate_page(handle_t *handle, extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern int flush_aio_dio_completed_IO(struct inode *inode); +extern void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7d7b74e..765a482 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3132,7 +3132,19 @@ out: unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, newblock + max_blocks, allocated - max_blocks); + allocated = max_blocks; } + + /* + * If we have done fallocate with the offset that is already + * delayed allocated, we would have block reservation + * and quota reservation done in the delayed write path. + * But fallocate would have already updated quota and block + * count for this offset. So cancel these reservation + */ + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ext4_da_update_reserve_space(inode, allocated, 0); + map_out: set_buffer_mapped(bh_result); out1: @@ -3368,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); + if (allocated > max_blocks) + allocated = max_blocks; set_buffer_new(bh_result); /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. + */ + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ext4_da_update_reserve_space(inode, allocated, 1); + + /* * Cache the extent and update transaction to commit on fdatasync only * when it is _not_ an uninitialized extent. */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c818972..e119524 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1053,11 +1053,12 @@ static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) * Called with i_data_sem down, which is important since we can call * ext4_discard_preallocations() from here. */ -static void ext4_da_update_reserve_space(struct inode *inode, int used) +void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - int mdb_free = 0; + int mdb_free = 0, allocated_meta_blocks = 0; spin_lock(&ei->i_block_reservation_lock); if (unlikely(used > ei->i_reserved_data_blocks)) { @@ -1073,6 +1074,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) ei->i_reserved_data_blocks -= used; used += ei->i_allocated_meta_blocks; ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; + allocated_meta_blocks = ei->i_allocated_meta_blocks; ei->i_allocated_meta_blocks = 0; percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); @@ -1090,9 +1092,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); /* Update quota subsystem */ - vfs_dq_claim_block(inode, used); - if (mdb_free) - vfs_dq_release_reservation_block(inode, mdb_free); + if (quota_claim) { + vfs_dq_claim_block(inode, used); + if (mdb_free) + vfs_dq_release_reservation_block(inode, mdb_free); + } else { + /* + * We did fallocate with an offset that is already delayed + * allocated. So on delayed allocated writeback we should + * not update the quota for allocated blocks. But then + * converting an fallocate region to initialized region would + * have caused a metadata allocation. So claim quota for + * that + */ + if (allocated_meta_blocks) + vfs_dq_claim_block(inode, allocated_meta_blocks); + vfs_dq_release_reservation_block(inode, mdb_free + used); + } /* * If we have done all the pending block allocations and if @@ -1292,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, */ EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; } - } + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. We don't + * support fallocate for non extent files. So we can update + * reserve space here. + */ + if ((retval > 0) && + (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) + ext4_da_update_reserve_space(inode, retval, 1); + } if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) EXT4_I(inode)->i_delalloc_reserved_flag = 0; - /* - * Update reserved blocks/metadata blocks after successful - * block allocation which had been deferred till now. - */ - if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) - ext4_da_update_reserve_space(inode, retval); - up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && buffer_mapped(bh)) { int ret = check_block_validity(inode, "file system " @@ -1835,24 +1853,12 @@ repeat: * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, md_needed + 1)) { - /* - * We tend to badly over-estimate the amount of - * metadata blocks which are needed, so if we have - * reserved any metadata blocks, try to force out the - * inode and see if we have any better luck. - */ - if (md_reserved && retries++ <= 3) - goto retry; + if (vfs_dq_reserve_block(inode, md_needed + 1)) return -EDQUOT; - } if (ext4_claim_free_blocks(sbi, md_needed + 1)) { vfs_dq_release_reservation_block(inode, md_needed + 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - retry: - if (md_reserved) - write_inode_now(inode, (retries == 3)); yield(); goto repeat; } @@ -2213,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * variables are updated after the blocks have been allocated. */ new.b_state = 0; - get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | - EXT4_GET_BLOCKS_DELALLOC_RESERVE); + get_blocks_flags = EXT4_GET_BLOCKS_CREATE; if (mpd->b_state & (1 << BH_Delay)) - get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; + get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; + blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, &new, get_blocks_flags); if (blks < 0) { @@ -3032,7 +3038,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - int ret, retries = 0; + int ret, retries = 0, quota_retries = 0; struct page *page; pgoff_t index; unsigned from, to; @@ -3091,6 +3097,22 @@ retry: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; + + if ((ret == -EDQUOT) && + EXT4_I(inode)->i_reserved_meta_blocks && + (quota_retries++ < 3)) { + /* + * Since we often over-estimate the number of meta + * data blocks required, we may sometimes get a + * spurios out of quota error even though there would + * be enough space once we write the data blocks and + * find out how many meta data blocks were _really_ + * required. So try forcing the inode write to see if + * that helps. + */ + write_inode_now(inode, (quota_retries == 3)); + goto retry; + } out: return ret; } @@ -199,7 +199,9 @@ static int setfl(int fd, struct file * filp, unsigned long arg) static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int force) { - write_lock_irq(&filp->f_owner.lock); + unsigned long flags; + + write_lock_irqsave(&filp->f_owner.lock, flags); if (force || !filp->f_owner.pid) { put_pid(filp->f_owner.pid); filp->f_owner.pid = get_pid(pid); @@ -211,7 +213,7 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, filp->f_owner.euid = cred->euid; } } - write_unlock_irq(&filp->f_owner.lock); + write_unlock_irqrestore(&filp->f_owner.lock, flags); } int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c18913a..a9f5e13 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -828,6 +828,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (!page) break; + if (mapping_writably_mapped(mapping)) + flush_dcache_page(page); + pagefault_disable(); tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); pagefault_enable(); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f455a03..f426633 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (!gl) return -ENOMEM; + atomic_inc(&sdp->sd_glock_disposal); gl->gl_flags = 0; gl->gl_name = name; atomic_set(&gl->gl_ref, 1); @@ -1538,6 +1539,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) up_write(&gfs2_umount_flush_sem); msleep(10); } + flush_workqueue(glock_workqueue); + wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); + gfs2_dump_lockstate(sdp); } void gfs2_glock_finish_truncate(struct gfs2_inode *ip) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 13f0bd2..c0262fa 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -123,7 +123,7 @@ struct lm_lockops { int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); - void (*lm_put_lock) (struct kmem_cache *cachep, void *gl); + void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); unsigned int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, unsigned int flags); void (*lm_cancel) (struct gfs2_glock *gl); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 4792200..bc0ad15 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -544,6 +544,8 @@ struct gfs2_sbd { struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; struct gfs2_glock *sd_trans_gl; + wait_queue_head_t sd_glock_wait; + atomic_t sd_glock_disposal; /* Inode Stuff */ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 46df988..0e5e0e7 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -21,6 +21,7 @@ static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; unsigned ret = gl->gl_state; + struct gfs2_sbd *sdp = gl->gl_sbd; BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); @@ -30,6 +31,8 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ kmem_cache_free(gfs2_glock_cachep, gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); return; case -DLM_ECANCEL: /* Cancel while getting lock */ ret |= LM_OUT_CANCELED; @@ -164,14 +167,16 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl, return LM_OUT_ASYNC; } -static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) +static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) { - struct gfs2_glock *gl = ptr; - struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; if (gl->gl_lksb.sb_lkid == 0) { kmem_cache_free(cachep, gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); return; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index edfee24..8a102f7 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -82,6 +82,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) gfs2_tune_init(&sdp->sd_tune); + init_waitqueue_head(&sdp->sd_glock_wait); + atomic_set(&sdp->sd_glock_disposal, 0); spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); @@ -983,9 +985,17 @@ static const match_table_t nolock_tokens = { { Opt_err, NULL }, }; +static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + kmem_cache_free(cachep, gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); +} + static const struct lm_lockops nolock_ops = { .lm_proto_name = "lock_nolock", - .lm_put_lock = kmem_cache_free, + .lm_put_lock = nolock_put_lock, .lm_tokens = &nolock_tokens, }; diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 78f73ca..84350e1 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -1088,7 +1088,8 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) error = vfs_follow_link(nd, buf); if (buf != array) kfree(buf); - } + } else + path_put(&nd->path); return ERR_PTR(error); } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0608f49..503b842 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -591,11 +591,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip) u64 rgrp_count = ip->i_disksize; int error; - if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) { - gfs2_consist_inode(ip); - return -EIO; - } - + do_div(rgrp_count, sizeof(struct gfs2_rindex)); clear_rgrpdi(sdp); file_ra_state_init(&ra_state, inode->i_mapping); @@ -915,7 +911,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) { BUG_ON(ip->i_alloc != NULL); - ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL); + ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS); return ip->i_alloc; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index c282ad4..b9dd3da 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -21,6 +21,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include <linux/time.h> +#include <linux/wait.h> #include "gfs2.h" #include "incore.h" diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6b89132..63f2071 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -486,6 +486,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp) { dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); + if (gfp & __GFP_WAIT) + nfs_wb_page(page->mapping->host, page); /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) return 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index faa0918..f141bde 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1261,8 +1261,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { + umode_t newmode = inode->i_mode & S_IFMT; + newmode |= fattr->mode & S_IALLUGO; + inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - inode->i_mode = fattr->mode; } } else if (server->caps & NFS_CAP_MODE) invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 865265b..0c6fda3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -146,6 +146,7 @@ enum { NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ + NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ }; struct nfs4_state { @@ -277,6 +278,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); +extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 198d51d..375f0fa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -249,19 +249,15 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, if (state == NULL) break; nfs4_state_mark_reclaim_nograce(clp, state); - case -NFS4ERR_STALE_CLIENTID: + goto do_state_recovery; case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(clp); - ret = nfs4_wait_clnt_recover(clp); - if (ret == 0) - exception->retry = 1; -#if !defined(CONFIG_NFS_V4_1) - break; -#else /* !defined(CONFIG_NFS_V4_1) */ - if (!nfs4_has_session(server->nfs_client)) + if (state == NULL) break; - /* FALLTHROUGH */ + nfs4_state_mark_reclaim_reboot(clp, state); + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_EXPIRED: + goto do_state_recovery; +#if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: @@ -274,7 +270,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, nfs4_schedule_state_recovery(clp); exception->retry = 1; break; -#endif /* !defined(CONFIG_NFS_V4_1) */ +#endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: if (exception->timeout > HZ) { /* We have retried a decent amount, time to @@ -293,6 +289,12 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, } /* We failed to handle the error */ return nfs4_map_errors(ret); +do_state_recovery: + nfs4_schedule_state_recovery(clp); + ret = nfs4_wait_clnt_recover(clp); + if (ret == 0) + exception->retry = 1; + return ret; } @@ -1658,6 +1660,8 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in status = PTR_ERR(state); if (IS_ERR(state)) goto err_opendata_put; + if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) + set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); *res = state; @@ -3422,15 +3426,14 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (state == NULL) break; nfs4_state_mark_reclaim_nograce(clp, state); - case -NFS4ERR_STALE_CLIENTID: + goto do_state_recovery; case -NFS4ERR_STALE_STATEID: + if (state == NULL) + break; + nfs4_state_mark_reclaim_reboot(clp, state); + case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); - nfs4_schedule_state_recovery(clp); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) - rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); - task->tk_status = 0; - return -EAGAIN; + goto do_state_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -3458,6 +3461,13 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, } task->tk_status = nfs4_map_errors(task->tk_status); return 0; +do_state_recovery: + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); + nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + task->tk_status = 0; + return -EAGAIN; } static int @@ -4088,6 +4098,28 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = { .rpc_release = nfs4_lock_release, }; +static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_state *state = lsp->ls_state; + + switch (error) { + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + if (new_lock_owner != 0 || + (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + nfs4_state_mark_reclaim_nograce(clp, state); + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + break; + case -NFS4ERR_STALE_STATEID: + if (new_lock_owner != 0 || + (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + nfs4_state_mark_reclaim_reboot(clp, state); + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + }; +} + static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type) { struct nfs4_lockdata *data; @@ -4126,6 +4158,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ret = data->rpc_status; + if (ret) + nfs4_handle_setlk_error(data->server, data->lsp, + data->arg.new_lock_owner, ret); } else data->cancelled = 1; rpc_put_task(task); @@ -4181,8 +4216,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock { struct nfs_inode *nfsi = NFS_I(state->inode); unsigned char fl_flags = request->fl_flags; - int status; + int status = -ENOLCK; + if ((fl_flags & FL_POSIX) && + !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) + goto out; /* Is this a delegated open? */ status = nfs4_set_lock_state(state, request); if (status != 0) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6d263ed..c1e2733 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -901,7 +901,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } -static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) +int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e297593..a12c45b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -176,6 +176,12 @@ void nfs_release_request(struct nfs_page *req) kref_put(&req->wb_kref, nfs_free_request); } +static int nfs_wait_bit_uninterruptible(void *word) +{ + io_schedule(); + return 0; +} + /** * nfs_wait_on_request - Wait for a request to complete. * @req: request to wait upon. @@ -186,14 +192,9 @@ void nfs_release_request(struct nfs_page *req) int nfs_wait_on_request(struct nfs_page *req) { - int ret = 0; - - if (!test_bit(PG_BUSY, &req->wb_flags)) - goto out; - ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY, - nfs_wait_bit_killable, TASK_KILLABLE); -out: - return ret; + return wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_uninterruptible, + TASK_UNINTERRUPTIBLE); } /** diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ce907ef..f1afee4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -243,6 +243,7 @@ static int nfs_show_stats(struct seq_file *, struct vfsmount *); static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static void nfs_put_super(struct super_block *); static void nfs_kill_super(struct super_block *); static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); @@ -266,6 +267,7 @@ static const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, @@ -335,6 +337,7 @@ static const struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, @@ -2258,6 +2261,17 @@ error_splat_super: } /* + * Ensure that we unregister the bdi before kill_anon_super + * releases the device name + */ +static void nfs_put_super(struct super_block *s) +{ + struct nfs_server *server = NFS_SB(s); + + bdi_unregister(&server->backing_dev_info); +} + +/* * Destroy an NFS2/3 superblock */ static void nfs_kill_super(struct super_block *s) @@ -2265,7 +2279,6 @@ static void nfs_kill_super(struct super_block *s) struct nfs_server *server = NFS_SB(s); kill_anon_super(s); - bdi_unregister(&server->backing_dev_info); nfs_fscache_release_super_cookie(s); nfs_free_server(server); } diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 70e1fbb..ad4d2e7 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -15,8 +15,10 @@ #include "callback.h" +#ifdef CONFIG_NFS_V4 static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; +#endif static struct ctl_table_header *nfs_callback_sysctl_table; static ctl_table nfs_cb_sysctls[] = { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d171696..7b54b8b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1233,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -void nfs_commitdata_release(void *data) +static void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; @@ -1541,6 +1541,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) break; } ret = nfs_wait_on_request(req); + nfs_release_request(req); if (ret < 0) goto out; } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 17584c5..105b508 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2829,7 +2829,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); - if (flag || nilfs_segctor_confirm(sci)) + if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); WARN_ON(!list_empty(&sci->sc_copied_buffers)); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 83ac4d3..ba98546 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2913,7 +2913,9 @@ int journal_init(struct super_block *sb, const char *j_dev_name, journal->j_mount_id = 10; journal->j_state = 0; atomic_set(&(journal->j_jlock), 0); + reiserfs_write_unlock(sb); journal->j_cnode_free_list = allocate_cnodes(num_cnodes); + reiserfs_write_lock(sb); journal->j_cnode_free_orig = journal->j_cnode_free_list; journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; journal->j_cnode_used = 0; diff --git a/fs/romfs/super.c b/fs/romfs/super.c index c117fa8..42d2135 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -544,6 +544,7 @@ error: error_rsb_inval: ret = -EINVAL; error_rsb: + kfree(rsb); return ret; } |