diff options
Diffstat (limited to 'fs')
43 files changed, 677 insertions, 278 deletions
@@ -325,8 +325,8 @@ config FS_POSIX_ACL source "fs/xfs/Kconfig" config OCFS2_FS - tristate "OCFS2 file system support (EXPERIMENTAL)" - depends on NET && SYSFS && EXPERIMENTAL + tristate "OCFS2 file system support" + depends on NET && SYSFS select CONFIGFS_FS select JBD select CRC32 diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index df02545..816e8ef 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -86,6 +86,32 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare return sd; } +/* + * + * Return -EEXIST if there is already a configfs element with the same + * name for the same parent. + * + * called with parent inode's i_mutex held + */ +int configfs_dirent_exists(struct configfs_dirent *parent_sd, + const unsigned char *new) +{ + struct configfs_dirent * sd; + + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { + if (sd->s_element) { + const unsigned char *existing = configfs_get_name(sd); + if (strcmp(existing, new)) + continue; + else + return -EEXIST; + } + } + + return 0; +} + + int configfs_make_dirent(struct configfs_dirent * parent_sd, struct dentry * dentry, void * element, umode_t mode, int type) @@ -136,8 +162,10 @@ static int create_dir(struct config_item * k, struct dentry * p, int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; - error = configfs_make_dirent(p->d_fsdata, d, k, mode, - CONFIGFS_DIR); + error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); + if (!error) + error = configfs_make_dirent(p->d_fsdata, d, k, mode, + CONFIGFS_DIR); if (!error) { error = configfs_create(d, mode, init_dir); if (!error) { diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index d487043..b1981d0 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -539,7 +539,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) #endif /* EXT2FS_DEBUG */ -/* Superblock must be locked */ unsigned long ext2_count_free_blocks (struct super_block * sb) { struct ext2_group_desc * desc; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index de85c61..695f69c 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -637,7 +637,6 @@ fail: return ERR_PTR(err); } -/* Superblock must be locked */ unsigned long ext2_count_free_inodes (struct super_block * sb) { struct ext2_group_desc *desc; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 681dea8..4286ff6 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -251,6 +251,44 @@ static struct super_operations ext2_sops = { #endif }; +static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) +{ + __u32 *objp = vobjp; + unsigned long ino = objp[0]; + __u32 generation = objp[1]; + struct inode *inode; + struct dentry *result; + + if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO) + return ERR_PTR(-ESTALE); + if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) + return ERR_PTR(-ESTALE); + + /* iget isn't really right if the inode is currently unallocated!! + * ext2_read_inode currently does appropriate checks, but + * it might be "neater" to call ext2_get_inode first and check + * if the inode is valid..... + */ + inode = iget(sb, ino); + if (inode == NULL) + return ERR_PTR(-ENOMEM); + if (is_bad_inode(inode) || + (generation && inode->i_generation != generation)) { + /* we didn't find the right inode.. */ + iput(inode); + return ERR_PTR(-ESTALE); + } + /* now to find a dentry. + * If possible, get a well-connected one + */ + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + /* Yes, most of these are left as NULL!! * A NULL value implies the default, which works with ext2-like file * systems, but can be improved upon. @@ -258,6 +296,7 @@ static struct super_operations ext2_sops = { */ static struct export_operations ext2_export_ops = { .get_parent = ext2_get_parent, + .get_dentry = ext2_get_dentry, }; static unsigned long get_sb_block(void **data) @@ -1044,7 +1083,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) unsigned long overhead; int i; - lock_super(sb); if (test_opt (sb, MINIX_DF)) overhead = 0; else { @@ -1085,7 +1123,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count); buf->f_ffree = ext2_count_free_inodes (sb); buf->f_namelen = EXT2_NAME_LEN; - unlock_super(sb); return 0; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index c5ee9f0..84be02e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -925,7 +925,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, set_buffer_new(bh_result); got_it: map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); - if (blocks_to_boundary == 0) + if (count > blocks_to_boundary) set_buffer_boundary(bh_result); err = count; /* Clean up and exit */ @@ -1009,11 +1009,14 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, buffer_trace_init(&dummy.b_history); err = ext3_get_blocks_handle(handle, inode, block, 1, &dummy, create, 1); - if (err == 1) { + /* + * ext3_get_blocks_handle() returns number of blocks + * mapped. 0 in case of a HOLE. + */ + if (err > 0) { + if (err > 1) + WARN_ON(1); err = 0; - } else if (err >= 0) { - WARN_ON(1); - err = -EIO; } *errp = err; if (!err && buffer_mapped(&dummy)) { diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 813d589..3559086 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -554,6 +554,47 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) return 0; } + +static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) +{ + __u32 *objp = vobjp; + unsigned long ino = objp[0]; + __u32 generation = objp[1]; + struct inode *inode; + struct dentry *result; + + if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) + return ERR_PTR(-ESTALE); + if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) + return ERR_PTR(-ESTALE); + + /* iget isn't really right if the inode is currently unallocated!! + * + * ext3_read_inode will return a bad_inode if the inode had been + * deleted, so we should be safe. + * + * Currently we don't know the generation for parent directory, so + * a generation of 0 means "accept any" + */ + inode = iget(sb, ino); + if (inode == NULL) + return ERR_PTR(-ENOMEM); + if (is_bad_inode(inode) || + (generation && inode->i_generation != generation)) { + iput(inode); + return ERR_PTR(-ESTALE); + } + /* now to find a dentry. + * If possible, get a well-connected one + */ + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + #ifdef CONFIG_QUOTA #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) @@ -622,6 +663,7 @@ static struct super_operations ext3_sops = { static struct export_operations ext3_export_ops = { .get_parent = ext3_get_parent, + .get_dentry = ext3_get_dentry, }; enum { diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 2e0cc8e..3a56607 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h @@ -41,11 +41,7 @@ struct jffs2_inode_info { uint16_t flags; uint8_t usercompr; -#if !defined (__ECOS) -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2) struct inode vfs_inode; -#endif -#endif #ifdef CONFIG_JFFS2_FS_POSIX_ACL struct posix_acl *i_acl_access; struct posix_acl *i_acl_default; diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index 7675b33..5a6b4d6 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -21,6 +21,9 @@ #include <linux/pagemap.h> #include "nodelist.h" +static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, + struct jffs2_node_frag *this); + void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list) { struct jffs2_full_dirent **prev = list; @@ -87,7 +90,8 @@ void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint } } -void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this) +static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, + struct jffs2_node_frag *this) { if (this->node) { this->node->frags--; diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index cae92c1..0ddfd70 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -334,7 +334,6 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c_delete); struct rb_node *rb_next(struct rb_node *); struct rb_node *rb_prev(struct rb_node *); void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); -void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this); int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn); void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size); int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn); diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index c19bd47..e52cef5 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -252,6 +252,11 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs, union jffs2_node_union *node; struct jffs2_eraseblock *jeb; + if (c->summary->sum_size == JFFS2_SUMMARY_NOSUM_SIZE) { + dbg_summary("Summary is disabled for this jeb! Skipping summary info!\n"); + return 0; + } + node = invecs[0].iov_base; jeb = &c->blocks[ofs / c->sector_size]; ofs -= jeb->offset; diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 25bc1ae..4da09ce 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -1215,7 +1215,6 @@ int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xatt rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE); if (rc) { JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen); - rc = rc ? rc : -EBADFD; goto out; } rc = save_xattr_datum(c, xd); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fecd3b09..76ca1cb 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -100,25 +100,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } -/* - * "size" is never larger than rsize or wsize. - */ -static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size) -{ - int page_count; - - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; - page_count -= user_addr >> PAGE_SHIFT; - BUG_ON(page_count < 0); - - return page_count; -} - -static inline unsigned int nfs_max_pages(unsigned int size) -{ - return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -} - /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -276,28 +257,24 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; size_t rsize = NFS_SERVER(inode)->rsize; - unsigned int rpages = nfs_max_pages(rsize); unsigned int pgbase; int result; ssize_t started = 0; get_dreq(dreq); - pgbase = user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; size_t bytes; + pgbase = user_addr & ~PAGE_MASK; + bytes = min(rsize,count); + result = -ENOMEM; - data = nfs_readdata_alloc(rpages); + data = nfs_readdata_alloc(pgbase + bytes); if (unlikely(!data)) break; - bytes = rsize; - if (count < rsize) - bytes = count; - - data->npages = nfs_direct_count_pages(user_addr, bytes); down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 1, 0, data->pagevec, NULL); @@ -344,8 +321,10 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo started += bytes; user_addr += bytes; pos += bytes; + /* FIXME: Remove this unnecessary math from final patch */ pgbase += bytes; pgbase &= ~PAGE_MASK; + BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); count -= bytes; } while (count != 0); @@ -524,7 +503,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) { - dreq->commit_data = nfs_commit_alloc(0); + dreq->commit_data = nfs_commit_alloc(); if (dreq->commit_data != NULL) dreq->commit_data->req = (struct nfs_page *) dreq; } @@ -605,28 +584,24 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; size_t wsize = NFS_SERVER(inode)->wsize; - unsigned int wpages = nfs_max_pages(wsize); unsigned int pgbase; int result; ssize_t started = 0; get_dreq(dreq); - pgbase = user_addr & ~PAGE_MASK; do { struct nfs_write_data *data; size_t bytes; + pgbase = user_addr & ~PAGE_MASK; + bytes = min(wsize,count); + result = -ENOMEM; - data = nfs_writedata_alloc(wpages); + data = nfs_writedata_alloc(pgbase + bytes); if (unlikely(!data)) break; - bytes = wsize; - if (count < wsize) - bytes = count; - - data->npages = nfs_direct_count_pages(user_addr, bytes); down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 0, 0, data->pagevec, NULL); @@ -676,8 +651,11 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l started += bytes; user_addr += bytes; pos += bytes; + + /* FIXME: Remove this useless math from the final patch */ pgbase += bytes; pgbase &= ~PAGE_MASK; + BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); count -= bytes; } while (count != 0); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 153898e..b14145b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -970,7 +970,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st status = -ENOMEM; opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); if (opendata == NULL) - goto err_put_state_owner; + goto err_release_rwsem; status = _nfs4_proc_open(opendata); if (status != 0) @@ -989,11 +989,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st return 0; err_opendata_free: nfs4_opendata_free(opendata); +err_release_rwsem: + up_read(&clp->cl_sem); err_put_state_owner: nfs4_put_state_owner(sp); out_err: - /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ - up_read(&clp->cl_sem); *res = NULL; return status; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index da9cf11..f0aff82 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -43,13 +43,15 @@ static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) -struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) +struct nfs_read_data *nfs_readdata_alloc(size_t len) { + unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->npages = pagecount; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -140,7 +142,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, int result; struct nfs_read_data *rdata; - rdata = nfs_readdata_alloc(1); + rdata = nfs_readdata_alloc(count); if (!rdata) return -ENOMEM; @@ -202,9 +204,11 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&inode->i_lock); - nfs_readpage_truncate_uninitialised_page(rdata); - if (rdata->res.eof || rdata->res.count == rdata->args.count) + if (rdata->res.eof || rdata->res.count == rdata->args.count) { SetPageUptodate(page); + if (rdata->res.eof && count != 0) + memclear_highpage_flush(page, rdata->args.pgbase, count); + } result = 0; io_error: @@ -336,25 +340,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; struct nfs_read_data *data; - unsigned int rsize = NFS_SERVER(inode)->rsize; - unsigned int nbytes, offset; + size_t rsize = NFS_SERVER(inode)->rsize, nbytes; + unsigned int offset; int requests = 0; LIST_HEAD(list); nfs_list_remove_request(req); nbytes = req->wb_bytes; - for(;;) { - data = nfs_readdata_alloc(1); + do { + size_t len = min(nbytes,rsize); + + data = nfs_readdata_alloc(len); if (!data) goto out_bad; INIT_LIST_HEAD(&data->pages); list_add(&data->pages, &list); requests++; - if (nbytes <= rsize) - break; - nbytes -= rsize; - } + nbytes -= len; + } while(nbytes != 0); atomic_set(&req->wb_complete, requests); ClearPageError(page); @@ -402,7 +406,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) return nfs_pagein_multi(head, inode); - data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); + data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize); if (!data) goto out_bad; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5077499..7084ac9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -90,22 +90,13 @@ static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) +struct nfs_write_data *nfs_commit_alloc(void) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - if (pagecount <= ARRAY_SIZE(p->page_array)) - p->pagevec = p->page_array; - else { - p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); - if (!p->pagevec) { - mempool_free(p, nfs_commit_mempool); - p = NULL; - } - } } return p; } @@ -117,13 +108,15 @@ void nfs_commit_free(struct nfs_write_data *p) mempool_free(p, nfs_commit_mempool); } -struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +struct nfs_write_data *nfs_writedata_alloc(size_t len) { + unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->npages = pagecount; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -208,7 +201,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, int result, written = 0; struct nfs_write_data *wdata; - wdata = nfs_writedata_alloc(1); + wdata = nfs_writedata_alloc(wsize); if (!wdata) return -ENOMEM; @@ -597,8 +590,8 @@ static void nfs_cancel_commit_list(struct list_head *head) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_inode_remove_request(req); - nfs_clear_page_writeback(req); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + nfs_clear_page_writeback(req); } } @@ -999,24 +992,24 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; struct nfs_write_data *data; - unsigned int wsize = NFS_SERVER(inode)->wsize; - unsigned int nbytes, offset; + size_t wsize = NFS_SERVER(inode)->wsize, nbytes; + unsigned int offset; int requests = 0; LIST_HEAD(list); nfs_list_remove_request(req); nbytes = req->wb_bytes; - for (;;) { - data = nfs_writedata_alloc(1); + do { + size_t len = min(nbytes, wsize); + + data = nfs_writedata_alloc(len); if (!data) goto out_bad; list_add(&data->pages, &list); requests++; - if (nbytes <= wsize) - break; - nbytes -= wsize; - } + nbytes -= len; + } while (nbytes != 0); atomic_set(&req->wb_complete, requests); ClearPageError(page); @@ -1070,7 +1063,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) struct nfs_write_data *data; unsigned int count; - data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); + data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize); if (!data) goto out_bad; @@ -1378,7 +1371,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) struct nfs_write_data *data; struct nfs_page *req; - data = nfs_commit_alloc(NFS_SERVER(inode)->wpages); + data = nfs_commit_alloc(); if (!data) goto out_bad; @@ -1393,8 +1386,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_commit(req); - nfs_clear_page_writeback(req); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + nfs_clear_page_writeback(req); } return -ENOMEM; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 06da750..e35d7e5 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -33,7 +33,7 @@ * */ - +#include <linux/err.h> #include <linux/sunrpc/svc.h> #include <linux/nfsd/nfsd.h> #include <linux/nfs4.h> @@ -87,34 +87,35 @@ int nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) { struct xdr_netobj cksum; - struct crypto_tfm *tfm; + struct hash_desc desc; struct scatterlist sg[1]; int status = nfserr_resource; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); - tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); - if (tfm == NULL) - goto out; - cksum.len = crypto_tfm_alg_digestsize(tfm); + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) + goto out_no_tfm; + cksum.len = crypto_hash_digestsize(desc.tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); if (cksum.data == NULL) goto out; - crypto_digest_init(tfm); sg[0].page = virt_to_page(clname->data); sg[0].offset = offset_in_page(clname->data); sg[0].length = clname->len; - crypto_digest_update(tfm, sg, 1); - crypto_digest_final(tfm, cksum.data); + if (crypto_hash_digest(&desc, sg, sg->length, cksum.data)) + goto out; md5_to_hex(dname, cksum.data); kfree(cksum.data); status = nfs_ok; out: - crypto_free_tfm(tfm); + crypto_free_hash(desc.tfm); +out_no_tfm: return status; } diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 7d3be84..9fb8132f 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -16,6 +16,7 @@ ocfs2-objs := \ file.o \ heartbeat.o \ inode.o \ + ioctl.o \ journal.o \ localalloc.o \ mmap.o \ diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index edaab05..f43bc5f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1717,17 +1717,29 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, ocfs2_remove_from_cache(inode, eb_bh); - BUG_ON(eb->h_suballoc_slot); BUG_ON(el->l_recs[0].e_clusters); BUG_ON(el->l_recs[0].e_cpos); BUG_ON(el->l_recs[0].e_blkno); - status = ocfs2_free_extent_block(handle, - tc->tc_ext_alloc_inode, - tc->tc_ext_alloc_bh, - eb); - if (status < 0) { - mlog_errno(status); - goto bail; + if (eb->h_suballoc_slot == 0) { + /* + * This code only understands how to + * lock the suballocator in slot 0, + * which is fine because allocation is + * only ever done out of that + * suballocator too. A future version + * might change that however, so avoid + * a free if we don't know how to + * handle it. This way an fs incompat + * bit will not be necessary. + */ + status = ocfs2_free_extent_block(handle, + tc->tc_ext_alloc_inode, + tc->tc_ext_alloc_bh, + eb); + if (status < 0) { + mlog_errno(status); + goto bail; + } } } brelse(eb_bh); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index f1d1c34..3d7c082 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -391,31 +391,28 @@ out: static int ocfs2_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - int ret, extending = 0, locklevel = 0; - loff_t new_i_size; + int ret; struct buffer_head *di_bh = NULL; struct inode *inode = page->mapping->host; struct ocfs2_journal_handle *handle = NULL; + struct ocfs2_dinode *di; mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); /* NOTE: ocfs2_file_aio_write has ensured that it's safe for - * us to sample inode->i_size here without the metadata lock: + * us to continue here without rechecking the I/O against + * changed inode values. * * 1) We're currently holding the inode alloc lock, so no * nodes can change it underneath us. * * 2) We've had to take the metadata lock at least once - * already to check for extending writes, hence insuring - * that our current copy is also up to date. + * already to check for extending writes, suid removal, etc. + * The meta data update code then ensures that we don't get a + * stale inode allocation image (i_size, i_clusters, etc). */ - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - if (new_i_size > i_size_read(inode)) { - extending = 1; - locklevel = 1; - } - ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page); + ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page); if (ret != 0) { mlog_errno(ret); goto out; @@ -427,23 +424,20 @@ static int ocfs2_commit_write(struct file *file, struct page *page, goto out_unlock_meta; } - if (extending) { - handle = ocfs2_start_walk_page_trans(inode, page, from, to); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - goto out_unlock_data; - } + handle = ocfs2_start_walk_page_trans(inode, page, from, to); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_unlock_data; + } - /* Mark our buffer early. We'd rather catch this error up here - * as opposed to after a successful commit_write which would - * require us to set back inode->i_size. */ - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto out_commit; - } + /* Mark our buffer early. We'd rather catch this error up here + * as opposed to after a successful commit_write which would + * require us to set back inode->i_size. */ + ret = ocfs2_journal_access(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); + goto out_commit; } /* might update i_size */ @@ -453,37 +447,28 @@ static int ocfs2_commit_write(struct file *file, struct page *page, goto out_commit; } - if (extending) { - loff_t size = (u64) i_size_read(inode); - struct ocfs2_dinode *di = - (struct ocfs2_dinode *)di_bh->b_data; + di = (struct ocfs2_dinode *)di_bh->b_data; - /* ocfs2_mark_inode_dirty is too heavy to use here. */ - inode->i_blocks = ocfs2_align_bytes_to_sectors(size); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; + /* ocfs2_mark_inode_dirty() is too heavy to use here. */ + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); + di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - di->i_size = cpu_to_le64(size); - di->i_ctime = di->i_mtime = - cpu_to_le64(inode->i_mtime.tv_sec); - di->i_ctime_nsec = di->i_mtime_nsec = - cpu_to_le32(inode->i_mtime.tv_nsec); + inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode))); + di->i_size = cpu_to_le64((u64)i_size_read(inode)); - ret = ocfs2_journal_dirty(handle, di_bh); - if (ret < 0) { - mlog_errno(ret); - goto out_commit; - } + ret = ocfs2_journal_dirty(handle, di_bh); + if (ret < 0) { + mlog_errno(ret); + goto out_commit; } - BUG_ON(extending && (i_size_read(inode) != new_i_size)); - out_commit: - if (handle) - ocfs2_commit_trans(handle); + ocfs2_commit_trans(handle); out_unlock_data: ocfs2_data_unlock(inode, 1); out_unlock_meta: - ocfs2_meta_unlock(inode, locklevel); + ocfs2_meta_unlock(inode, 1); out: if (di_bh) brelse(di_bh); diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 9a24adf..c903741 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", (unsigned long long)block, nr, flags, inode); + BUG_ON((flags & OCFS2_BH_READAHEAD) && + (!inode || !(flags & OCFS2_BH_CACHED))); + if (osb == NULL || osb->sb == NULL || bhs == NULL) { status = -EINVAL; mlog_errno(status); @@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, bh = bhs[i]; ignore_cache = 0; + /* There are three read-ahead cases here which we need to + * be concerned with. All three assume a buffer has + * previously been submitted with OCFS2_BH_READAHEAD + * and it hasn't yet completed I/O. + * + * 1) The current request is sync to disk. This rarely + * happens these days, and never when performance + * matters - the code can just wait on the buffer + * lock and re-submit. + * + * 2) The current request is cached, but not + * readahead. ocfs2_buffer_uptodate() will return + * false anyway, so we'll wind up waiting on the + * buffer lock to do I/O. We re-check the request + * with after getting the lock to avoid a re-submit. + * + * 3) The current request is readahead (and so must + * also be a caching one). We short circuit if the + * buffer is locked (under I/O) and if it's in the + * uptodate cache. The re-check from #2 catches the + * case that the previous read-ahead completes just + * before our is-it-in-flight check. + */ + if (flags & OCFS2_BH_CACHED && !ocfs2_buffer_uptodate(inode, bh)) { mlog(ML_UPTODATE, @@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, continue; } + /* A read-ahead request was made - if the + * buffer is already under read-ahead from a + * previously submitted request than we are + * done here. */ + if ((flags & OCFS2_BH_READAHEAD) + && ocfs2_buffer_read_ahead(inode, bh)) + continue; + lock_buffer(bh); if (buffer_jbd(bh)) { #ifdef CATCH_BH_JBD_RACES @@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, continue; #endif } + + /* Re-check ocfs2_buffer_uptodate() as a + * previously read-ahead buffer may have + * completed I/O while we were waiting for the + * buffer lock. */ + if ((flags & OCFS2_BH_CACHED) + && !(flags & OCFS2_BH_READAHEAD) + && ocfs2_buffer_uptodate(inode, bh)) { + unlock_buffer(bh); + continue; + } + clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; - if (flags & OCFS2_BH_READAHEAD) - submit_bh(READA, bh); - else - submit_bh(READ, bh); + submit_bh(READ, bh); continue; } } @@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, for (i = (nr - 1); i >= 0; i--) { bh = bhs[i]; - /* We know this can't have changed as we hold the - * inode sem. Avoid doing any work on the bh if the - * journal has it. */ - if (!buffer_jbd(bh)) - wait_on_buffer(bh); - - if (!buffer_uptodate(bh)) { - /* Status won't be cleared from here on out, - * so we can safely record this and loop back - * to cleanup the other buffers. Don't need to - * remove the clustered uptodate information - * for this bh as it's not marked locally - * uptodate. */ - status = -EIO; - brelse(bh); - bhs[i] = NULL; - continue; + if (!(flags & OCFS2_BH_READAHEAD)) { + /* We know this can't have changed as we hold the + * inode sem. Avoid doing any work on the bh if the + * journal has it. */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) { + /* Status won't be cleared from here on out, + * so we can safely record this and loop back + * to cleanup the other buffers. Don't need to + * remove the clustered uptodate information + * for this bh as it's not marked locally + * uptodate. */ + status = -EIO; + brelse(bh); + bhs[i] = NULL; + continue; + } } + /* Always set the buffer in the cache, even if it was + * a forced read, or read-ahead which hasn't yet + * completed. */ if (inode) ocfs2_set_buffer_uptodate(inode, bh); } if (inode) mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); - mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", + mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", (unsigned long long)block, nr, - (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); + (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); bail: diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index 6ecb909..6cc2093 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h @@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, #define OCFS2_BH_CACHED 1 -#define OCFS2_BH_READAHEAD 8 /* use this to pass READA down to submit_bh */ +#define OCFS2_BH_READAHEAD 8 static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, struct buffer_head **bh, int flags, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 504595d..305cba3 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -320,8 +320,12 @@ static int compute_max_sectors(struct block_device *bdev) max_pages = q->max_hw_segments; max_pages--; /* Handle I/Os that straddle a page */ - max_sectors = max_pages << (PAGE_SHIFT - 9); - + if (max_pages) { + max_sectors = max_pages << (PAGE_SHIFT - 9); + } else { + /* If BIO contains 1 or less than 1 page. */ + max_sectors = q->max_sectors; + } /* Why is fls() 1-based???? */ pow_two_sectors = 1 << (fls(max_sectors) - 1); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 3d494d1..04e0191 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) { int error = 0; - unsigned long offset, blk; - int i, num, stored; + unsigned long offset, blk, last_ra_blk = 0; + int i, stored; struct buffer_head * bh, * tmp; struct ocfs2_dir_entry * de; int err; struct inode *inode = filp->f_dentry->d_inode; struct super_block * sb = inode->i_sb; - int have_disk_lock = 0; + unsigned int ra_sectors = 16; mlog_entry("dirino=%llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) mlog_errno(error); /* we haven't got any yet, so propagate the error. */ stored = error; - goto bail; + goto bail_nolock; } - have_disk_lock = 1; offset = filp->f_pos & (sb->s_blocksize - 1); @@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) continue; } - /* - * Do the readahead (8k) - */ - if (!offset) { - for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0; + /* The idea here is to begin with 8k read-ahead and to stay + * 4k ahead of our current position. + * + * TODO: Use the pagecache for this. We just need to + * make sure it's cluster-safe... */ + if (!last_ra_blk + || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) { + for (i = ra_sectors >> (sb->s_blocksize_bits - 9); i > 0; i--) { tmp = ocfs2_bread(inode, ++blk, &err, 1); if (tmp) brelse(tmp); } + last_ra_blk = blk; + ra_sectors = 8; } revalidate: @@ -194,9 +198,9 @@ revalidate: stored = 0; bail: - if (have_disk_lock) - ocfs2_meta_unlock(inode, 0); + ocfs2_meta_unlock(inode, 0); +bail_nolock: mlog_exit(stored); return stored; diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 42775e2..f13a4ba 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -367,12 +367,10 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) goto do_ast; } - mlog(ML_ERROR, "got %sast for unknown lock! cookie=%u:%llu, " - "name=%.*s, namelen=%u\n", - past->type == DLM_AST ? "" : "b", - dlm_get_lock_cookie_node(cookie), - dlm_get_lock_cookie_seq(cookie), - locklen, name, locklen); + mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " + "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", + dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie), + locklen, name, locklen); ret = DLM_NORMAL; unlock_out: diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 762eb1f..151b417 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1330,6 +1330,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); lvb->lvb_imtime_packed = cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); + lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); mlog_meta_lvb(0, lockres); @@ -1360,6 +1361,9 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); + oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); + ocfs2_set_inode_flags(inode); + /* fast-symlinks are a special case */ if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) inode->i_blocks = 0; @@ -2899,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level, be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), be16_to_cpu(lvb->lvb_imode)); mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " - "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink), + "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), (long long)be64_to_cpu(lvb->lvb_iatime_packed), (long long)be64_to_cpu(lvb->lvb_ictime_packed), - (long long)be64_to_cpu(lvb->lvb_imtime_packed)); + (long long)be64_to_cpu(lvb->lvb_imtime_packed), + be32_to_cpu(lvb->lvb_iattr)); } diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 8f2d1db..243ae86 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -27,7 +27,7 @@ #ifndef DLMGLUE_H #define DLMGLUE_H -#define OCFS2_LVB_VERSION 2 +#define OCFS2_LVB_VERSION 3 struct ocfs2_meta_lvb { __be32 lvb_version; @@ -40,7 +40,8 @@ struct ocfs2_meta_lvb { __be64 lvb_isize; __be16 lvb_imode; __be16 lvb_inlink; - __be32 lvb_reserved[3]; + __be32 lvb_iattr; + __be32 lvb_reserved[2]; }; /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a9559c8..2bbfa17 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -44,6 +44,7 @@ #include "file.h" #include "sysfile.h" #include "inode.h" +#include "ioctl.h" #include "journal.h" #include "mmap.h" #include "suballoc.h" @@ -1227,10 +1228,12 @@ const struct file_operations ocfs2_fops = { .open = ocfs2_file_open, .aio_read = ocfs2_file_aio_read, .aio_write = ocfs2_file_aio_write, + .ioctl = ocfs2_ioctl, }; const struct file_operations ocfs2_dops = { .read = generic_read_dir, .readdir = ocfs2_readdir, .fsync = ocfs2_sync_file, + .ioctl = ocfs2_ioctl, }; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 327a5b7..7bcf691 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -71,6 +71,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, struct inode *inode, struct buffer_head *fe_bh); +void ocfs2_set_inode_flags(struct inode *inode) +{ + unsigned int flags = OCFS2_I(inode)->ip_attr; + + inode->i_flags &= ~(S_IMMUTABLE | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & OCFS2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + + if (flags & OCFS2_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & OCFS2_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & OCFS2_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & OCFS2_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, u64 blkno, int delete_vote) @@ -260,7 +280,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_blocks = ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size)); inode->i_mapping->a_ops = &ocfs2_aops; - inode->i_flags |= S_NOATIME; inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); @@ -276,6 +295,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; + OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); if (create_ino) inode->i_ino = ino_from_blkno(inode->i_sb, @@ -330,6 +350,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, OCFS2_LOCK_TYPE_DATA, inode); + ocfs2_set_inode_flags(inode); + inode->i_flags |= S_NOATIME; + status = 0; bail: mlog_exit(status); @@ -1027,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode, u64 p_blkno; int readflags = OCFS2_BH_CACHED; -#if 0 - /* only turn this on if we know we can deal with read_block - * returning nothing */ if (reada) readflags |= OCFS2_BH_READAHEAD; -#endif if (((u64)block << inode->i_sb->s_blocksize_bits) >= i_size_read(inode)) { @@ -1131,6 +1150,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle, spin_lock(&OCFS2_I(inode)->ip_lock); fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); + fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr); spin_unlock(&OCFS2_I(inode)->ip_lock); fe->i_size = cpu_to_le64(i_size_read(inode)); @@ -1169,6 +1189,8 @@ void ocfs2_refresh_inode(struct inode *inode, spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); + OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); + ocfs2_set_inode_flags(inode); i_size_write(inode, le64_to_cpu(fe->i_size)); inode->i_nlink = le16_to_cpu(fe->i_links_count); inode->i_uid = le32_to_cpu(fe->i_uid); diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 35140f6..4d1e539 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -56,6 +56,7 @@ struct ocfs2_inode_info struct ocfs2_journal_handle *ip_handle; u32 ip_flags; /* see below */ + u32 ip_attr; /* inode attributes */ /* protected by recovery_lock. */ struct inode *ip_next_orphan; @@ -142,4 +143,6 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle, int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb); int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); +void ocfs2_set_inode_flags(struct inode *inode); + #endif /* OCFS2_INODE_H */ diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c new file mode 100644 index 0000000..3663cef --- /dev/null +++ b/fs/ocfs2/ioctl.c @@ -0,0 +1,136 @@ +/* + * linux/fs/ocfs2/ioctl.c + * + * Copyright (C) 2006 Herbert Poetzl + * adapted from Remy Card's ext2/ioctl.c + */ + +#include <linux/fs.h> +#include <linux/mount.h> + +#define MLOG_MASK_PREFIX ML_INODE +#include <cluster/masklog.h> + +#include "ocfs2.h" +#include "alloc.h" +#include "dlmglue.h" +#include "inode.h" +#include "journal.h" + +#include "ocfs2_fs.h" +#include "ioctl.h" + +#include <linux/ext2_fs.h> + +static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) +{ + int status; + + status = ocfs2_meta_lock(inode, NULL, NULL, 0); + if (status < 0) { + mlog_errno(status); + return status; + } + *flags = OCFS2_I(inode)->ip_attr; + ocfs2_meta_unlock(inode, 0); + + mlog_exit(status); + return status; +} + +static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, + unsigned mask) +{ + struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_journal_handle *handle = NULL; + struct buffer_head *bh = NULL; + unsigned oldflags; + int status; + + mutex_lock(&inode->i_mutex); + + status = ocfs2_meta_lock(inode, NULL, &bh, 1); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + status = -EROFS; + if (IS_RDONLY(inode)) + goto bail_unlock; + + status = -EACCES; + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + goto bail_unlock; + + if (!S_ISDIR(inode->i_mode)) + flags &= ~OCFS2_DIRSYNC_FL; + + handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail_unlock; + } + + oldflags = ocfs2_inode->ip_attr; + flags = flags & mask; + flags |= oldflags & ~mask; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + */ + status = -EPERM; + if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) & + (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + goto bail_unlock; + } + + ocfs2_inode->ip_attr = flags; + ocfs2_set_inode_flags(inode); + + status = ocfs2_mark_inode_dirty(handle, inode, bh); + if (status < 0) + mlog_errno(status); + + ocfs2_commit_trans(handle); +bail_unlock: + ocfs2_meta_unlock(inode, 1); +bail: + mutex_unlock(&inode->i_mutex); + + if (bh) + brelse(bh); + + mlog_exit(status); + return status; +} + +int ocfs2_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + unsigned int flags; + int status; + + switch (cmd) { + case OCFS2_IOC_GETFLAGS: + status = ocfs2_get_inode_attr(inode, &flags); + if (status < 0) + return status; + + flags &= OCFS2_FL_VISIBLE; + return put_user(flags, (int __user *) arg); + case OCFS2_IOC_SETFLAGS: + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + return ocfs2_set_inode_attr(inode, flags, + OCFS2_FL_MODIFIABLE); + default: + return -ENOTTY; + } +} + diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h new file mode 100644 index 0000000..4a7c829 --- /dev/null +++ b/fs/ocfs2/ioctl.h @@ -0,0 +1,16 @@ +/* + * ioctl.h + * + * Function prototypes + * + * Copyright (C) 2006 Herbert Poetzl + * + */ + +#ifndef OCFS2_IOCTL_H +#define OCFS2_IOCTL_H + +int ocfs2_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg); + +#endif /* OCFS2_IOCTL_H */ diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 0673862..0d3e939 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -56,6 +56,7 @@ #include "journal.h" #include "namei.h" #include "suballoc.h" +#include "super.h" #include "symlink.h" #include "sysfile.h" #include "uptodate.h" @@ -310,13 +311,6 @@ static int ocfs2_mknod(struct inode *dir, /* get our super block */ osb = OCFS2_SB(dir->i_sb); - if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) { - mlog(ML_ERROR, "inode %llu has i_nlink of %u\n", - (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink); - status = -EMLINK; - goto leave; - } - handle = ocfs2_alloc_handle(osb); if (handle == NULL) { status = -ENOMEM; @@ -331,6 +325,11 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } + if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) { + status = -EMLINK; + goto leave; + } + dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; if (!dirfe->i_links_count) { /* can't make a file in a deleted directory. */ @@ -643,11 +642,6 @@ static int ocfs2_link(struct dentry *old_dentry, goto bail; } - if (inode->i_nlink >= OCFS2_LINK_MAX) { - err = -EMLINK; - goto bail; - } - handle = ocfs2_alloc_handle(osb); if (handle == NULL) { err = -ENOMEM; @@ -661,6 +655,11 @@ static int ocfs2_link(struct dentry *old_dentry, goto bail; } + if (!dir->i_nlink) { + err = -ENOENT; + goto bail; + } + err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, dentry->d_name.len); if (err) @@ -1964,13 +1963,8 @@ restart: } num++; - /* XXX: questionable readahead stuff here */ bh = ocfs2_bread(dir, b++, &err, 1); bh_use[ra_max] = bh; -#if 0 // ??? - if (bh) - ll_rw_block(READ, 1, &bh); -#endif } } if ((bh = bh_use[ra_ptr++]) == NULL) @@ -1978,6 +1972,10 @@ restart: wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ + ocfs2_error(dir->i_sb, "reading directory %llu, " + "offset %lu\n", + (unsigned long long)OCFS2_I(dir)->ip_blkno, + block); brelse(bh); goto next; } diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index c5b1ac5..3330a5d 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -114,6 +114,26 @@ #define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ #define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ +/* Inode attributes, keep in sync with EXT2 */ +#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ +#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ +#define OCFS2_COMPR_FL (0x00000004) /* Compress file */ +#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ +#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ +#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ +#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ +#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ +#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ + +#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ +#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ + +/* + * ioctl commands + */ +#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) +#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) + /* * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) */ @@ -399,7 +419,9 @@ struct ocfs2_dinode { __le32 i_atime_nsec; __le32 i_ctime_nsec; __le32 i_mtime_nsec; -/*70*/ __le64 i_reserved1[9]; + __le32 i_attr; + __le32 i_reserved1; +/*70*/ __le64 i_reserved2[8]; /*B8*/ union { __le64 i_pad1; /* Generic way to refer to this 64bit union */ diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index b8a00a7..9707ed7 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi, } /* Warning: even if it returns true, this does *not* guarantee that - * the block is stored in our inode metadata cache. */ + * the block is stored in our inode metadata cache. + * + * This can be called under lock_buffer() + */ int ocfs2_buffer_uptodate(struct inode *inode, struct buffer_head *bh) { @@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode, return ocfs2_buffer_cached(OCFS2_I(inode), bh); } +/* + * Determine whether a buffer is currently out on a read-ahead request. + * ip_io_sem should be held to serialize submitters with the logic here. + */ +int ocfs2_buffer_read_ahead(struct inode *inode, + struct buffer_head *bh) +{ + return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh); +} + /* Requires ip_lock */ static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, sector_t block) @@ -403,7 +416,11 @@ out_free: * * Note that this function may actually fail to insert the block if * memory cannot be allocated. This is not fatal however (but may - * result in a performance penalty) */ + * result in a performance penalty) + * + * Readahead buffers can be passed in here before the I/O request is + * completed. + */ void ocfs2_set_buffer_uptodate(struct inode *inode, struct buffer_head *bh) { diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h index 01cd32d..2e73206 100644 --- a/fs/ocfs2/uptodate.h +++ b/fs/ocfs2/uptodate.h @@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode, struct buffer_head *bh); void ocfs2_remove_from_cache(struct inode *inode, struct buffer_head *bh); +int ocfs2_buffer_read_ahead(struct inode *inode, + struct buffer_head *bh); #endif /* OCFS2_UPTODATE_H */ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c40f81b..34dcb43 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1390,11 +1390,19 @@ xfs_vm_direct_IO( iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); - ret = blockdev_direct_IO_own_locking(rw, iocb, inode, - iomap.iomap_target->bt_bdev, - iov, offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct); + if (rw == WRITE) { + ret = blockdev_direct_IO_own_locking(rw, iocb, inode, + iomap.iomap_target->bt_bdev, + iov, offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct); + } else { + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, + iomap.iomap_target->bt_bdev, + iov, offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct); + } if (unlikely(ret <= 0 && iocb->private)) xfs_destroy_ioend(iocb->private); diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 5d9cfd9..ee788b1 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -264,7 +264,9 @@ xfs_read( dmflags, &locktype); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); - goto unlock_mutex; + if (unlikely(ioflags & IO_ISDIRECT)) + mutex_unlock(&inode->i_mutex); + return ret; } } @@ -272,6 +274,9 @@ xfs_read( bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), -1, FI_REMAPF_LOCKED); + if (unlikely(ioflags & IO_ISDIRECT)) + mutex_unlock(&inode->i_mutex); + xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, (void *)iovp, segs, *offset, ioflags); ret = __generic_file_aio_read(iocb, iovp, segs, offset); @@ -281,10 +286,6 @@ xfs_read( XFS_STATS_ADD(xs_read_bytes, ret); xfs_iunlock(ip, XFS_IOLOCK_SHARED); - -unlock_mutex: - if (unlikely(ioflags & IO_ISDIRECT)) - mutex_unlock(&inode->i_mutex); return ret; } @@ -390,6 +391,8 @@ xfs_splice_write( xfs_inode_t *ip = XFS_BHVTOI(bdp); xfs_mount_t *mp = ip->i_mount; ssize_t ret; + struct inode *inode = outfilp->f_mapping->host; + xfs_fsize_t isize; XFS_STATS_INC(xs_write_calls); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -416,6 +419,20 @@ xfs_splice_write( if (ret > 0) XFS_STATS_ADD(xs_write_bytes, ret); + isize = i_size_read(inode); + if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) + *ppos = isize; + + if (*ppos > ip->i_d.di_size) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (*ppos > ip->i_d.di_size) { + ip->i_d.di_size = *ppos; + i_size_write(inode, *ppos); + ip->i_update_core = 1; + ip->i_update_size = 1; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index f137856..db8872b 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -203,7 +203,7 @@ xfs_qm_statvfs( if (error || !vnode) return error; - mp = XFS_BHVTOM(bhv); + mp = xfs_vfstom(bhvtovfs(bhv)); ip = xfs_vtoi(vnode); if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 650591f..5a42561 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -44,6 +44,26 @@ typedef enum xfs_alloctype #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ /* + * In order to avoid ENOSPC-related deadlock caused by + * out-of-order locking of AGF buffer (PV 947395), we place + * constraints on the relationship among actual allocations for + * data blocks, freelist blocks, and potential file data bmap + * btree blocks. However, these restrictions may result in no + * actual space allocated for a delayed extent, for example, a data + * block in a certain AG is allocated but there is no additional + * block for the additional bmap btree block due to a split of the + * bmap btree of the file. The result of this may lead to an + * infinite loop in xfssyncd when the file gets flushed to disk and + * all delayed extents need to be actually allocated. To get around + * this, we explicitly set aside a few blocks which will not be + * reserved in delayed allocation. Considering the minimum number of + * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap + * btree requires 1 fsb, so we set the number of set-aside blocks + * to 4 + 4*agcount. + */ +#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) + +/* * Argument structure for xfs_alloc routines. * This is turned into a structure to avoid having 20 arguments passed * down several levels of the stack. diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 077629b..c064e72 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -462,7 +462,7 @@ xfs_fs_counts( xfs_icsb_sync_counters_lazy(mp); s = XFS_SB_LOCK(mp); - cnt->freedata = mp->m_sb.sb_fdblocks; + cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); cnt->freertx = mp->m_sb.sb_frextents; cnt->freeino = mp->m_sb.sb_ifree; cnt->allocino = mp->m_sb.sb_icount; @@ -519,15 +519,19 @@ xfs_reserve_blocks( } mp->m_resblks = request; } else { + __int64_t free; + + free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); delta = request - mp->m_resblks; - lcounter = mp->m_sb.sb_fdblocks - delta; + lcounter = free - delta; if (lcounter < 0) { /* We can't satisfy the request, just get what we can */ - mp->m_resblks += mp->m_sb.sb_fdblocks; - mp->m_resblks_avail += mp->m_sb.sb_fdblocks; - mp->m_sb.sb_fdblocks = 0; + mp->m_resblks += free; + mp->m_resblks_avail += free; + mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp); } else { - mp->m_sb.sb_fdblocks = lcounter; + mp->m_sb.sb_fdblocks = + lcounter + XFS_ALLOC_SET_ASIDE(mp); mp->m_resblks = request; mp->m_resblks_avail += delta; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 4be5c0b..9dfae18 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1243,24 +1243,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) xfs_trans_log_buf(tp, bp, first, last); } -/* - * In order to avoid ENOSPC-related deadlock caused by - * out-of-order locking of AGF buffer (PV 947395), we place - * constraints on the relationship among actual allocations for - * data blocks, freelist blocks, and potential file data bmap - * btree blocks. However, these restrictions may result in no - * actual space allocated for a delayed extent, for example, a data - * block in a certain AG is allocated but there is no additional - * block for the additional bmap btree block due to a split of the - * bmap btree of the file. The result of this may lead to an - * infinite loop in xfssyncd when the file gets flushed to disk and - * all delayed extents need to be actually allocated. To get around - * this, we explicitly set aside a few blocks which will not be - * reserved in delayed allocation. Considering the minimum number of - * needed freelist blocks is 4 fsbs, a potential split of file's bmap - * btree requires 1 fsb, so we set the number of set-aside blocks to 8. -*/ -#define SET_ASIDE_BLOCKS 8 /* * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply @@ -1306,7 +1288,8 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, return 0; case XFS_SBS_FDBLOCKS: - lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS; + lcounter = (long long) + mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); if (delta > 0) { /* Putting blocks back */ @@ -1340,7 +1323,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, } } - mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS; + mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); return 0; case XFS_SBS_FREXTENTS: lcounter = (long long)mp->m_sb.sb_frextents; @@ -2021,7 +2004,8 @@ xfs_icsb_sync_counters_lazy( * when we get near ENOSPC. */ #define XFS_ICSB_INO_CNTR_REENABLE 64 -#define XFS_ICSB_FDBLK_CNTR_REENABLE 512 +#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ + (512 + XFS_ALLOC_SET_ASIDE(mp)) STATIC void xfs_icsb_balance_counter( xfs_mount_t *mp, @@ -2055,7 +2039,7 @@ xfs_icsb_balance_counter( case XFS_SBS_FDBLOCKS: count = mp->m_sb.sb_fdblocks; resid = do_div(count, weight); - if (count < XFS_ICSB_FDBLK_CNTR_REENABLE) + if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp)) goto out; break; default: @@ -2110,11 +2094,11 @@ again: case XFS_SBS_FDBLOCKS: BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); - lcounter = icsbp->icsb_fdblocks; + lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); lcounter += delta; if (unlikely(lcounter < 0)) goto slow_path; - icsbp->icsb_fdblocks = lcounter; + icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); break; default: BUG(); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index b427d22..a34796e 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -811,7 +811,8 @@ xfs_statvfs( statp->f_bsize = sbp->sb_blocksize; lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; statp->f_blocks = sbp->sb_dblocks - lsize; - statp->f_bfree = statp->f_bavail = sbp->sb_fdblocks; + statp->f_bfree = statp->f_bavail = + sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); fakeinos = statp->f_bfree << sbp->sb_inopblog; #if XFS_BIG_INUMS fakeinos += mp->m_inoadd; |