diff options
Diffstat (limited to 'fs')
57 files changed, 634 insertions, 949 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c index 3dab9e9..722743b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode) { struct address_space *mapping = vnode->vfs_inode.i_mapping; struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_cyclic = 1, diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index b6ab27c..811384b 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -68,11 +68,7 @@ * Here we can be a bit looser than the data sections since this * needs to only meet arch ABI requirements. */ -#ifdef ARCH_SLAB_MINALIGN -#define FLAT_STACK_ALIGN (ARCH_SLAB_MINALIGN) -#else -#define FLAT_STACK_ALIGN (sizeof(void *)) -#endif +#define FLAT_STACK_ALIGN max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN) #define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ #define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a4080c2..d74e6af 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, .sync_mode = wbc->sync_mode, .older_than_this = NULL, .nr_to_write = 64, @@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, .sync_io = mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = inode->i_mapping->backing_dev_info, .sync_mode = mode, .older_than_this = NULL, .nr_to_write = nr_pages * 2, diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 83d4d27..3fe4904 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, return -EAGAIN; } - op = le32_to_cpu(head->op); + op = le16_to_cpu(head->op); result = le32_to_cpu(head->result); dout("handle_reply op %d result %d\n", op, result); switch (op) { diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 619b616..74144d6 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) struct ceph_cap *cap = NULL; /* temporary, until we do something about cap import/export */ - if (!ctx) - return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); + if (!ctx) { + cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); + if (cap) { + caps_use_count++; + caps_total_count++; + } + return cap; + } spin_lock(&caps_list_lock); dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", @@ -2886,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; struct ceph_mds_request_release *rel = *p; + int used, dirty; int ret = 0; - int used = 0; spin_lock(&inode->i_lock); used = __ceph_caps_used(ci); + dirty = __ceph_caps_dirty(ci); - dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode, - mds, ceph_cap_string(used), ceph_cap_string(drop), + dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n", + inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop), ceph_cap_string(unless)); - /* only drop unused caps */ - drop &= ~used; + /* only drop unused, clean caps */ + drop &= ~(used | dirty); cap = __get_cap_for_mds(ci, mds); if (cap && __cap_is_valid(cap)) { diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c index 9ba54ef..a4eec13 100644 --- a/fs/ceph/crush/mapper.c +++ b/fs/ceph/crush/mapper.c @@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, static int crush_bucket_choose(struct crush_bucket *in, int x, int r) { - dprintk("choose %d x=%d r=%d\n", in->id, x, r); + dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); switch (in->alg) { case CRUSH_BUCKET_UNIFORM: return bucket_uniform_choose((struct crush_bucket_uniform *)in, @@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r) */ static int is_out(struct crush_map *map, __u32 *weight, int item, int x) { - if (weight[item] >= 0x1000) + if (weight[item] >= 0x10000) return 0; if (weight[item] == 0) return 1; @@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map, int itemtype; int collide, reject; const int orig_tries = 5; /* attempts before we fall back to search */ - dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos); + + dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", + bucket->id, x, outpos, numrep); for (rep = outpos; rep < numrep; rep++) { /* keep trying until we get a non-out, non-colliding item */ @@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map, BUG_ON(item >= 0 || (-1-item) >= map->max_buckets); in = map->buckets[-1-item]; + retry_bucket = 1; continue; } @@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map, } } - if (recurse_to_leaf && - item < 0 && - crush_choose(map, map->buckets[-1-item], - weight, - x, outpos+1, 0, - out2, outpos, - firstn, 0, NULL) <= outpos) { - reject = 1; - } else { + reject = 0; + if (recurse_to_leaf) { + if (item < 0) { + if (crush_choose(map, + map->buckets[-1-item], + weight, + x, outpos+1, 0, + out2, outpos, + firstn, 0, + NULL) <= outpos) + /* didn't get leaf */ + reject = 1; + } else { + /* we already have a leaf! */ + out2[outpos] = item; + } + } + + if (!reject) { /* out? */ if (itemtype == 0) reject = is_out(map, weight, @@ -424,12 +437,12 @@ reject: continue; } - dprintk("choose got %d\n", item); + dprintk("CHOOSE got %d\n", item); out[outpos] = item; outpos++; } - dprintk("choose returns %d\n", outpos); + dprintk("CHOOSE returns %d\n", outpos); return outpos; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3be33fb..f2f5332 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp) static int caps_show(struct seq_file *s, void *p) { - struct ceph_client *client = p; + struct ceph_client *client = s->private; int total, avail, used, reserved, min; ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ab47f46..8f9b9fe 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, d_drop(dn); realdn = d_materialise_unique(dn, in); if (IS_ERR(realdn)) { - pr_err("splice_dentry error %p inode %p ino %llx.%llx\n", - dn, in, ceph_vinop(in)); + pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", + PTR_ERR(realdn), dn, in, ceph_vinop(in)); if (prehash) *prehash = false; /* don't rehash on error */ dn = realdn; /* note realdn contains the error */ @@ -1234,18 +1234,23 @@ retry_lookup: goto out; } dn = splice_dentry(dn, in, NULL); + if (IS_ERR(dn)) + dn = NULL; } if (fill_inode(in, &rinfo->dir_in[i], NULL, session, req->r_request_started, -1, &req->r_caps_reservation) < 0) { pr_err("fill_inode badness on %p\n", in); - dput(dn); - continue; + goto next_item; } - update_dentry_lease(dn, rinfo->dir_dlease[i], - req->r_session, req->r_request_started); - dput(dn); + if (dn) + update_dentry_lease(dn, rinfo->dir_dlease[i], + req->r_session, + req->r_request_started); +next_item: + if (dn) + dput(dn); } req->r_did_prepopulate = true; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1766947..3ab79f6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2783,6 +2783,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) drop_leases(mdsc); ceph_flush_dirty_caps(mdsc); wait_requests(mdsc); + + /* + * wait for reply handlers to drop their request refs and + * their inode/dcache refs + */ + ceph_msgr_flush(); } /* diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 64b8b1f..9ad43a3 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -657,7 +657,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->connect_seq, global_seq, proto); - con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; + con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT); con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); con->out_connect.global_seq = cpu_to_le32(global_seq); @@ -1396,10 +1396,12 @@ static int read_partial_message(struct ceph_connection *con) if (!con->in_msg) { dout("got hdr type %d front %d data %d\n", con->in_hdr.type, con->in_hdr.front_len, con->in_hdr.data_len); + skip = 0; con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); + BUG_ON(con->in_msg); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 07a5399..cc115ea 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -725,7 +725,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc, dout("authenticated, starting session\n"); monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; - monc->client->msgr->inst.name.num = monc->auth->global_id; + monc->client->msgr->inst.name.num = + cpu_to_le64(monc->auth->global_id); __send_subscribe(monc); __resend_generic_request(monc); diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index d25b4ad..92b7251 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) int type = le16_to_cpu(msg->hdr.type); if (!osd) - return; + goto out; osdc = osd->o_osdc; switch (type) { @@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) pr_err("received unknown message type %d %s\n", type, ceph_msg_type_name(type)); } +out: ceph_msg_put(msg); } diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index ddc656f..50ce64e 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -707,6 +707,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, newcrush = crush_decode(*p, min(*p+len, end)); if (IS_ERR(newcrush)) return ERR_CAST(newcrush); + *p += len; } /* new flags? */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 78c02eb..484e52b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -473,14 +473,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data) return 0; } +void cifs_drop_inode(struct inode *inode) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + return generic_drop_inode(inode); + + return generic_delete_inode(inode); +} + static const struct super_operations cifs_super_ops = { .put_super = cifs_put_super, .statfs = cifs_statfs, .alloc_inode = cifs_alloc_inode, .destroy_inode = cifs_destroy_inode, -/* .drop_inode = generic_delete_inode, - .delete_inode = cifs_delete_inode, */ /* Do not need above two - functions unless later we add lazy close of inodes or unless the + .drop_inode = cifs_drop_inode, +/* .delete_inode = cifs_delete_inode, */ /* Do not need above + function unless later we add lazy close of inodes or unless the kernel forgets to call us with the same number of releases (closes) as opens */ .show_options = cifs_show_options, diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index fb1657e..fb6318b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -106,7 +106,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, struct file *file, struct vfsmount *mnt, unsigned int oflags); extern int cifs_posix_open(char *full_path, struct inode **pinode, - struct vfsmount *mnt, struct super_block *sb, int mode, int oflags, __u32 *poplock, __u16 *pnetfid, int xid); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 391816b..e7ae78b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/namei.h> #include <linux/mount.h> +#include <linux/file.h> #include "cifsfs.h" #include "cifspdu.h" #include "cifsglob.h" @@ -184,12 +185,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, } write_unlock(&GlobalSMBSeslock); + file->private_data = pCifsFile; + return pCifsFile; } int cifs_posix_open(char *full_path, struct inode **pinode, - struct vfsmount *mnt, struct super_block *sb, - int mode, int oflags, + struct super_block *sb, int mode, int oflags, __u32 *poplock, __u16 *pnetfid, int xid) { int rc; @@ -258,19 +260,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode, cifs_fattr_to_inode(*pinode, &fattr); } - /* - * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to - * file->private_data. - */ - if (mnt) { - struct cifsFileInfo *pfile_info; - - pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, - oflags); - if (pfile_info == NULL) - rc = -ENOMEM; - } - posix_open_ret: kfree(presp_data); return rc; @@ -298,7 +287,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, int create_options = CREATE_NOT_DIR; __u32 oplock = 0; int oflags; - bool posix_create = false; /* * BB below access is probably too much for mknod to request * but we have to do query and setpathinfo so requesting @@ -339,7 +327,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = cifs_posix_open(full_path, &newinode, - nd ? nd->path.mnt : NULL, inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); /* EIO could indicate that (posix open) operation is not supported, despite what server claimed in capability @@ -347,7 +334,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, handled in posix open */ if (rc == 0) { - posix_create = true; if (newinode == NULL) /* query inode info */ goto cifs_create_get_file_info; else /* success, no need to query */ @@ -478,21 +464,28 @@ cifs_create_set_dentry: else cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); - /* nfsd case - nfs srv does not set nd */ - if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) { - /* mknod case - do not leave file open */ - CIFSSMBClose(xid, tcon, fileHandle); - } else if (!(posix_create) && (newinode)) { + if (newinode && nd && (nd->flags & LOOKUP_OPEN)) { struct cifsFileInfo *pfile_info; - /* - * cifs_fill_filedata() takes care of setting cifsFileInfo - * pointer to file->private_data. - */ - pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL, + struct file *filp; + + filp = lookup_instantiate_filp(nd, direntry, generic_file_open); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + CIFSSMBClose(xid, tcon, fileHandle); + goto cifs_create_out; + } + + pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp, nd->path.mnt, oflags); - if (pfile_info == NULL) + if (pfile_info == NULL) { + fput(filp); + CIFSSMBClose(xid, tcon, fileHandle); rc = -ENOMEM; + } + } else { + CIFSSMBClose(xid, tcon, fileHandle); } + cifs_create_out: kfree(buf); kfree(full_path); @@ -636,6 +629,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, bool posix_open = false; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; + struct cifsFileInfo *cfile; struct inode *newInode = NULL; char *full_path = NULL; struct file *filp; @@ -703,7 +697,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && (nd->intent.open.flags & O_CREAT)) { - rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, + rc = cifs_posix_open(full_path, &newInode, parent_dir_inode->i_sb, nd->intent.open.create_mode, nd->intent.open.flags, &oplock, @@ -733,8 +727,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, else direntry->d_op = &cifs_dentry_ops; d_add(direntry, newInode); - if (posix_open) - filp = lookup_instantiate_filp(nd, direntry, NULL); + if (posix_open) { + filp = lookup_instantiate_filp(nd, direntry, + generic_file_open); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + CIFSSMBClose(xid, pTcon, fileHandle); + goto lookup_out; + } + + cfile = cifs_new_fileinfo(newInode, fileHandle, filp, + nd->path.mnt, + nd->intent.open.flags); + if (cfile == NULL) { + fput(filp); + CIFSSMBClose(xid, pTcon, fileHandle); + rc = -ENOMEM; + goto lookup_out; + } + } /* since paths are not looked up by component - the parent directories are presumed to be good here */ renew_parental_timestamps(direntry); @@ -755,6 +766,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, is a common return code */ } +lookup_out: kfree(full_path); FreeXid(xid); return ERR_PTR(rc); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 75541af..409e4f5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -162,44 +162,12 @@ psx_client_can_cache: return 0; } -static struct cifsFileInfo * -cifs_fill_filedata(struct file *file) -{ - struct list_head *tmp; - struct cifsFileInfo *pCifsFile = NULL; - struct cifsInodeInfo *pCifsInode = NULL; - - /* search inode for this file and fill in file->private_data */ - pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &pCifsInode->openFileList) { - pCifsFile = list_entry(tmp, struct cifsFileInfo, flist); - if ((pCifsFile->pfile == NULL) && - (pCifsFile->pid == current->tgid)) { - /* mode set in cifs_create */ - - /* needed for writepage */ - pCifsFile->pfile = file; - file->private_data = pCifsFile; - break; - } - } - read_unlock(&GlobalSMBSeslock); - - if (file->private_data != NULL) { - return pCifsFile; - } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) - cERROR(1, "could not find file instance for " - "new file %p", file); - return NULL; -} - /* all arguments to this function must be checked for validity in caller */ -static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, - struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile, +static inline int cifs_open_inode_helper(struct inode *inode, struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf, char *full_path, int xid) { + struct cifsInodeInfo *pCifsInode = CIFS_I(inode); struct timespec temp; int rc; @@ -213,36 +181,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, /* if not oplocked, invalidate inode pages if mtime or file size changed */ temp = cifs_NTtimeToUnix(buf->LastWriteTime); - if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && - (file->f_path.dentry->d_inode->i_size == + if (timespec_equal(&inode->i_mtime, &temp) && + (inode->i_size == (loff_t)le64_to_cpu(buf->EndOfFile))) { cFYI(1, "inode unchanged on server"); } else { - if (file->f_path.dentry->d_inode->i_mapping) { + if (inode->i_mapping) { /* BB no need to lock inode until after invalidate since namei code should already have it locked? */ - rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping); + rc = filemap_write_and_wait(inode->i_mapping); if (rc != 0) - CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc; + pCifsInode->write_behind_rc = rc; } cFYI(1, "invalidating remote inode since open detected it " "changed"); - invalidate_remote_inode(file->f_path.dentry->d_inode); + invalidate_remote_inode(inode); } client_can_cache: if (pTcon->unix_ext) - rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode, - full_path, inode->i_sb, xid); + rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, + xid); else - rc = cifs_get_inode_info(&file->f_path.dentry->d_inode, - full_path, buf, inode->i_sb, xid, NULL); + rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, + xid, NULL); if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { pCifsInode->clientCanCacheAll = true; pCifsInode->clientCanCacheRead = true; - cFYI(1, "Exclusive Oplock granted on inode %p", - file->f_path.dentry->d_inode); + cFYI(1, "Exclusive Oplock granted on inode %p", inode); } else if ((*oplock & 0xF) == OPLOCK_READ) pCifsInode->clientCanCacheRead = true; @@ -256,7 +223,7 @@ int cifs_open(struct inode *inode, struct file *file) __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; - struct cifsFileInfo *pCifsFile; + struct cifsFileInfo *pCifsFile = NULL; struct cifsInodeInfo *pCifsInode; char *full_path = NULL; int desiredAccess; @@ -270,12 +237,6 @@ int cifs_open(struct inode *inode, struct file *file) tcon = cifs_sb->tcon; pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - pCifsFile = cifs_fill_filedata(file); - if (pCifsFile) { - rc = 0; - FreeXid(xid); - return rc; - } full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { @@ -299,8 +260,7 @@ int cifs_open(struct inode *inode, struct file *file) int oflags = (int) cifs_posix_convert_flags(file->f_flags); oflags |= SMB_O_CREAT; /* can not refresh inode info since size could be stale */ - rc = cifs_posix_open(full_path, &inode, file->f_path.mnt, - inode->i_sb, + rc = cifs_posix_open(full_path, &inode, inode->i_sb, cifs_sb->mnt_file_mode /* ignored */, oflags, &oplock, &netfid, xid); if (rc == 0) { @@ -308,9 +268,20 @@ int cifs_open(struct inode *inode, struct file *file) /* no need for special case handling of setting mode on read only files needed here */ - pCifsFile = cifs_fill_filedata(file); - cifs_posix_open_inode_helper(inode, file, pCifsInode, - oplock, netfid); + rc = cifs_posix_open_inode_helper(inode, file, + pCifsInode, oplock, netfid); + if (rc != 0) { + CIFSSMBClose(xid, tcon, netfid); + goto out; + } + + pCifsFile = cifs_new_fileinfo(inode, netfid, file, + file->f_path.mnt, + oflags); + if (pCifsFile == NULL) { + CIFSSMBClose(xid, tcon, netfid); + rc = -ENOMEM; + } goto out; } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { if (tcon->ses->serverNOS) @@ -391,17 +362,17 @@ int cifs_open(struct inode *inode, struct file *file) goto out; } + rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid); + if (rc != 0) + goto out; + pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt, file->f_flags); - file->private_data = pCifsFile; - if (file->private_data == NULL) { + if (pCifsFile == NULL) { rc = -ENOMEM; goto out; } - rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon, - &oplock, buf, full_path, xid); - if (oplock & CIFS_CREATE_ACTION) { /* time to set mode which we can not set earlier due to problems creating new read-only files */ @@ -513,8 +484,7 @@ reopen_error_exit: le64_to_cpu(tcon->fsUnixInfo.Capability))) { int oflags = (int) cifs_posix_convert_flags(file->f_flags); /* can not refresh inode info since size could be stale */ - rc = cifs_posix_open(full_path, NULL, file->f_path.mnt, - inode->i_sb, + rc = cifs_posix_open(full_path, NULL, inode->i_sb, cifs_sb->mnt_file_mode /* ignored */, oflags, &oplock, &netfid, xid); if (rc == 0) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 62b324f..6f0683c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1401,6 +1401,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, if (rc == 0 || rc != -ETXTBSY) return rc; + /* open-file renames don't work across directories */ + if (to_dentry->d_parent != from_dentry->d_parent) + return rc; + /* open the file to be renamed -- we need DELETE perms */ rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, CREATE_NOT_DIR, &srcfid, &oplock, NULL, diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 7707389..0a57cb7 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate: /* calculate session key */ setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); - if (first_time) /* should this be moved into common code - with similar ntlmv2 path? */ - /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key, - response BB FIXME, v2_sess_key); */ - - /* copy session key */ - - /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE); - bcc_ptr += LM2_SESS_KEY_SIZE; */ + /* FIXME: calculate MAC key */ memcpy(bcc_ptr, (char *)v2_sess_key, sizeof(struct ntlmv2_resp)); bcc_ptr += sizeof(struct ntlmv2_resp); diff --git a/fs/dcache.c b/fs/dcache.c index d96047b..c8c78ba 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -590,6 +590,8 @@ static void prune_dcache(int count) up_read(&sb->s_umount); } spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); count -= pruned; __put_super(sb); /* more work left to do? */ diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index ca7e2a0..2bcc043 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return error; else { inode->i_mode = mode; + inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); if (error == 0) acl = NULL; diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 01552ab..8a11fe2 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, return error; else { inode->i_mode = mode; + inode->i_ctime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); if (error == 0) acl = NULL; @@ -733,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; + unsigned long flags; + if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } - spin_lock(&fa->fa_lock); + spin_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a @@ -747,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } - spin_unlock(&fa->fa_lock); + spin_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1d1088f..d5be169 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -38,51 +38,18 @@ int nr_pdflush_threads; /* * Passed into wb_writeback(), essentially a subset of writeback_control */ -struct wb_writeback_args { +struct wb_writeback_work { long nr_pages; struct super_block *sb; enum writeback_sync_modes sync_mode; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; -}; -/* - * Work items for the bdi_writeback threads - */ -struct bdi_work { struct list_head list; /* pending work list */ - struct rcu_head rcu_head; /* for RCU free/clear of work */ - - unsigned long seen; /* threads that have seen this work */ - atomic_t pending; /* number of threads still to do work */ - - struct wb_writeback_args args; /* writeback arguments */ - - unsigned long state; /* flag bits, see WS_* */ + struct completion *done; /* set if the caller waits */ }; -enum { - WS_USED_B = 0, - WS_ONSTACK_B, -}; - -#define WS_USED (1 << WS_USED_B) -#define WS_ONSTACK (1 << WS_ONSTACK_B) - -static inline bool bdi_work_on_stack(struct bdi_work *work) -{ - return test_bit(WS_ONSTACK_B, &work->state); -} - -static inline void bdi_work_init(struct bdi_work *work, - struct wb_writeback_args *args) -{ - INIT_RCU_HEAD(&work->rcu_head); - work->args = *args; - work->state = WS_USED; -} - /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. @@ -95,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi) return !list_empty(&bdi->work_list); } -static void bdi_work_clear(struct bdi_work *work) -{ - clear_bit(WS_USED_B, &work->state); - smp_mb__after_clear_bit(); - /* - * work can have disappeared at this point. bit waitq functions - * should be able to tolerate this, provided bdi_sched_wait does - * not dereference it's pointer argument. - */ - wake_up_bit(&work->state, WS_USED_B); -} - -static void bdi_work_free(struct rcu_head *head) -{ - struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); - - if (!bdi_work_on_stack(work)) - kfree(work); - else - bdi_work_clear(work); -} - -static void wb_work_complete(struct bdi_work *work) -{ - const enum writeback_sync_modes sync_mode = work->args.sync_mode; - int onstack = bdi_work_on_stack(work); - - /* - * For allocated work, we can clear the done/seen bit right here. - * For on-stack work, we need to postpone both the clear and free - * to after the RCU grace period, since the stack could be invalidated - * as soon as bdi_work_clear() has done the wakeup. - */ - if (!onstack) - bdi_work_clear(work); - if (sync_mode == WB_SYNC_NONE || onstack) - call_rcu(&work->rcu_head, bdi_work_free); -} - -static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) -{ - /* - * The caller has retrieved the work arguments from this work, - * drop our reference. If this is the last ref, delete and free it - */ - if (atomic_dec_and_test(&work->pending)) { - struct backing_dev_info *bdi = wb->bdi; - - spin_lock(&bdi->wb_lock); - list_del_rcu(&work->list); - spin_unlock(&bdi->wb_lock); - - wb_work_complete(work); - } -} - -static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) +static void bdi_queue_work(struct backing_dev_info *bdi, + struct wb_writeback_work *work) { - work->seen = bdi->wb_mask; - BUG_ON(!work->seen); - atomic_set(&work->pending, bdi->wb_cnt); - BUG_ON(!bdi->wb_cnt); - - /* - * list_add_tail_rcu() contains the necessary barriers to - * make sure the above stores are seen before the item is - * noticed on the list - */ spin_lock(&bdi->wb_lock); - list_add_tail_rcu(&work->list, &bdi->work_list); + list_add_tail(&work->list, &bdi->work_list); spin_unlock(&bdi->wb_lock); /* @@ -181,97 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) } } -/* - * Used for on-stack allocated work items. The caller needs to wait until - * the wb threads have acked the work before it's safe to continue. - */ -static void bdi_wait_on_work_clear(struct bdi_work *work) -{ - wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait, - TASK_UNINTERRUPTIBLE); -} - -static void bdi_alloc_queue_work(struct backing_dev_info *bdi, - struct wb_writeback_args *args) +static void +__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, + bool range_cyclic, bool for_background) { - struct bdi_work *work; + struct wb_writeback_work *work; /* * This is WB_SYNC_NONE writeback, so if allocation fails just * wakeup the thread for old dirty data writeback */ - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - bdi_work_init(work, args); - bdi_queue_work(bdi, work); - } else { - struct bdi_writeback *wb = &bdi->wb; - - if (wb->task) - wake_up_process(wb->task); + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + if (bdi->wb.task) + wake_up_process(bdi->wb.task); + return; } + + work->sync_mode = WB_SYNC_NONE; + work->nr_pages = nr_pages; + work->range_cyclic = range_cyclic; + work->for_background = for_background; + + bdi_queue_work(bdi, work); } /** - * bdi_sync_writeback - start and wait for writeback + * bdi_start_writeback - start writeback * @bdi: the backing device to write from - * @sb: write inodes from this super_block + * @nr_pages: the number of pages to write * * Description: - * This does WB_SYNC_ALL data integrity writeback and waits for the - * IO to complete. Callers must hold the sb s_umount semaphore for - * reading, to avoid having the super disappear before we are done. + * This does WB_SYNC_NONE opportunistic writeback. The IO is only + * started when this function returns, we make no guarentees on + * completion. Caller need not hold sb s_umount semaphore. + * */ -static void bdi_sync_writeback(struct backing_dev_info *bdi, - struct super_block *sb) +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) { - struct wb_writeback_args args = { - .sb = sb, - .sync_mode = WB_SYNC_ALL, - .nr_pages = LONG_MAX, - .range_cyclic = 0, - }; - struct bdi_work work; - - bdi_work_init(&work, &args); - work.state |= WS_ONSTACK; - - bdi_queue_work(bdi, &work); - bdi_wait_on_work_clear(&work); + __bdi_start_writeback(bdi, nr_pages, true, false); } /** - * bdi_start_writeback - start writeback + * bdi_start_background_writeback - start background writeback * @bdi: the backing device to write from - * @sb: write inodes from this super_block - * @nr_pages: the number of pages to write * * Description: - * This does WB_SYNC_NONE opportunistic writeback. The IO is only + * This does WB_SYNC_NONE background writeback. The IO is only * started when this function returns, we make no guarentees on * completion. Caller need not hold sb s_umount semaphore. - * */ -void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages) +void bdi_start_background_writeback(struct backing_dev_info *bdi) { - struct wb_writeback_args args = { - .sb = sb, - .sync_mode = WB_SYNC_NONE, - .nr_pages = nr_pages, - .range_cyclic = 1, - }; - - /* - * We treat @nr_pages=0 as the special case to do background writeback, - * ie. to sync pages until the background dirty threshold is reached. - */ - if (!nr_pages) { - args.nr_pages = LONG_MAX; - args.for_background = 1; - } - - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, LONG_MAX, true, true); } /* @@ -561,75 +425,69 @@ select_queue: return ret; } -static void unpin_sb_for_writeback(struct super_block *sb) -{ - up_read(&sb->s_umount); - put_super(sb); -} - -enum sb_pin_state { - SB_PINNED, - SB_NOT_PINNED, - SB_PIN_FAILED -}; - /* - * For WB_SYNC_NONE writeback, the caller does not have the sb pinned + * For background writeback the caller does not have the sb pinned * before calling writeback. So make sure that we do pin it, so it doesn't * go away while we are writing inodes from it. */ -static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, - struct super_block *sb) +static bool pin_sb_for_writeback(struct super_block *sb) { - /* - * Caller must already hold the ref for this - */ - if (wbc->sync_mode == WB_SYNC_ALL) { - WARN_ON(!rwsem_is_locked(&sb->s_umount)); - return SB_NOT_PINNED; - } spin_lock(&sb_lock); + if (list_empty(&sb->s_instances)) { + spin_unlock(&sb_lock); + return false; + } + sb->s_count++; + spin_unlock(&sb_lock); + if (down_read_trylock(&sb->s_umount)) { - if (sb->s_root) { - spin_unlock(&sb_lock); - return SB_PINNED; - } - /* - * umounted, drop rwsem again and fall through to failure - */ + if (sb->s_root) + return true; up_read(&sb->s_umount); } - sb->s_count--; - spin_unlock(&sb_lock); - return SB_PIN_FAILED; + + put_super(sb); + return false; } /* * Write a portion of b_io inodes which belong to @sb. - * If @wbc->sb != NULL, then find and write all such + * + * If @only_this_sb is true, then find and write all such * inodes. Otherwise write only ones which go sequentially * in reverse order. + * * Return 1, if the caller writeback routine should be * interrupted. Otherwise return 0. */ -static int writeback_sb_inodes(struct super_block *sb, - struct bdi_writeback *wb, - struct writeback_control *wbc) +static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, + struct writeback_control *wbc, bool only_this_sb) { while (!list_empty(&wb->b_io)) { long pages_skipped; struct inode *inode = list_entry(wb->b_io.prev, struct inode, i_list); - if (wbc->sb && sb != inode->i_sb) { - /* super block given and doesn't - match, skip this inode */ - redirty_tail(inode); - continue; - } - if (sb != inode->i_sb) - /* finish with this superblock */ + + if (inode->i_sb != sb) { + if (only_this_sb) { + /* + * We only want to write back data for this + * superblock, move all inodes not belonging + * to it back onto the dirty list. + */ + redirty_tail(inode); + continue; + } + + /* + * The inode belongs to a different superblock. + * Bounce back to the caller to unpin this and + * pin the next superblock. + */ return 0; + } + if (inode->i_state & (I_NEW | I_WILL_FREE)) { requeue_io(inode); continue; @@ -667,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb, return 1; } -static void writeback_inodes_wb(struct bdi_writeback *wb, - struct writeback_control *wbc) +void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc) { int ret = 0; @@ -681,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, struct inode *inode = list_entry(wb->b_io.prev, struct inode, i_list); struct super_block *sb = inode->i_sb; - enum sb_pin_state state; - if (wbc->sb && sb != wbc->sb) { - /* super block given and doesn't - match, skip this inode */ - redirty_tail(inode); - continue; - } - state = pin_sb_for_writeback(wbc, sb); - - if (state == SB_PIN_FAILED) { + if (!pin_sb_for_writeback(sb)) { requeue_io(inode); continue; } - ret = writeback_sb_inodes(sb, wb, wbc); + ret = writeback_sb_inodes(sb, wb, wbc, false); + drop_super(sb); - if (state == SB_PINNED) - unpin_sb_for_writeback(sb); if (ret) break; } @@ -706,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, /* Leave any unwritten inodes on b_io */ } -void writeback_inodes_wbc(struct writeback_control *wbc) +static void __writeback_inodes_sb(struct super_block *sb, + struct bdi_writeback *wb, struct writeback_control *wbc) { - struct backing_dev_info *bdi = wbc->bdi; + WARN_ON(!rwsem_is_locked(&sb->s_umount)); - writeback_inodes_wb(&bdi->wb, wbc); + wbc->wb_start = jiffies; /* livelock avoidance */ + spin_lock(&inode_lock); + if (!wbc->for_kupdate || list_empty(&wb->b_io)) + queue_io(wb, wbc->older_than_this); + writeback_sb_inodes(sb, wb, wbc, true); + spin_unlock(&inode_lock); } /* @@ -748,16 +602,14 @@ static inline bool over_bground_thresh(void) * all dirty pages if they are all attached to "old" mappings. */ static long wb_writeback(struct bdi_writeback *wb, - struct wb_writeback_args *args) + struct wb_writeback_work *work) { struct writeback_control wbc = { - .bdi = wb->bdi, - .sb = args->sb, - .sync_mode = args->sync_mode, + .sync_mode = work->sync_mode, .older_than_this = NULL, - .for_kupdate = args->for_kupdate, - .for_background = args->for_background, - .range_cyclic = args->range_cyclic, + .for_kupdate = work->for_kupdate, + .for_background = work->for_background, + .range_cyclic = work->range_cyclic, }; unsigned long oldest_jif; long wrote = 0; @@ -777,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb, /* * Stop writeback when nr_pages has been consumed */ - if (args->nr_pages <= 0) + if (work->nr_pages <= 0) break; /* * For background writeout, stop when we are below the * background dirty threshold */ - if (args->for_background && !over_bground_thresh()) + if (work->for_background && !over_bground_thresh()) break; wbc.more_io = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; - writeback_inodes_wb(wb, &wbc); - args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; + if (work->sb) + __writeback_inodes_sb(work->sb, wb, &wbc); + else + writeback_inodes_wb(wb, &wbc); + work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; /* @@ -827,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb, } /* - * Return the next bdi_work struct that hasn't been processed by this - * wb thread yet. ->seen is initially set for each thread that exists - * for this device, when a thread first notices a piece of work it - * clears its bit. Depending on writeback type, the thread will notify - * completion on either receiving the work (WB_SYNC_NONE) or after - * it is done (WB_SYNC_ALL). + * Return the next wb_writeback_work struct that hasn't been processed yet. */ -static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, - struct bdi_writeback *wb) +static struct wb_writeback_work * +get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) { - struct bdi_work *work, *ret = NULL; + struct wb_writeback_work *work = NULL; - rcu_read_lock(); - - list_for_each_entry_rcu(work, &bdi->work_list, list) { - if (!test_bit(wb->nr, &work->seen)) - continue; - clear_bit(wb->nr, &work->seen); - - ret = work; - break; + spin_lock(&bdi->wb_lock); + if (!list_empty(&bdi->work_list)) { + work = list_entry(bdi->work_list.next, + struct wb_writeback_work, list); + list_del_init(&work->list); } - - rcu_read_unlock(); - return ret; + spin_unlock(&bdi->wb_lock); + return work; } static long wb_check_old_data_flush(struct bdi_writeback *wb) @@ -876,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) (inodes_stat.nr_inodes - inodes_stat.nr_unused); if (nr_pages) { - struct wb_writeback_args args = { + struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .for_kupdate = 1, .range_cyclic = 1, }; - return wb_writeback(wb, &args); + return wb_writeback(wb, &work); } return 0; @@ -895,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) long wb_do_writeback(struct bdi_writeback *wb, int force_wait) { struct backing_dev_info *bdi = wb->bdi; - struct bdi_work *work; + struct wb_writeback_work *work; long wrote = 0; while ((work = get_next_work_item(bdi, wb)) != NULL) { - struct wb_writeback_args args = work->args; - /* * Override sync mode, in case we must wait for completion + * because this thread is exiting now. */ if (force_wait) - work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; - - /* - * If this isn't a data integrity operation, just notify - * that we have seen this work and we are now starting it. - */ - if (args.sync_mode == WB_SYNC_NONE) - wb_clear_pending(wb, work); + work->sync_mode = WB_SYNC_ALL; - wrote += wb_writeback(wb, &args); + wrote += wb_writeback(wb, work); /* - * This is a data integrity writeback, so only do the - * notification when we have completed the work. + * Notify the caller of completion if this is a synchronous + * work item, otherwise just free it. */ - if (args.sync_mode == WB_SYNC_ALL) - wb_clear_pending(wb, work); + if (work->done) + complete(work->done); + else + kfree(work); } /* @@ -978,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb) } /* - * Schedule writeback for all backing devices. This does WB_SYNC_NONE - * writeback, for integrity writeback see bdi_sync_writeback(). + * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back + * the whole world. */ -static void bdi_writeback_all(struct super_block *sb, long nr_pages) +void wakeup_flusher_threads(long nr_pages) { - struct wb_writeback_args args = { - .sb = sb, - .nr_pages = nr_pages, - .sync_mode = WB_SYNC_NONE, - }; struct backing_dev_info *bdi; - rcu_read_lock(); + if (!nr_pages) { + nr_pages = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS); + } + rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; - - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, nr_pages, false, false); } - rcu_read_unlock(); } -/* - * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back - * the whole world. - */ -void wakeup_flusher_threads(long nr_pages) -{ - if (nr_pages == 0) - nr_pages = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - bdi_writeback_all(NULL, nr_pages); -} - static noinline void block_dump___mark_inode_dirty(struct inode *inode) { if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { @@ -1218,12 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb) { unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - long nr_to_write; + DECLARE_COMPLETION_ONSTACK(done); + struct wb_writeback_work work = { + .sb = sb, + .sync_mode = WB_SYNC_NONE, + .done = &done, + }; + + WARN_ON(!rwsem_is_locked(&sb->s_umount)); - nr_to_write = nr_dirty + nr_unstable + + work.nr_pages = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_start_writeback(sb->s_bdi, sb, nr_to_write); + bdi_queue_work(sb->s_bdi, &work); + wait_for_completion(&done); } EXPORT_SYMBOL(writeback_inodes_sb); @@ -1237,7 +1069,9 @@ EXPORT_SYMBOL(writeback_inodes_sb); int writeback_inodes_sb_if_idle(struct super_block *sb) { if (!writeback_in_progress(sb->s_bdi)) { + down_read(&sb->s_umount); writeback_inodes_sb(sb); + up_read(&sb->s_umount); return 1; } else return 0; @@ -1253,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle); */ void sync_inodes_sb(struct super_block *sb) { - bdi_sync_writeback(sb->s_bdi, sb); + DECLARE_COMPLETION_ONSTACK(done); + struct wb_writeback_work work = { + .sb = sb, + .sync_mode = WB_SYNC_ALL, + .nr_pages = LONG_MAX, + .range_cyclic = 0, + .done = &done, + }; + + WARN_ON(!rwsem_is_locked(&sb->s_umount)); + + bdi_queue_work(sb->s_bdi, &work); + wait_for_completion(&done); + wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 4a48c0f..84da64b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1041,6 +1041,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (gfs2_is_stuffed(ip)) { u64 dsize = size + sizeof(struct gfs2_inode); + ip->i_disksize = size; ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 8295c5b..26ca336 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, unsigned totlen = be16_to_cpu(dent->de_rec_len); if (gfs2_dirent_sentinel(dent)) - actual = GFS2_DIRENT_SIZE(0); + actual = 0; if (totlen - actual >= required) return 1; return 0; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ddcdbf4..dbab3fd 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -706,8 +706,18 @@ static void glock_work_func(struct work_struct *work) { unsigned long delay = 0; struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + struct gfs2_holder *gh; int drop_ref = 0; + if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) { + spin_lock(&gl->gl_spin); + gh = find_first_waiter(gl); + if (gh && (gh->gh_flags & LM_FLAG_NOEXP) && + test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + } + if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { finish_xmote(gl, gl->gl_reply); drop_ref = 1; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b5612cb..f03afd9 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, { struct inode *inode; struct gfs2_inode *ip; - struct gfs2_glock *io_gl; + struct gfs2_glock *io_gl = NULL; int error; inode = gfs2_iget(sb, no_addr); @@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); + io_gl = NULL; if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) goto gfs2_nfsbypass; @@ -228,7 +229,8 @@ gfs2_nfsbypass: fail_glock: gfs2_glock_dq(&ip->i_iopen_gh); fail_iopen: - gfs2_glock_put(io_gl); + if (io_gl) + gfs2_glock_put(io_gl); fail_put: if (inode->i_state & I_NEW) ip->i_gl->gl_object = NULL; @@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) { struct gfs2_sbd *sdp; struct gfs2_inode *ip; - struct gfs2_glock *io_gl; + struct gfs2_glock *io_gl = NULL; int error; struct gfs2_holder gh; struct inode *inode; @@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); + io_gl = NULL; inode->i_mode = DT2IF(DT_UNKNOWN); @@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) fail_glock: gfs2_glock_dq(&ip->i_iopen_gh); fail_iopen: - gfs2_glock_put(io_gl); + if (io_gl) + gfs2_glock_put(io_gl); fail_put: ip->i_gl->gl_object = NULL; gfs2_glock_put(ip->i_gl); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 49667d6..b256d6f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -694,10 +694,8 @@ get_a_page: if (!buffer_mapped(bh)) goto unlock_out; /* If it's a newly allocated disk block for quota, zero it */ - if (buffer_new(bh)) { - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); - } + if (buffer_new(bh)) + zero_user(page, pos - blocksize, bh->b_size); } if (PageUptodate(page)) @@ -723,7 +721,7 @@ get_a_page: /* If quota straddles page boundary, we need to update the rest of the * quota at the beginning of the next page */ - if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */ + if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) { ptr = ptr + nbytes; nbytes = sizeof(struct gfs2_quota) - nbytes; offset = 0; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7ec9b34..d25b525 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) #endif /* CONFIG_NFS_V4_1 */ } +static int nfs4_server_common_setup(struct nfs_server *server, + struct nfs_fh *mntfh) +{ + struct nfs_fattr *fattr; + int error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + return -ENOMEM; + + /* We must ensure the session is initialised first */ + error = nfs4_init_session(server); + if (error < 0) + goto out; + + /* Probe the root fh to retrieve its FSID and filehandle */ + error = nfs4_get_rootfh(server, mntfh); + if (error < 0) + goto out; + + dprintk("Server FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + nfs4_session_set_rwsize(server); + + error = nfs_probe_fsinfo(server, mntfh, fattr); + if (error < 0) + goto out; + + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; +out: + nfs_free_fattr(fattr); + return error; +} + /* * Create a version 4 volume record */ @@ -1346,7 +1395,6 @@ error: struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, struct nfs_fh *mntfh) { - struct nfs_fattr *fattr; struct nfs_server *server; int error; @@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, if (!server) return ERR_PTR(-ENOMEM); - error = -ENOMEM; - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - goto error; - /* set up the general RPC client */ error = nfs4_init_server(server, data); if (error < 0) goto error; - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - - error = nfs4_init_session(server); - if (error < 0) - goto error; - - /* Probe the root fh to retrieve its FSID */ - error = nfs4_get_rootfh(server, mntfh); + error = nfs4_server_common_setup(server, mntfh); if (error < 0) goto error; - dprintk("Server FSID: %llx:%llx\n", - (unsigned long long) server->fsid.major, - (unsigned long long) server->fsid.minor); - dprintk("Mount FH: %d\n", mntfh->size); - - nfs4_session_set_rwsize(server); - - error = nfs_probe_fsinfo(server, mntfh, fattr); - if (error < 0) - goto error; - - if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) - server->namelen = NFS4_MAXNAMLEN; - - spin_lock(&nfs_client_lock); - list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); - list_add_tail(&server->master_link, &nfs_volume_list); - spin_unlock(&nfs_client_lock); - - server->mount_time = jiffies; dprintk("<-- nfs4_create_server() = %p\n", server); - nfs_free_fattr(fattr); return server; error: - nfs_free_fattr(fattr); nfs_free_server(server); dprintk("<-- nfs4_create_server() = error %d\n", error); return ERR_PTR(error); @@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, { struct nfs_client *parent_client; struct nfs_server *server, *parent_server; - struct nfs_fattr *fattr; int error; dprintk("--> nfs4_create_referral_server()\n"); @@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (!server) return ERR_PTR(-ENOMEM); - error = -ENOMEM; - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - goto error; - parent_server = NFS_SB(data->sb); parent_client = parent_server->nfs_client; @@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - - /* Probe the root fh to retrieve its FSID and filehandle */ - error = nfs4_get_rootfh(server, mntfh); - if (error < 0) - goto error; - - /* probe the filesystem info for this server filesystem */ - error = nfs_probe_fsinfo(server, mntfh, fattr); + error = nfs4_server_common_setup(server, mntfh); if (error < 0) goto error; - if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) - server->namelen = NFS4_MAXNAMLEN; - - dprintk("Referral FSID: %llx:%llx\n", - (unsigned long long) server->fsid.major, - (unsigned long long) server->fsid.minor); - - spin_lock(&nfs_client_lock); - list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); - list_add_tail(&server->master_link, &nfs_volume_list); - spin_unlock(&nfs_client_lock); - - server->mount_time = jiffies; - - nfs_free_fattr(fattr); dprintk("<-- nfs_create_referral_server() = %p\n", server); return server; error: - nfs_free_fattr(fattr); nfs_free_server(server); dprintk("<-- nfs4_create_referral_server() = error %d\n", error); return ERR_PTR(error); diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 7428f7d..a70e446 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) goto out; } - if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE) + if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) || !S_ISDIR(fsinfo.fattr->mode)) { printk(KERN_ERR "nfs4_get_rootfh:" " getroot encountered non-directory\n"); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6bdef28..65c8dae 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); - *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); + *p++ = cpu_to_be32(iap->ia_atime.tv_sec); + *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); } else if (iap->ia_valid & ATTR_ATIME) { bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 04214fc..f9df16d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, nfs_show_mountd_netid(m, nfss, showdefaults); } +#ifdef CONFIG_NFS_V4 +static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ + struct nfs_client *clp = nfss->nfs_client; + + seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); + seq_printf(m, ",minorversion=%u", clp->cl_minorversion); +} +#else +static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ +} +#endif + /* * Describe the mount options in force on this server representation */ @@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (version != 4) nfs_show_mountd_options(m, nfss, showdefaults); + else + nfs_show_nfsv4_options(m, nfss, showdefaults); -#ifdef CONFIG_NFS_V4 - if (clp->rpc_ops->version == 4) - seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); -#endif if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, ",fsc"); } diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index 4065002..d8b6e42 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -26,7 +26,6 @@ #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <linux/bitops.h> #include <linux/list.h> diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index ce94801..d9396a4 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np, for (pp = np->properties; pp != NULL; pp = pp->next) { p = pp->name; + if (strchr(p, '/')) + continue; + if (duplicate_name(de, p)) p = fixup_name(np, de, p); diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 46d4b5d..cb6306e 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } +static void pad_len_spaces(struct seq_file *m, int len) +{ + len = 25 + sizeof(void*) * 6 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c", len, ' '); +} + /* * display a single VMA to a sequenced file */ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) { + struct mm_struct *mm = vma->vm_mm; unsigned long ino = 0; struct file *file; dev_t dev = 0; @@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) MAJOR(dev), MINOR(dev), ino, &len); if (file) { - len = 25 + sizeof(void *) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); + pad_len_spaces(m, len); seq_path(m, &file->f_path, ""); + } else if (mm) { + if (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack) { + pad_len_spaces(m, len); + seq_puts(m, "[stack]"); + } } seq_putc(m, '\n'); diff --git a/fs/splice.c b/fs/splice.c index 740e6b9..efdbfec 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, { struct file *file = sd->u.file; - return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); + return do_splice_from(pipe, file, &file->f_pos, sd->total_len, + sd->flags); } /** @@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, if (off_in) return -ESPIPE; if (off_out) { - if (!out->f_op || !out->f_op->llseek || - out->f_op->llseek == no_llseek) + if (!(out->f_mode & FMODE_PWRITE)) return -EINVAL; if (copy_from_user(&offset, off_out, sizeof(loff_t))) return -EFAULT; @@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, if (off_out) return -ESPIPE; if (off_in) { - if (!in->f_op || !in->f_op->llseek || - in->f_op->llseek == no_llseek) + if (!(in->f_mode & FMODE_PREAD)) return -EINVAL; if (copy_from_user(&offset, off_in, sizeof(loff_t))) return -EFAULT; @@ -374,6 +374,8 @@ void sync_supers(void) up_read(&sb->s_umount); spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); __put_super(sb); } } @@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) up_read(&sb->s_umount); spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); __put_super(sb); } spin_unlock(&sb_lock); @@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work) } up_write(&sb->s_umount); spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); __put_super(sb); } spin_unlock(&sb_lock); diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index bbd69bd..fcc498e 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -25,6 +25,7 @@ #include <linux/stat.h> #include <linux/string.h> #include <linux/buffer_head.h> +#include <linux/writeback.h> #include "sysv.h" /* We don't trust the value of @@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) struct inode *inode; sysv_ino_t ino; unsigned count; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE + }; inode = new_inode(sb); if (!inode) @@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) insert_inode_hash(inode); mark_inode_dirty(inode); - sysv_write_inode(inode, 0); /* ensure inode not allocated again */ + sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */ mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ /* That's it. */ unlock_super(sb); diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 076ca50..c8ff0d1 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -62,7 +62,9 @@ */ static void shrink_liability(struct ubifs_info *c, int nr_to_write) { + down_read(&c->vfs_sb->s_umount); writeback_inodes_sb(c->vfs_sb); + up_read(&c->vfs_sb->s_umount); } /** diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 846b75a..e7839ee 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -128,13 +128,12 @@ xfs_nfs_get_inode( return ERR_PTR(-ESTALE); /* - * The XFS_IGET_BULKSTAT means that an invalid inode number is just - * fine and not an indication of a corrupted filesystem. Because - * clients can send any kind of invalid file handle, e.g. after - * a restore on the server we have to deal with this case gracefully. + * The XFS_IGET_UNTRUSTED means that an invalid inode number is just + * fine and not an indication of a corrupted filesystem as clients can + * send invalid file handles and we have to handle it gracefully.. */ - error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT, - XFS_ILOCK_SHARED, &ip, 0); + error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, + XFS_ILOCK_SHARED, &ip); if (error) { /* * EINVAL means the inode cluster doesn't exist anymore. diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 699b60c..e59a810 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -679,10 +679,9 @@ xfs_ioc_bulkstat( error = xfs_bulkstat_single(mp, &inlast, bulkreq.ubuffer, &done); else /* XFS_IOC_FSBULKSTAT */ - error = xfs_bulkstat(mp, &inlast, &count, - (bulkstat_one_pf)xfs_bulkstat_one, NULL, - sizeof(xfs_bstat_t), bulkreq.ubuffer, - BULKSTAT_FG_QUICK, &done); + error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, + sizeof(xfs_bstat_t), bulkreq.ubuffer, + &done); if (error) return -error; diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 9287135..52ed49e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -237,15 +237,12 @@ xfs_bulkstat_one_compat( xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* buffer to place output in */ int ubsize, /* size of buffer */ - void *private_data, /* my private data */ - xfs_daddr_t bno, /* starting bno of inode cluster */ int *ubused, /* bytes used by me */ - void *dibuff, /* on-disk inode buffer */ int *stat) /* BULKSTAT_RV_... */ { return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt_compat, bno, - ubused, dibuff, stat); + xfs_bulkstat_one_fmt_compat, + ubused, stat); } /* copied from xfs_ioctl.c */ @@ -298,13 +295,11 @@ xfs_compat_ioc_bulkstat( int res; error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, - sizeof(compat_xfs_bstat_t), - NULL, 0, NULL, NULL, &res); + sizeof(compat_xfs_bstat_t), 0, &res); } else if (cmd == XFS_IOC_FSBULKSTAT_32) { error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one_compat, NULL, - sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, - BULKSTAT_FG_QUICK, &done); + xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), + bulkreq.ubuffer, &done); } else error = XFS_ERROR(EINVAL); if (error) diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 2d8b7bc..8c117ff 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1632,10 +1632,7 @@ xfs_qm_dqusage_adjust( xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* not used */ int ubsize, /* not used */ - void *private_data, /* not used */ - xfs_daddr_t bno, /* starting block of inode cluster */ int *ubused, /* not used */ - void *dip, /* on-disk inode pointer (not used) */ int *res) /* result code value */ { xfs_inode_t *ip; @@ -1660,7 +1657,7 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. */ - if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) { + if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) { *res = BULKSTAT_RV_NOTHING; return error; } @@ -1796,12 +1793,13 @@ xfs_qm_quotacheck( * Iterate thru all the inodes in the file system, * adjusting the corresponding dquot counters in core. */ - if ((error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_dqusage_adjust, NULL, - structsz, NULL, BULKSTAT_FG_IGET, &done))) + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_dqusage_adjust, + structsz, NULL, &done); + if (error) break; - } while (! done); + } while (!done); /* * We've made all the changes that we need to make incore. @@ -1889,14 +1887,14 @@ xfs_qm_init_quotainos( mp->m_sb.sb_uquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_uquotino > 0); if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip, 0))) + 0, 0, &uip))) return XFS_ERROR(error); } if (XFS_IS_OQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_gquotino > 0); if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip, 0))) { + 0, 0, &gip))) { if (uip) IRELE(uip); return XFS_ERROR(error); diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 92b002f..b448776 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -262,7 +262,7 @@ xfs_qm_scall_trunc_qfiles( } if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { - error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); + error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip); if (!error) { error = xfs_truncate_file(mp, qip); IRELE(qip); @@ -271,7 +271,7 @@ xfs_qm_scall_trunc_qfiles( if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && mp->m_sb.sb_gquotino != NULLFSINO) { - error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); + error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip); if (!error2) { error2 = xfs_truncate_file(mp, qip); IRELE(qip); @@ -417,12 +417,12 @@ xfs_qm_scall_getqstat( } if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip, 0) == 0) + 0, 0, &uip) == 0) tempuqip = B_TRUE; } if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip, 0) == 0) + 0, 0, &gip) == 0) tempgqip = B_TRUE; } if (uip) { @@ -1109,10 +1109,7 @@ xfs_qm_internalqcheck_adjust( xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* not used */ int ubsize, /* not used */ - void *private_data, /* not used */ - xfs_daddr_t bno, /* starting block of inode cluster */ int *ubused, /* not used */ - void *dip, /* not used */ int *res) /* bulkstat result code */ { xfs_inode_t *ip; @@ -1134,7 +1131,7 @@ xfs_qm_internalqcheck_adjust( ipreleased = B_FALSE; again: lock_flags = XFS_ILOCK_SHARED; - if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) { + if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) { *res = BULKSTAT_RV_NOTHING; return (error); } @@ -1205,15 +1202,15 @@ xfs_qm_internalqcheck( * Iterate thru all the inodes in the file system, * adjusting the corresponding dquot counters */ - if ((error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_internalqcheck_adjust, NULL, - 0, NULL, BULKSTAT_FG_IGET, &done))) { + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_internalqcheck_adjust, + 0, NULL, &done); + if (error) { + cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); break; } - } while (! done); - if (error) { - cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); - } + } while (!done); + cmn_err(CE_DEBUG, "Checking results against system dquots"); for (i = 0; i < qmtest_hashmask; i++) { xfs_dqtest_t *d, *n; diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 5bba29a..7f159d2 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -69,7 +69,9 @@ xfs_swapext( goto out; } - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { + if (!(file->f_mode & FMODE_WRITE) || + !(file->f_mode & FMODE_READ) || + (file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_file; } @@ -81,6 +83,7 @@ xfs_swapext( } if (!(tmp_file->f_mode & FMODE_WRITE) || + !(tmp_file->f_mode & FMODE_READ) || (tmp_file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_tmp_file; diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 9d884c1..c7142a0 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1203,6 +1203,63 @@ error0: return error; } +STATIC int +xfs_imap_lookup( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agino_t agino, + xfs_agblock_t agbno, + xfs_agblock_t *chunk_agbno, + xfs_agblock_t *offset_agbno, + int flags) +{ + struct xfs_inobt_rec_incore rec; + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + xfs_agino_t startino; + int error; + int i; + + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " + "xfs_ialloc_read_agi() returned " + "error %d, agno %d", + error, agno); + return error; + } + + /* + * derive and lookup the exact inode record for the given agino. If the + * record cannot be found, then it's an invalid inode number and we + * should abort. + */ + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); + error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i); + if (!error) { + if (i) + error = xfs_inobt_get_rec(cur, &rec, &i); + if (!error && i == 0) + error = EINVAL; + } + + xfs_trans_brelse(tp, agbp); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + if (error) + return error; + + /* for untrusted inodes check it is allocated first */ + if ((flags & XFS_IGET_UNTRUSTED) && + (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) + return EINVAL; + + *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); + *offset_agbno = agbno - *chunk_agbno; + return 0; +} + /* * Return the location of the inode in imap, for mapping it into a buffer. */ @@ -1235,8 +1292,11 @@ xfs_imap( if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || ino != XFS_AGINO_TO_INO(mp, agno, agino)) { #ifdef DEBUG - /* no diagnostics for bulkstat, ino comes from userspace */ - if (flags & XFS_IGET_BULKSTAT) + /* + * Don't output diagnostic information for untrusted inodes + * as they can be invalid without implying corruption. + */ + if (flags & XFS_IGET_UNTRUSTED) return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { xfs_fs_cmn_err(CE_ALERT, mp, @@ -1263,6 +1323,23 @@ xfs_imap( return XFS_ERROR(EINVAL); } + blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; + + /* + * For bulkstat and handle lookups, we have an untrusted inode number + * that we have to verify is valid. We cannot do this just by reading + * the inode buffer as it may have been unlinked and removed leaving + * inodes in stale state on disk. Hence we have to do a btree lookup + * in all cases where an untrusted inode number is passed. + */ + if (flags & XFS_IGET_UNTRUSTED) { + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); + if (error) + return error; + goto out_map; + } + /* * If the inode cluster size is the same as the blocksize or * smaller we get to the buffer by simple arithmetics. @@ -1277,24 +1354,6 @@ xfs_imap( return 0; } - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; - - /* - * If we get a block number passed from bulkstat we can use it to - * find the buffer easily. - */ - if (imap->im_blkno) { - offset = XFS_INO_TO_OFFSET(mp, ino); - ASSERT(offset < mp->m_sb.sb_inopblock); - - cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno); - offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock; - - imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); - imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); - return 0; - } - /* * If the inode chunks are aligned then use simple maths to * find the location. Otherwise we have to do a btree @@ -1304,50 +1363,13 @@ xfs_imap( offset_agbno = agbno & mp->m_inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { - xfs_btree_cur_t *cur; /* inode btree cursor */ - xfs_inobt_rec_incore_t chunk_rec; - xfs_buf_t *agbp; /* agi buffer */ - int i; /* temp state */ - - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_ialloc_read_agi() returned " - "error %d, agno %d", - error, agno); - return error; - } - - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_lookup() failed"); - goto error0; - } - - error = xfs_inobt_get_rec(cur, &chunk_rec, &i); - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_get_rec() failed"); - goto error0; - } - if (i == 0) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_get_rec() failed"); -#endif /* DEBUG */ - error = XFS_ERROR(EINVAL); - } - error0: - xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); if (error) return error; - chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino); - offset_agbno = agbno - chunk_agbno; } +out_map: ASSERT(agbno >= chunk_agbno); cluster_agbno = chunk_agbno + ((offset_agbno / blks_per_cluster) * blks_per_cluster); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 75df75f..8f8b91b 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -259,7 +259,6 @@ xfs_iget_cache_miss( xfs_trans_t *tp, xfs_ino_t ino, struct xfs_inode **ipp, - xfs_daddr_t bno, int flags, int lock_flags) { @@ -272,7 +271,7 @@ xfs_iget_cache_miss( if (!ip) return ENOMEM; - error = xfs_iread(mp, tp, ip, bno, flags); + error = xfs_iread(mp, tp, ip, flags); if (error) goto out_destroy; @@ -358,8 +357,6 @@ out_destroy: * within the file system for the inode being requested. * lock_flags -- flags indicating how to lock the inode. See the comment * for xfs_ilock() for a list of valid values. - * bno -- the block number starting the buffer containing the inode, - * if known (as by bulkstat), else 0. */ int xfs_iget( @@ -368,8 +365,7 @@ xfs_iget( xfs_ino_t ino, uint flags, uint lock_flags, - xfs_inode_t **ipp, - xfs_daddr_t bno) + xfs_inode_t **ipp) { xfs_inode_t *ip; int error; @@ -397,7 +393,7 @@ again: read_unlock(&pag->pag_ici_lock); XFS_STATS_INC(xs_ig_missed); - error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno, + error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, flags, lock_flags); if (error) goto out_error_or_again; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d53c39d..b76a829 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -177,7 +177,7 @@ xfs_imap_to_bp( if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { - if (iget_flags & XFS_IGET_BULKSTAT) { + if (iget_flags & XFS_IGET_UNTRUSTED) { xfs_trans_brelse(tp, bp); return XFS_ERROR(EINVAL); } @@ -787,7 +787,6 @@ xfs_iread( xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *ip, - xfs_daddr_t bno, uint iget_flags) { xfs_buf_t *bp; @@ -797,11 +796,9 @@ xfs_iread( /* * Fill in the location information in the in-core inode. */ - ip->i_imap.im_blkno = bno; error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); if (error) return error; - ASSERT(bno == 0 || bno == ip->i_imap.im_blkno); /* * Get pointers to the on-disk inode and the buffer containing it. diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 9965e40..78550df 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -442,7 +442,7 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) * xfs_iget.c prototypes. */ int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - uint, uint, xfs_inode_t **, xfs_daddr_t); + uint, uint, xfs_inode_t **); void xfs_iput(xfs_inode_t *, uint); void xfs_iput_new(xfs_inode_t *, uint); void xfs_ilock(xfs_inode_t *, uint); @@ -500,7 +500,7 @@ do { \ * Flags for xfs_iget() */ #define XFS_IGET_CREATE 0x1 -#define XFS_IGET_BULKSTAT 0x2 +#define XFS_IGET_UNTRUSTED 0x2 int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, struct xfs_dinode **, @@ -509,7 +509,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, struct xfs_buf **, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, - struct xfs_inode *, xfs_daddr_t, uint); + struct xfs_inode *, uint); void xfs_dinode_to_disk(struct xfs_dinode *, struct xfs_icdinode *); void xfs_idestroy_fork(struct xfs_inode *, int); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index b1b801e..2b86f86 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -49,24 +49,40 @@ xfs_internal_inum( (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); } -STATIC int -xfs_bulkstat_one_iget( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - xfs_daddr_t bno, /* starting bno of inode cluster */ - xfs_bstat_t *buf, /* return buffer */ - int *stat) /* BULKSTAT_RV_... */ +/* + * Return stat information for one inode. + * Return 0 if ok, else errno. + */ +int +xfs_bulkstat_one_int( + struct xfs_mount *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ + int *ubused, /* bytes used by me */ + int *stat) /* BULKSTAT_RV_... */ { - xfs_icdinode_t *dic; /* dinode core info pointer */ - xfs_inode_t *ip; /* incore inode pointer */ - struct inode *inode; - int error; + struct xfs_icdinode *dic; /* dinode core info pointer */ + struct xfs_inode *ip; /* incore inode pointer */ + struct inode *inode; + struct xfs_bstat *buf; /* return buffer */ + int error = 0; /* error value */ + + *stat = BULKSTAT_RV_NOTHING; + + if (!buffer || xfs_internal_inum(mp, ino)) + return XFS_ERROR(EINVAL); + + buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); + if (!buf) + return XFS_ERROR(ENOMEM); error = xfs_iget(mp, NULL, ino, - XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); + XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip); if (error) { *stat = BULKSTAT_RV_NOTHING; - return error; + goto out_free; } ASSERT(ip != NULL); @@ -127,77 +143,16 @@ xfs_bulkstat_one_iget( buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; break; } - xfs_iput(ip, XFS_ILOCK_SHARED); - return error; -} -STATIC void -xfs_bulkstat_one_dinode( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - xfs_dinode_t *dic, /* dinode inode pointer */ - xfs_bstat_t *buf) /* return buffer */ -{ - /* - * The inode format changed when we moved the link count and - * made it 32 bits long. If this is an old format inode, - * convert it in memory to look like a new one. If it gets - * flushed to disk we will convert back before flushing or - * logging it. We zero out the new projid field and the old link - * count field. We'll handle clearing the pad field (the remains - * of the old uuid field) when we actually convert the inode to - * the new format. We don't change the version number so that we - * can distinguish this from a real new format inode. - */ - if (dic->di_version == 1) { - buf->bs_nlink = be16_to_cpu(dic->di_onlink); - buf->bs_projid = 0; - } else { - buf->bs_nlink = be32_to_cpu(dic->di_nlink); - buf->bs_projid = be16_to_cpu(dic->di_projid); - } + error = formatter(buffer, ubsize, ubused, buf); - buf->bs_ino = ino; - buf->bs_mode = be16_to_cpu(dic->di_mode); - buf->bs_uid = be32_to_cpu(dic->di_uid); - buf->bs_gid = be32_to_cpu(dic->di_gid); - buf->bs_size = be64_to_cpu(dic->di_size); - buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec); - buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec); - buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec); - buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); - buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); - buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); - buf->bs_xflags = xfs_dic2xflags(dic); - buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; - buf->bs_extents = be32_to_cpu(dic->di_nextents); - buf->bs_gen = be32_to_cpu(dic->di_gen); - memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); - buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask); - buf->bs_dmstate = be16_to_cpu(dic->di_dmstate); - buf->bs_aextents = be16_to_cpu(dic->di_anextents); - buf->bs_forkoff = XFS_DFORK_BOFF(dic); + if (!error) + *stat = BULKSTAT_RV_DIDONE; - switch (dic->di_format) { - case XFS_DINODE_FMT_DEV: - buf->bs_rdev = xfs_dinode_get_rdev(dic); - buf->bs_blksize = BLKDEV_IOSIZE; - buf->bs_blocks = 0; - break; - case XFS_DINODE_FMT_LOCAL: - case XFS_DINODE_FMT_UUID: - buf->bs_rdev = 0; - buf->bs_blksize = mp->m_sb.sb_blocksize; - buf->bs_blocks = 0; - break; - case XFS_DINODE_FMT_EXTENTS: - case XFS_DINODE_FMT_BTREE: - buf->bs_rdev = 0; - buf->bs_blksize = mp->m_sb.sb_blocksize; - buf->bs_blocks = be64_to_cpu(dic->di_nblocks); - break; - } + out_free: + kmem_free(buf); + return error; } /* Return 0 on success or positive error */ @@ -217,118 +172,17 @@ xfs_bulkstat_one_fmt( return 0; } -/* - * Return stat information for one inode. - * Return 0 if ok, else errno. - */ -int /* error status */ -xfs_bulkstat_one_int( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ - xfs_daddr_t bno, /* starting bno of inode cluster */ - int *ubused, /* bytes used by me */ - void *dibuff, /* on-disk inode buffer */ - int *stat) /* BULKSTAT_RV_... */ -{ - xfs_bstat_t *buf; /* return buffer */ - int error = 0; /* error value */ - xfs_dinode_t *dip; /* dinode inode pointer */ - - dip = (xfs_dinode_t *)dibuff; - *stat = BULKSTAT_RV_NOTHING; - - if (!buffer || xfs_internal_inum(mp, ino)) - return XFS_ERROR(EINVAL); - - buf = kmem_alloc(sizeof(*buf), KM_SLEEP); - - if (dip == NULL) { - /* We're not being passed a pointer to a dinode. This happens - * if BULKSTAT_FG_IGET is selected. Do the iget. - */ - error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat); - if (error) - goto out_free; - } else { - xfs_bulkstat_one_dinode(mp, ino, dip, buf); - } - - error = formatter(buffer, ubsize, ubused, buf); - if (error) - goto out_free; - - *stat = BULKSTAT_RV_DIDONE; - - out_free: - kmem_free(buf); - return error; -} - int xfs_bulkstat_one( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* buffer to place output in */ int ubsize, /* size of buffer */ - void *private_data, /* my private data */ - xfs_daddr_t bno, /* starting bno of inode cluster */ int *ubused, /* bytes used by me */ - void *dibuff, /* on-disk inode buffer */ int *stat) /* BULKSTAT_RV_... */ { return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt, bno, - ubused, dibuff, stat); -} - -/* - * Test to see whether we can use the ondisk inode directly, based - * on the given bulkstat flags, filling in dipp accordingly. - * Returns zero if the inode is dodgey. - */ -STATIC int -xfs_bulkstat_use_dinode( - xfs_mount_t *mp, - int flags, - xfs_buf_t *bp, - int clustidx, - xfs_dinode_t **dipp) -{ - xfs_dinode_t *dip; - unsigned int aformat; - - *dipp = NULL; - if (!bp || (flags & BULKSTAT_FG_IGET)) - return 1; - dip = (xfs_dinode_t *) - xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); - /* - * Check the buffer containing the on-disk inode for di_mode == 0. - * This is to prevent xfs_bulkstat from picking up just reclaimed - * inodes that have their in-core state initialized but not flushed - * to disk yet. This is a temporary hack that would require a proper - * fix in the future. - */ - if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || - !XFS_DINODE_GOOD_VERSION(dip->di_version) || - !dip->di_mode) - return 0; - if (flags & BULKSTAT_FG_QUICK) { - *dipp = dip; - return 1; - } - /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ - aformat = dip->di_aformat; - if ((XFS_DFORK_Q(dip) == 0) || - (aformat == XFS_DINODE_FMT_LOCAL) || - (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) { - *dipp = dip; - return 1; - } - return 1; + xfs_bulkstat_one_fmt, ubused, stat); } #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) @@ -342,10 +196,8 @@ xfs_bulkstat( xfs_ino_t *lastinop, /* last inode returned */ int *ubcountp, /* size of buffer/count returned */ bulkstat_one_pf formatter, /* func that'd fill a single buf */ - void *private_data,/* private data for formatter */ size_t statstruct_size, /* sizeof struct filling */ char __user *ubuffer, /* buffer with inode stats */ - int flags, /* defined in xfs_itable.h */ int *done) /* 1 if there are more stats to get */ { xfs_agblock_t agbno=0;/* allocation group block number */ @@ -380,14 +232,12 @@ xfs_bulkstat( int ubelem; /* spaces used in user's buffer */ int ubused; /* bytes used by formatter */ xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ - xfs_dinode_t *dip; /* ptr into bp for specific inode */ /* * Get the last inode value, see if there's nothing to do. */ ino = (xfs_ino_t)*lastinop; lastino = ino; - dip = NULL; agno = XFS_INO_TO_AGNO(mp, ino); agino = XFS_INO_TO_AGINO(mp, ino); if (agno >= mp->m_sb.sb_agcount || @@ -612,37 +462,6 @@ xfs_bulkstat( irbp->ir_startino) + ((chunkidx & nimask) >> mp->m_sb.sb_inopblog); - - if (flags & (BULKSTAT_FG_QUICK | - BULKSTAT_FG_INLINE)) { - int offset; - - ino = XFS_AGINO_TO_INO(mp, agno, - agino); - bno = XFS_AGB_TO_DADDR(mp, agno, - agbno); - - /* - * Get the inode cluster buffer - */ - if (bp) - xfs_buf_relse(bp); - - error = xfs_inotobp(mp, NULL, ino, &dip, - &bp, &offset, - XFS_IGET_BULKSTAT); - - if (!error) - clustidx = offset / mp->m_sb.sb_inodesize; - if (XFS_TEST_ERROR(error != 0, - mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, - XFS_RANDOM_BULKSTAT_READ_CHUNK)) { - bp = NULL; - ubleft = 0; - rval = error; - break; - } - } } ino = XFS_AGINO_TO_INO(mp, agno, agino); bno = XFS_AGB_TO_DADDR(mp, agno, agbno); @@ -658,35 +477,13 @@ xfs_bulkstat( * when the chunk is used up. */ irbp->ir_freecount++; - if (!xfs_bulkstat_use_dinode(mp, flags, bp, - clustidx, &dip)) { - lastino = ino; - continue; - } - /* - * If we need to do an iget, cannot hold bp. - * Drop it, until starting the next cluster. - */ - if ((flags & BULKSTAT_FG_INLINE) && !dip) { - if (bp) - xfs_buf_relse(bp); - bp = NULL; - } /* * Get the inode and fill in a single buffer. - * BULKSTAT_FG_QUICK uses dip to fill it in. - * BULKSTAT_FG_IGET uses igets. - * BULKSTAT_FG_INLINE uses dip if we have an - * inline attr fork, else igets. - * See: xfs_bulkstat_one & xfs_dm_bulkstat_one. - * This is also used to count inodes/blks, etc - * in xfs_qm_quotacheck. */ ubused = statstruct_size; - error = formatter(mp, ino, ubufp, - ubleft, private_data, - bno, &ubused, dip, &fmterror); + error = formatter(mp, ino, ubufp, ubleft, + &ubused, &fmterror); if (fmterror == BULKSTAT_RV_NOTHING) { if (error && error != ENOENT && error != EINVAL) { @@ -778,8 +575,7 @@ xfs_bulkstat_single( */ ino = (xfs_ino_t)*lastinop; - error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), - NULL, 0, NULL, NULL, &res); + error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res); if (error) { /* * Special case way failed, do it the "long" way @@ -788,8 +584,7 @@ xfs_bulkstat_single( (*lastinop)--; count = 1; if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, - NULL, sizeof(xfs_bstat_t), buffer, - BULKSTAT_FG_IGET, done)) + sizeof(xfs_bstat_t), buffer, done)) return error; if (count == 0 || (xfs_ino_t)*lastinop != ino) return error == EFSCORRUPTED ? diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 20792bf..97295d9 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -27,10 +27,7 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, xfs_ino_t ino, void __user *buffer, int ubsize, - void *private_data, - xfs_daddr_t bno, int *ubused, - void *dip, int *stat); /* @@ -41,13 +38,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, #define BULKSTAT_RV_GIVEUP 2 /* - * Values for bulkstat flag argument. - */ -#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */ -#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */ -#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */ - -/* * Return stat information in bulk (by-inode) for the filesystem. */ int /* error status */ @@ -56,10 +46,8 @@ xfs_bulkstat( xfs_ino_t *lastino, /* last inode returned */ int *count, /* size of buffer/count returned */ bulkstat_one_pf formatter, /* func that'd fill a single buf */ - void *private_data, /* private data for formatter */ size_t statstruct_size,/* sizeof struct that we're filling */ char __user *ubuffer,/* buffer with inode stats */ - int flags, /* flag to control access method */ int *done); /* 1 if there are more stats to get */ int @@ -82,9 +70,7 @@ xfs_bulkstat_one_int( void __user *buffer, int ubsize, bulkstat_one_fmt_pf formatter, - xfs_daddr_t bno, int *ubused, - void *dibuff, int *stat); int @@ -93,10 +79,7 @@ xfs_bulkstat_one( xfs_ino_t ino, void __user *buffer, int ubsize, - void *private_data, - xfs_daddr_t bno, int *ubused, - void *dibuff, int *stat); typedef int (*inumbers_fmt_pf)( diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index ed0684c..9ac5cfa 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3198,7 +3198,7 @@ xlog_recover_process_one_iunlink( int error; ino = XFS_AGINO_TO_INO(mp, agno, agino); - error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); + error = xfs_iget(mp, NULL, ino, 0, 0, &ip); if (error) goto fail; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d59f4e8..69f62d8 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1300,7 +1300,7 @@ xfs_mountfs( * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. */ - error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); + error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); if (error) { cmn_err(CE_WARN, "XFS: failed to read root inode"); goto out_log_dealloc; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 1644551..a2d32ce 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -2277,12 +2277,12 @@ xfs_rtmount_inodes( sbp = &mp->m_sb; if (sbp->sb_rbmino == NULLFSINO) return 0; - error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip, 0); + error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip); if (error) return error; ASSERT(mp->m_rbmip != NULL); ASSERT(sbp->sb_rsumino != NULLFSINO); - error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); + error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip); if (error) { IRELE(mp->m_rbmip); return error; diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 785ff10..2559dfe 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -62,7 +62,7 @@ xfs_trans_iget( { int error; - error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0); + error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp); if (!error && tp) xfs_trans_ijoin(tp, *ipp, lock_flags); return error; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index a06bd62..c164683 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1269,7 +1269,7 @@ xfs_lookup( if (error) goto out; - error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0); + error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); if (error) goto out_free_name; |