diff options
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h | 2 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/dcache.c | 8 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/llite_internal.h | 1 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/llite_lib.c | 42 | ||||
-rw-r--r-- | fs/dcache.c | 149 | ||||
-rw-r--r-- | fs/fs_pin.c | 96 | ||||
-rw-r--r-- | fs/internal.h | 2 | ||||
-rw-r--r-- | fs/mount.h | 4 | ||||
-rw-r--r-- | fs/namespace.c | 44 | ||||
-rw-r--r-- | fs/ncpfs/dir.c | 98 | ||||
-rw-r--r-- | fs/ncpfs/ncp_fs_i.h | 1 | ||||
-rw-r--r-- | fs/ncpfs/ncplib_kernel.h | 30 | ||||
-rw-r--r-- | fs/proc/generic.c | 25 | ||||
-rw-r--r-- | fs/super.c | 4 | ||||
-rw-r--r-- | include/linux/dcache.h | 3 | ||||
-rw-r--r-- | include/linux/fs_pin.h | 25 | ||||
-rw-r--r-- | include/linux/lockref.h | 3 | ||||
-rw-r--r-- | include/linux/pid_namespace.h | 4 | ||||
-rw-r--r-- | kernel/acct.c | 94 | ||||
-rw-r--r-- | kernel/auditsc.c | 5 | ||||
-rw-r--r-- | lib/lockref.c | 36 | ||||
-rw-r--r-- | security/commoncap.c | 6 | ||||
-rw-r--r-- | security/selinux/selinuxfs.c | 52 |
23 files changed, 356 insertions, 378 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h index b63cfee..8f418ba 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h @@ -55,7 +55,7 @@ static const struct file_operations name##_debugfs_fops = { \ struct t4_debugfs_entry { const char *name; const struct file_operations *ops; - mode_t mode; + umode_t mode; unsigned char data; }; diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index 5bb9c85..88614b7 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -263,14 +263,6 @@ void ll_invalidate_aliases(struct inode *inode) dentry, dentry, dentry->d_parent, dentry->d_inode, dentry->d_flags); - if (unlikely(dentry == dentry->d_sb->s_root)) { - CERROR("%s: called on root dentry=%p, fid="DFID"\n", - ll_get_fsname(dentry->d_sb, NULL, 0), - dentry, PFID(ll_inode2fid(inode))); - lustre_dump_dentry(dentry, 1); - dump_stack(); - } - d_lustre_invalidate(dentry, 0); } ll_unlock_dcache(inode); diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 37306e0..d032c2b 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -816,7 +816,6 @@ int ll_show_options(struct seq_file *seq, struct dentry *dentry); void ll_dirty_page_discard_warn(struct page *page, int ioret); int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, struct super_block *, struct lookup_intent *); -void lustre_dump_dentry(struct dentry *, int recur); int ll_obd_statfs(struct inode *inode, void *arg); int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize); diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 45aaa1c..0c1b583 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -665,48 +665,6 @@ int ll_get_default_cookiesize(struct ll_sb_info *sbi, int *lmmsize) return rc; } -static void ll_dump_inode(struct inode *inode) -{ - struct ll_d_hlist_node *tmp; - int dentry_count = 0; - - LASSERT(inode != NULL); - - ll_d_hlist_for_each(tmp, &inode->i_dentry) - dentry_count++; - - CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n", - inode, ll_i2mdexp(inode)->exp_obd->obd_name, inode->i_ino, - inode->i_mode, atomic_read(&inode->i_count), dentry_count); -} - -void lustre_dump_dentry(struct dentry *dentry, int recur) -{ - struct list_head *tmp; - int subdirs = 0; - - LASSERT(dentry != NULL); - - list_for_each(tmp, &dentry->d_subdirs) - subdirs++; - - CERROR("dentry %p dump: name=%pd parent=%pd (%p), inode=%p, count=%u, flags=0x%x, fsdata=%p, %d subdirs\n", - dentry, dentry, dentry->d_parent, dentry->d_parent, - dentry->d_inode, d_count(dentry), - dentry->d_flags, dentry->d_fsdata, subdirs); - if (dentry->d_inode != NULL) - ll_dump_inode(dentry->d_inode); - - if (recur == 0) - return; - - list_for_each(tmp, &dentry->d_subdirs) { - struct dentry *d = list_entry(tmp, struct dentry, d_child); - - lustre_dump_dentry(d, recur - 1); - } -} - static void client_common_put_super(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); diff --git a/fs/dcache.c b/fs/dcache.c index 7d34f04..dc400fd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -511,7 +511,7 @@ static void __dentry_kill(struct dentry *dentry) * dentry_iput drops the locks, at which point nobody (except * transient RCU lookups) can reach this dentry. */ - BUG_ON((int)dentry->d_lockref.count > 0); + BUG_ON(dentry->d_lockref.count > 0); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -564,7 +564,7 @@ static inline struct dentry *lock_parent(struct dentry *dentry) struct dentry *parent = dentry->d_parent; if (IS_ROOT(dentry)) return NULL; - if (unlikely((int)dentry->d_lockref.count < 0)) + if (unlikely(dentry->d_lockref.count < 0)) return NULL; if (likely(spin_trylock(&parent->d_lock))) return parent; @@ -593,6 +593,110 @@ again: return parent; } +/* + * Try to do a lockless dput(), and return whether that was successful. + * + * If unsuccessful, we return false, having already taken the dentry lock. + * + * The caller needs to hold the RCU read lock, so that the dentry is + * guaranteed to stay around even if the refcount goes down to zero! + */ +static inline bool fast_dput(struct dentry *dentry) +{ + int ret; + unsigned int d_flags; + + /* + * If we have a d_op->d_delete() operation, we sould not + * let the dentry count go to zero, so use "put__or_lock". + */ + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) + return lockref_put_or_lock(&dentry->d_lockref); + + /* + * .. otherwise, we can try to just decrement the + * lockref optimistically. + */ + ret = lockref_put_return(&dentry->d_lockref); + + /* + * If the lockref_put_return() failed due to the lock being held + * by somebody else, the fast path has failed. We will need to + * get the lock, and then check the count again. + */ + if (unlikely(ret < 0)) { + spin_lock(&dentry->d_lock); + if (dentry->d_lockref.count > 1) { + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + return 1; + } + return 0; + } + + /* + * If we weren't the last ref, we're done. + */ + if (ret) + return 1; + + /* + * Careful, careful. The reference count went down + * to zero, but we don't hold the dentry lock, so + * somebody else could get it again, and do another + * dput(), and we need to not race with that. + * + * However, there is a very special and common case + * where we don't care, because there is nothing to + * do: the dentry is still hashed, it does not have + * a 'delete' op, and it's referenced and already on + * the LRU list. + * + * NOTE! Since we aren't locked, these values are + * not "stable". However, it is sufficient that at + * some point after we dropped the reference the + * dentry was hashed and the flags had the proper + * value. Other dentry users may have re-gotten + * a reference to the dentry and change that, but + * our work is done - we can leave the dentry + * around with a zero refcount. + */ + smp_rmb(); + d_flags = ACCESS_ONCE(dentry->d_flags); + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + + /* Nothing to do? Dropping the reference was all we needed? */ + if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) + return 1; + + /* + * Not the fast normal case? Get the lock. We've already decremented + * the refcount, but we'll need to re-check the situation after + * getting the lock. + */ + spin_lock(&dentry->d_lock); + + /* + * Did somebody else grab a reference to it in the meantime, and + * we're no longer the last user after all? Alternatively, somebody + * else could have killed it and marked it dead. Either way, we + * don't need to do anything else. + */ + if (dentry->d_lockref.count) { + spin_unlock(&dentry->d_lock); + return 1; + } + + /* + * Re-get the reference we optimistically dropped. We hold the + * lock, and we just tested that it was zero, so we can just + * set it to 1. + */ + dentry->d_lockref.count = 1; + return 0; +} + + /* * This is dput * @@ -625,8 +729,14 @@ void dput(struct dentry *dentry) return; repeat: - if (lockref_put_or_lock(&dentry->d_lockref)) + rcu_read_lock(); + if (likely(fast_dput(dentry))) { + rcu_read_unlock(); return; + } + + /* Slow case: now with the dentry lock held */ + rcu_read_unlock(); /* Unreachable? Get rid of it */ if (unlikely(d_unhashed(dentry))) @@ -813,7 +923,7 @@ static void shrink_dentry_list(struct list_head *list) * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free it. */ - if ((int)dentry->d_lockref.count > 0) { + if (dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); if (parent) spin_unlock(&parent->d_lock); @@ -2191,37 +2301,6 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) } EXPORT_SYMBOL(d_hash_and_lookup); -/** - * d_validate - verify dentry provided from insecure source (deprecated) - * @dentry: The dentry alleged to be valid child of @dparent - * @dparent: The parent dentry (known to be valid) - * - * An insecure source has sent us a dentry, here we verify it and dget() it. - * This is used by ncpfs in its readdir implementation. - * Zero is returned in the dentry is invalid. - * - * This function is slow for big directories, and deprecated, do not use it. - */ -int d_validate(struct dentry *dentry, struct dentry *dparent) -{ - struct dentry *child; - - spin_lock(&dparent->d_lock); - list_for_each_entry(child, &dparent->d_subdirs, d_child) { - if (dentry == child) { - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - __dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dparent->d_lock); - return 1; - } - } - spin_unlock(&dparent->d_lock); - - return 0; -} -EXPORT_SYMBOL(d_validate); - /* * When a file is deleted, we have two options: * - turn this dentry into a negative dentry diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 9368236..b06c987 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -1,78 +1,102 @@ #include <linux/fs.h> +#include <linux/sched.h> #include <linux/slab.h> -#include <linux/fs_pin.h> #include "internal.h" #include "mount.h" -static void pin_free_rcu(struct rcu_head *head) -{ - kfree(container_of(head, struct fs_pin, rcu)); -} - static DEFINE_SPINLOCK(pin_lock); -void pin_put(struct fs_pin *p) -{ - if (atomic_long_dec_and_test(&p->count)) - call_rcu(&p->rcu, pin_free_rcu); -} - void pin_remove(struct fs_pin *pin) { spin_lock(&pin_lock); hlist_del(&pin->m_list); hlist_del(&pin->s_list); spin_unlock(&pin_lock); + spin_lock_irq(&pin->wait.lock); + pin->done = 1; + wake_up_locked(&pin->wait); + spin_unlock_irq(&pin->wait.lock); } -void pin_insert(struct fs_pin *pin, struct vfsmount *m) +void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) { spin_lock(&pin_lock); - hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); + if (p) + hlist_add_head(&pin->s_list, p); hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); spin_unlock(&pin_lock); } +void pin_insert(struct fs_pin *pin, struct vfsmount *m) +{ + pin_insert_group(pin, m, &m->mnt_sb->s_pins); +} + +void pin_kill(struct fs_pin *p) +{ + wait_queue_t wait; + + if (!p) { + rcu_read_unlock(); + return; + } + init_wait(&wait); + spin_lock_irq(&p->wait.lock); + if (likely(!p->done)) { + p->done = -1; + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + p->kill(p); + return; + } + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + return; + } + __add_wait_queue(&p->wait, &wait); + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + schedule(); + rcu_read_lock(); + if (likely(list_empty(&wait.task_list))) + break; + /* OK, we know p couldn't have been freed yet */ + spin_lock_irq(&p->wait.lock); + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + break; + } + } + rcu_read_unlock(); +} + void mnt_pin_kill(struct mount *m) { while (1) { struct hlist_node *p; - struct fs_pin *pin; rcu_read_lock(); p = ACCESS_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; } - pin = hlist_entry(p, struct fs_pin, m_list); - if (!atomic_long_inc_not_zero(&pin->count)) { - rcu_read_unlock(); - cpu_relax(); - continue; - } - rcu_read_unlock(); - pin->kill(pin); + pin_kill(hlist_entry(p, struct fs_pin, m_list)); } } -void sb_pin_kill(struct super_block *sb) +void group_pin_kill(struct hlist_head *p) { while (1) { - struct hlist_node *p; - struct fs_pin *pin; + struct hlist_node *q; rcu_read_lock(); - p = ACCESS_ONCE(sb->s_pins.first); - if (!p) { + q = ACCESS_ONCE(p->first); + if (!q) { rcu_read_unlock(); break; } - pin = hlist_entry(p, struct fs_pin, s_list); - if (!atomic_long_inc_not_zero(&pin->count)) { - rcu_read_unlock(); - cpu_relax(); - continue; - } - rcu_read_unlock(); - pin->kill(pin); + pin_kill(hlist_entry(q, struct fs_pin, s_list)); } } diff --git a/fs/internal.h b/fs/internal.h index d92c346..30459da 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -144,7 +144,7 @@ extern const struct file_operations pipefifo_fops; /* * fs_pin.c */ -extern void sb_pin_kill(struct super_block *sb); +extern void group_pin_kill(struct hlist_head *p); extern void mnt_pin_kill(struct mount *m); /* @@ -2,6 +2,7 @@ #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/ns_common.h> +#include <linux/fs_pin.h> struct mnt_namespace { atomic_t count; @@ -62,7 +63,8 @@ struct mount { int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ struct hlist_head mnt_pins; - struct path mnt_ex_mountpoint; + struct fs_pin mnt_umount; + struct dentry *mnt_ex_mountpoint; }; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ diff --git a/fs/namespace.c b/fs/namespace.c index 6dae553..72a286e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -190,6 +190,14 @@ unsigned int mnt_get_count(struct mount *mnt) #endif } +static void drop_mountpoint(struct fs_pin *p) +{ + struct mount *m = container_of(p, struct mount, mnt_umount); + dput(m->mnt_ex_mountpoint); + pin_remove(p); + mntput(&m->mnt); +} + static struct mount *alloc_vfsmnt(const char *name) { struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -229,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name) #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif + init_fs_pin(&mnt->mnt_umount, drop_mountpoint); } return mnt; @@ -1289,7 +1298,6 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static void namespace_unlock(void) { - struct mount *mnt; struct hlist_head head = unmounted; if (likely(hlist_empty(&head))) { @@ -1299,23 +1307,11 @@ static void namespace_unlock(void) head.first->pprev = &head.first; INIT_HLIST_HEAD(&unmounted); - - /* undo decrements we'd done in umount_tree() */ - hlist_for_each_entry(mnt, &head, mnt_hash) - if (mnt->mnt_ex_mountpoint.mnt) - mntget(mnt->mnt_ex_mountpoint.mnt); - up_write(&namespace_sem); synchronize_rcu(); - while (!hlist_empty(&head)) { - mnt = hlist_entry(head.first, struct mount, mnt_hash); - hlist_del_init(&mnt->mnt_hash); - if (mnt->mnt_ex_mountpoint.mnt) - path_put(&mnt->mnt_ex_mountpoint); - mntput(&mnt->mnt); - } + group_pin_kill(&head); } static inline void namespace_lock(void) @@ -1334,7 +1330,6 @@ void umount_tree(struct mount *mnt, int how) { HLIST_HEAD(tmp_list); struct mount *p; - struct mount *last = NULL; for (p = mnt; p; p = next_mnt(p, mnt)) { hlist_del_init_rcu(&p->mnt_hash); @@ -1347,33 +1342,28 @@ void umount_tree(struct mount *mnt, int how) if (how) propagate_umount(&tmp_list); - hlist_for_each_entry(p, &tmp_list, mnt_hash) { + while (!hlist_empty(&tmp_list)) { + p = hlist_entry(tmp_list.first, struct mount, mnt_hash); + hlist_del_init_rcu(&p->mnt_hash); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; if (how < 2) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; + + pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); if (mnt_has_parent(p)) { hlist_del_init(&p->mnt_mp_list); put_mountpoint(p->mnt_mp); mnt_add_count(p->mnt_parent, -1); - /* move the reference to mountpoint into ->mnt_ex_mountpoint */ - p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; - p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; + /* old mountpoint will be dropped when we can do that */ + p->mnt_ex_mountpoint = p->mnt_mountpoint; p->mnt_mountpoint = p->mnt.mnt_root; p->mnt_parent = p; p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); - last = p; - } - if (last) { - last->mnt_hash.next = unmounted.first; - if (unmounted.first) - unmounted.first->pprev = &last->mnt_hash.next; - unmounted.first = tmp_list.first; - unmounted.first->pprev = &unmounted.first; } } diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0089601..e7ca827 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -77,6 +77,7 @@ static int ncp_hash_dentry(const struct dentry *, struct qstr *); static int ncp_compare_dentry(const struct dentry *, const struct dentry *, unsigned int, const char *, const struct qstr *); static int ncp_delete_dentry(const struct dentry *); +static void ncp_d_prune(struct dentry *dentry); const struct dentry_operations ncp_dentry_operations = { @@ -84,6 +85,7 @@ const struct dentry_operations ncp_dentry_operations = .d_hash = ncp_hash_dentry, .d_compare = ncp_compare_dentry, .d_delete = ncp_delete_dentry, + .d_prune = ncp_d_prune, }; #define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber]) @@ -384,42 +386,6 @@ finished: return val; } -static struct dentry * -ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) -{ - struct dentry *dent = dentry; - - if (d_validate(dent, parent)) { - if (dent->d_name.len <= NCP_MAXPATHLEN && - (unsigned long)dent->d_fsdata == fpos) { - if (!dent->d_inode) { - dput(dent); - dent = NULL; - } - return dent; - } - dput(dent); - } - - /* If a pointer is invalid, we search the dentry. */ - spin_lock(&parent->d_lock); - list_for_each_entry(dent, &parent->d_subdirs, d_child) { - if ((unsigned long)dent->d_fsdata == fpos) { - if (dent->d_inode) - dget(dent); - else - dent = NULL; - spin_unlock(&parent->d_lock); - goto out; - } - } - spin_unlock(&parent->d_lock); - return NULL; - -out: - return dent; -} - static time_t ncp_obtain_mtime(struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -435,6 +401,20 @@ static time_t ncp_obtain_mtime(struct dentry *dentry) return ncp_date_dos2unix(i.modifyTime, i.modifyDate); } +static inline void +ncp_invalidate_dircache_entries(struct dentry *parent) +{ + struct ncp_server *server = NCP_SERVER(parent->d_inode); + struct dentry *dentry; + + spin_lock(&parent->d_lock); + list_for_each_entry(dentry, &parent->d_subdirs, d_child) { + dentry->d_fsdata = NULL; + ncp_age_dentry(server, dentry); + } + spin_unlock(&parent->d_lock); +} + static int ncp_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; @@ -500,10 +480,21 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx) struct dentry *dent; bool over; - dent = ncp_dget_fpos(ctl.cache->dentry[ctl.idx], - dentry, ctx->pos); - if (!dent) + spin_lock(&dentry->d_lock); + if (!(NCP_FINFO(inode)->flags & NCPI_DIR_CACHE)) { + spin_unlock(&dentry->d_lock); + goto invalid_cache; + } + dent = ctl.cache->dentry[ctl.idx]; + if (unlikely(!lockref_get_not_dead(&dent->d_lockref))) { + spin_unlock(&dentry->d_lock); + goto invalid_cache; + } + spin_unlock(&dentry->d_lock); + if (!dent->d_inode) { + dput(dent); goto invalid_cache; + } over = !dir_emit(ctx, dent->d_name.name, dent->d_name.len, dent->d_inode->i_ino, DT_UNKNOWN); @@ -548,6 +539,9 @@ init_cache: ctl.filled = 0; ctl.valid = 1; read_really: + spin_lock(&dentry->d_lock); + NCP_FINFO(inode)->flags |= NCPI_DIR_CACHE; + spin_unlock(&dentry->d_lock); if (ncp_is_server_root(inode)) { ncp_read_volume_list(file, ctx, &ctl); } else { @@ -573,6 +567,13 @@ out: return result; } +static void ncp_d_prune(struct dentry *dentry) +{ + if (!dentry->d_fsdata) /* not referenced from page cache */ + return; + NCP_FINFO(dentry->d_parent->d_inode)->flags &= ~NCPI_DIR_CACHE; +} + static int ncp_fill_cache(struct file *file, struct dir_context *ctx, struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, @@ -630,6 +631,10 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, d_instantiate(newdent, inode); if (!hashed) d_rehash(newdent); + } else { + spin_lock(&dentry->d_lock); + NCP_FINFO(inode)->flags &= ~NCPI_DIR_CACHE; + spin_unlock(&dentry->d_lock); } } else { struct inode *inode = newdent->d_inode; @@ -639,12 +644,6 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, mutex_unlock(&inode->i_mutex); } - if (newdent->d_inode) { - ino = newdent->d_inode->i_ino; - newdent->d_fsdata = (void *) ctl.fpos; - ncp_new_dentry(newdent); - } - if (ctl.idx >= NCP_DIRCACHE_SIZE) { if (ctl.page) { kunmap(ctl.page); @@ -660,8 +659,13 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, ctl.cache = kmap(ctl.page); } if (ctl.cache) { - ctl.cache->dentry[ctl.idx] = newdent; - valid = 1; + if (newdent->d_inode) { + newdent->d_fsdata = newdent; + ctl.cache->dentry[ctl.idx] = newdent; + ino = newdent->d_inode->i_ino; + ncp_new_dentry(newdent); + } + valid = 1; } dput(newdent); end_advance: diff --git a/fs/ncpfs/ncp_fs_i.h b/fs/ncpfs/ncp_fs_i.h index 4b0bec4..c479450 100644 --- a/fs/ncpfs/ncp_fs_i.h +++ b/fs/ncpfs/ncp_fs_i.h @@ -22,6 +22,7 @@ struct ncp_inode_info { int access; int flags; #define NCPI_KLUDGE_SYMLINK 0x0001 +#define NCPI_DIR_CACHE 0x0002 __u8 file_handle[6]; struct inode vfs_inode; }; diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index b785f74..250e443 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -184,36 +184,6 @@ ncp_new_dentry(struct dentry* dentry) dentry->d_time = jiffies; } -static inline void -ncp_renew_dentries(struct dentry *parent) -{ - struct ncp_server *server = NCP_SERVER(parent->d_inode); - struct dentry *dentry; - - spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - if (dentry->d_fsdata == NULL) - ncp_age_dentry(server, dentry); - else - ncp_new_dentry(dentry); - } - spin_unlock(&parent->d_lock); -} - -static inline void -ncp_invalidate_dircache_entries(struct dentry *parent) -{ - struct ncp_server *server = NCP_SERVER(parent->d_inode); - struct dentry *dentry; - - spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - dentry->d_fsdata = NULL; - ncp_age_dentry(server, dentry); - } - spin_unlock(&parent->d_lock); -} - struct ncp_cache_head { time_t mtime; unsigned long time; /* cache age */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index de14e46..3309f59 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -350,29 +350,12 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp if (ret) return ret; - if (S_ISDIR(dp->mode)) { - dp->proc_fops = &proc_dir_operations; - dp->proc_iops = &proc_dir_inode_operations; - dir->nlink++; - } else if (S_ISLNK(dp->mode)) { - dp->proc_iops = &proc_link_inode_operations; - } else if (S_ISREG(dp->mode)) { - BUG_ON(dp->proc_fops == NULL); - dp->proc_iops = &proc_file_inode_operations; - } else { - WARN_ON(1); - proc_free_inum(dp->low_ino); - return -EINVAL; - } - spin_lock(&proc_subdir_lock); dp->parent = dir; if (pde_subdir_insert(dir, dp) == false) { WARN(1, "proc_dir_entry '%s/%s' already registered\n", dir->name, dp->name); spin_unlock(&proc_subdir_lock); - if (S_ISDIR(dp->mode)) - dir->nlink--; proc_free_inum(dp->low_ino); return -EEXIST; } @@ -431,6 +414,7 @@ struct proc_dir_entry *proc_symlink(const char *name, ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); if (ent->data) { strcpy((char*)ent->data,dest); + ent->proc_iops = &proc_link_inode_operations; if (proc_register(parent, ent) < 0) { kfree(ent->data); kfree(ent); @@ -456,8 +440,12 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, ent = __proc_create(&parent, name, S_IFDIR | mode, 2); if (ent) { ent->data = data; + ent->proc_fops = &proc_dir_operations; + ent->proc_iops = &proc_dir_inode_operations; + parent->nlink++; if (proc_register(parent, ent) < 0) { kfree(ent); + parent->nlink--; ent = NULL; } } @@ -493,6 +481,8 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, return NULL; } + BUG_ON(proc_fops == NULL); + if ((mode & S_IALLUGO) == 0) mode |= S_IRUGO; pde = __proc_create(&parent, name, mode, 1); @@ -500,6 +490,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, goto out; pde->proc_fops = proc_fops; pde->data = data; + pde->proc_iops = &proc_file_inode_operations; if (proc_register(parent, pde) < 0) goto out_free; return pde; @@ -715,9 +715,9 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); if (remount_ro) { - if (sb->s_pins.first) { + if (!hlist_empty(&sb->s_pins)) { up_write(&sb->s_umount); - sb_pin_kill(sb); + group_pin_kill(&sb->s_pins); down_write(&sb->s_umount); if (!sb->s_root) return 0; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 5a81398..92c08cf 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -319,9 +319,6 @@ static inline unsigned d_count(const struct dentry *dentry) return dentry->d_lockref.count; } -/* validate "insecure" dentry pointer */ -extern int d_validate(struct dentry *, struct dentry *); - /* * helper function for dentry_operations.d_dname() members */ diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index f66525e..9dc4e03 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -1,17 +1,22 @@ -#include <linux/fs.h> +#include <linux/wait.h> struct fs_pin { - atomic_long_t count; - union { - struct { - struct hlist_node s_list; - struct hlist_node m_list; - }; - struct rcu_head rcu; - }; + wait_queue_head_t wait; + int done; + struct hlist_node s_list; + struct hlist_node m_list; void (*kill)(struct fs_pin *); }; -void pin_put(struct fs_pin *); +struct vfsmount; + +static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *)) +{ + init_waitqueue_head(&p->wait); + p->kill = kill; +} + void pin_remove(struct fs_pin *); +void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); void pin_insert(struct fs_pin *, struct vfsmount *); +void pin_kill(struct fs_pin *); diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 4bfde0e..b10b122d 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -28,12 +28,13 @@ struct lockref { #endif struct { spinlock_t lock; - unsigned int count; + int count; }; }; }; extern void lockref_get(struct lockref *); +extern int lockref_put_return(struct lockref *); extern int lockref_get_not_zero(struct lockref *); extern int lockref_get_or_lock(struct lockref *); extern int lockref_put_or_lock(struct lockref *); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index b9cf6c5..918b117 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -19,7 +19,7 @@ struct pidmap { #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) #define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) -struct bsd_acct_struct; +struct fs_pin; struct pid_namespace { struct kref kref; @@ -37,7 +37,7 @@ struct pid_namespace { struct dentry *proc_thread_self; #endif #ifdef CONFIG_BSD_PROCESS_ACCT - struct bsd_acct_struct *bacct; + struct fs_pin *bacct; #endif struct user_namespace *user_ns; struct work_struct proc_work; diff --git a/kernel/acct.c b/kernel/acct.c index 33738ef..e6c10d1a 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -76,10 +76,11 @@ int acct_parm[3] = {4, 2, 30}; /* * External references and all of the globals. */ -static void do_acct_process(struct bsd_acct_struct *acct); struct bsd_acct_struct { struct fs_pin pin; + atomic_long_t count; + struct rcu_head rcu; struct mutex lock; int active; unsigned long needcheck; @@ -89,6 +90,8 @@ struct bsd_acct_struct { struct completion done; }; +static void do_acct_process(struct bsd_acct_struct *acct); + /* * Check the amount of free space and suspend/resume accordingly. */ @@ -124,32 +127,56 @@ out: return acct->active; } +static void acct_put(struct bsd_acct_struct *p) +{ + if (atomic_long_dec_and_test(&p->count)) + kfree_rcu(p, rcu); +} + +static inline struct bsd_acct_struct *to_acct(struct fs_pin *p) +{ + return p ? container_of(p, struct bsd_acct_struct, pin) : NULL; +} + static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; again: smp_rmb(); rcu_read_lock(); - res = ACCESS_ONCE(ns->bacct); + res = to_acct(ACCESS_ONCE(ns->bacct)); if (!res) { rcu_read_unlock(); return NULL; } - if (!atomic_long_inc_not_zero(&res->pin.count)) { + if (!atomic_long_inc_not_zero(&res->count)) { rcu_read_unlock(); cpu_relax(); goto again; } rcu_read_unlock(); mutex_lock(&res->lock); - if (!res->ns) { + if (res != to_acct(ACCESS_ONCE(ns->bacct))) { mutex_unlock(&res->lock); - pin_put(&res->pin); + acct_put(res); goto again; } return res; } +static void acct_pin_kill(struct fs_pin *pin) +{ + struct bsd_acct_struct *acct = to_acct(pin); + mutex_lock(&acct->lock); + do_acct_process(acct); + schedule_work(&acct->work); + wait_for_completion(&acct->done); + cmpxchg(&acct->ns->bacct, pin, NULL); + mutex_unlock(&acct->lock); + pin_remove(pin); + acct_put(acct); +} + static void close_work(struct work_struct *work) { struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); @@ -160,44 +187,13 @@ static void close_work(struct work_struct *work) complete(&acct->done); } -static void acct_kill(struct bsd_acct_struct *acct, - struct bsd_acct_struct *new) -{ - if (acct) { - struct pid_namespace *ns = acct->ns; - do_acct_process(acct); - INIT_WORK(&acct->work, close_work); - init_completion(&acct->done); - schedule_work(&acct->work); - wait_for_completion(&acct->done); - pin_remove(&acct->pin); - ns->bacct = new; - acct->ns = NULL; - atomic_long_dec(&acct->pin.count); - mutex_unlock(&acct->lock); - pin_put(&acct->pin); - } -} - -static void acct_pin_kill(struct fs_pin *pin) -{ - struct bsd_acct_struct *acct; - acct = container_of(pin, struct bsd_acct_struct, pin); - mutex_lock(&acct->lock); - if (!acct->ns) { - mutex_unlock(&acct->lock); - pin_put(pin); - acct = NULL; - } - acct_kill(acct, NULL); -} - static int acct_on(struct filename *pathname) { struct file *file; struct vfsmount *mnt, *internal; struct pid_namespace *ns = task_active_pid_ns(current); - struct bsd_acct_struct *acct, *old; + struct bsd_acct_struct *acct; + struct fs_pin *old; int err; acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); @@ -238,21 +234,21 @@ static int acct_on(struct filename *pathname) mnt = file->f_path.mnt; file->f_path.mnt = internal; - atomic_long_set(&acct->pin.count, 1); - acct->pin.kill = acct_pin_kill; + atomic_long_set(&acct->count, 1); + init_fs_pin(&acct->pin, acct_pin_kill); acct->file = file; acct->needcheck = jiffies; acct->ns = ns; mutex_init(&acct->lock); + INIT_WORK(&acct->work, close_work); + init_completion(&acct->done); mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ pin_insert(&acct->pin, mnt); - old = acct_get(ns); - if (old) - acct_kill(old, acct); - else - ns->bacct = acct; + rcu_read_lock(); + old = xchg(&ns->bacct, &acct->pin); mutex_unlock(&acct->lock); + pin_kill(old); mnt_drop_write(mnt); mntput(mnt); return 0; @@ -288,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) mutex_unlock(&acct_on_mutex); putname(tmp); } else { - acct_kill(acct_get(task_active_pid_ns(current)), NULL); + rcu_read_lock(); + pin_kill(task_active_pid_ns(current)->bacct); } return error; @@ -296,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) void acct_exit_ns(struct pid_namespace *ns) { - acct_kill(acct_get(ns), NULL); + rcu_read_lock(); + pin_kill(ns->bacct); } /* @@ -576,7 +574,7 @@ static void slow_acct_process(struct pid_namespace *ns) if (acct) { do_acct_process(acct); mutex_unlock(&acct->lock); - pin_put(&acct->pin); + acct_put(acct); } } } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 072566d..55f82fc 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2405,7 +2405,6 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, struct audit_aux_data_bprm_fcaps *ax; struct audit_context *context = current->audit_context; struct cpu_vfs_cap_data vcaps; - struct dentry *dentry; ax = kmalloc(sizeof(*ax), GFP_KERNEL); if (!ax) @@ -2415,9 +2414,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, ax->d.next = context->aux; context->aux = (void *)ax; - dentry = dget(bprm->file->f_path.dentry); - get_vfs_caps_from_disk(dentry, &vcaps); - dput(dentry); + get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); ax->fcap.permitted = vcaps.permitted; ax->fcap.inheritable = vcaps.inheritable; diff --git a/lib/lockref.c b/lib/lockref.c index d2233de..ecb9a66 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -60,7 +60,7 @@ void lockref_get(struct lockref *lockref) EXPORT_SYMBOL(lockref_get); /** - * lockref_get_not_zero - Increments count unless the count is 0 + * lockref_get_not_zero - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero */ @@ -70,7 +70,7 @@ int lockref_get_not_zero(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) return 0; , return 1; @@ -78,7 +78,7 @@ int lockref_get_not_zero(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if (lockref->count) { + if (lockref->count > 0) { lockref->count++; retval = 1; } @@ -88,7 +88,7 @@ int lockref_get_not_zero(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_not_zero); /** - * lockref_get_or_lock - Increments count unless the count is 0 + * lockref_get_or_lock - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero * and we got the lock instead. @@ -97,14 +97,14 @@ int lockref_get_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) break; , return 1; ); spin_lock(&lockref->lock); - if (!lockref->count) + if (lockref->count <= 0) return 0; lockref->count++; spin_unlock(&lockref->lock); @@ -113,6 +113,26 @@ int lockref_get_or_lock(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_or_lock); /** + * lockref_put_return - Decrement reference count if possible + * @lockref: pointer to lockref structure + * + * Decrement the reference count and return the new value. + * If the lockref was dead or locked, return an error. + */ +int lockref_put_return(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count--; + if (old.count <= 0) + return -1; + , + return new.count; + ); + return -1; +} +EXPORT_SYMBOL(lockref_put_return); + +/** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken @@ -158,7 +178,7 @@ int lockref_get_not_dead(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if ((int)old.count < 0) + if (old.count < 0) return 0; , return 1; @@ -166,7 +186,7 @@ int lockref_get_not_dead(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if ((int) lockref->count >= 0) { + if (lockref->count >= 0) { lockref->count++; retval = 1; } diff --git a/security/commoncap.c b/security/commoncap.c index 2915d85..f66713b 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -434,7 +434,6 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data */ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_cap) { - struct dentry *dentry; int rc = 0; struct cpu_vfs_cap_data vcaps; @@ -446,9 +445,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) return 0; - dentry = dget(bprm->file->f_path.dentry); - - rc = get_vfs_caps_from_disk(dentry, &vcaps); + rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); if (rc < 0) { if (rc == -EINVAL) printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n", @@ -464,7 +461,6 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c __func__, rc, bprm->filename); out: - dput(dentry); if (rc) bprm_clear_caps(bprm); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 33db1ad..1684bcc 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1195,30 +1195,8 @@ static const struct file_operations sel_commit_bools_ops = { static void sel_remove_entries(struct dentry *de) { - struct list_head *node; - - spin_lock(&de->d_lock); - node = de->d_subdirs.next; - while (node != &de->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_child); - - spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); - list_del_init(node); - - if (d->d_inode) { - dget_dlock(d); - spin_unlock(&de->d_lock); - spin_unlock(&d->d_lock); - d_delete(d); - simple_unlink(de->d_inode, d); - dput(d); - spin_lock(&de->d_lock); - } else - spin_unlock(&d->d_lock); - node = de->d_subdirs.next; - } - - spin_unlock(&de->d_lock); + d_genocide(de); + shrink_dcache_parent(de); } #define BOOL_DIR_NAME "booleans" @@ -1668,37 +1646,13 @@ static int sel_make_class_dir_entries(char *classname, int index, return rc; } -static void sel_remove_classes(void) -{ - struct list_head *class_node; - - list_for_each(class_node, &class_dir->d_subdirs) { - struct dentry *class_subdir = list_entry(class_node, - struct dentry, d_child); - struct list_head *class_subdir_node; - - list_for_each(class_subdir_node, &class_subdir->d_subdirs) { - struct dentry *d = list_entry(class_subdir_node, - struct dentry, d_child); - - if (d->d_inode) - if (d->d_inode->i_mode & S_IFDIR) - sel_remove_entries(d); - } - - sel_remove_entries(class_subdir); - } - - sel_remove_entries(class_dir); -} - static int sel_make_classes(void) { int rc, nclasses, i; char **classes; /* delete any existing entries */ - sel_remove_classes(); + sel_remove_entries(class_dir); rc = security_get_classes(&classes, &nclasses); if (rc) |