From fee0c54e28f6ca187add93dfca226a8093cfa931 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Fri, 20 Dec 2013 16:43:50 -0800 Subject: dma-buf: avoid using IS_ERR_OR_NULL dma_buf_map_attachment and dma_buf_vmap can return NULL or ERR_PTR on an error. This encourages a common buggy pattern in callers: sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); if (IS_ERR_OR_NULL(sgt)) return PTR_ERR(sgt); This causes the caller to return 0 on an error. IS_ERR_OR_NULL is almost always a sign of poorly-defined error handling. This patch converts dma_buf_map_attachment to always return ERR_PTR, and fixes the callers that incorrectly handled NULL. There are a few more callers that were not checking for NULL at all, which would have dereferenced a NULL pointer later. There are also a few more callers that correctly handled NULL and ERR_PTR differently, I left those alone but they could also be modified to delete the NULL check. This patch also converts dma_buf_vmap to always return NULL on error. All the callers to dma_buf_vmap only check for NULL, and would have dereferenced an ERR_PTR and panic'd if one was ever returned. This is not consistent with the rest of the dma buf APIs, but matches the expectations of all of the callers. Signed-off-by: Colin Cross Reviewed-by: Rob Clark Signed-off-by: Greg Kroah-Hartman --- drivers/base/dma-buf.c | 18 +++++++++++------- drivers/gpu/drm/drm_prime.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_dmabuf.c | 2 +- drivers/media/v4l2-core/videobuf2-dma-contig.c | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index 1e16cbd..cfe1d8b 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -251,9 +251,8 @@ EXPORT_SYMBOL_GPL(dma_buf_put); * @dmabuf: [in] buffer to attach device to. * @dev: [in] device to be attached. * - * Returns struct dma_buf_attachment * for this attachment; may return negative - * error codes. 
- * + * Returns struct dma_buf_attachment * for this attachment; returns ERR_PTR on + * error. */ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, struct device *dev) @@ -319,9 +318,8 @@ EXPORT_SYMBOL_GPL(dma_buf_detach); * @attach: [in] attachment whose scatterlist is to be returned * @direction: [in] direction of DMA transfer * - * Returns sg_table containing the scatterlist to be returned; may return NULL - * or ERR_PTR. - * + * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR + * on error. */ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, enum dma_data_direction direction) @@ -334,6 +332,8 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, return ERR_PTR(-EINVAL); sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); + if (!sg_table) + sg_table = ERR_PTR(-ENOMEM); return sg_table; } @@ -544,6 +544,8 @@ EXPORT_SYMBOL_GPL(dma_buf_mmap); * These calls are optional in drivers. The intended use for them * is for mapping objects linear in kernel space for high use objects. * Please attempt to use kmap/kunmap before thinking about these interfaces. + * + * Returns NULL on error. 
*/ void *dma_buf_vmap(struct dma_buf *dmabuf) { @@ -566,7 +568,9 @@ void *dma_buf_vmap(struct dma_buf *dmabuf) BUG_ON(dmabuf->vmap_ptr); ptr = dmabuf->ops->vmap(dmabuf); - if (IS_ERR_OR_NULL(ptr)) + if (WARN_ON_ONCE(IS_ERR(ptr))) + ptr = NULL; + if (!ptr) goto out_unlock; dmabuf->vmap_ptr = ptr; diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 56805c3..bb516fd 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -471,7 +471,7 @@ struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev, get_dma_buf(dma_buf); sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); - if (IS_ERR_OR_NULL(sgt)) { + if (IS_ERR(sgt)) { ret = PTR_ERR(sgt); goto fail_detach; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c index 59827cc..c786cd4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c @@ -224,7 +224,7 @@ struct drm_gem_object *exynos_dmabuf_prime_import(struct drm_device *drm_dev, get_dma_buf(dma_buf); sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); - if (IS_ERR_OR_NULL(sgt)) { + if (IS_ERR(sgt)) { ret = PTR_ERR(sgt); goto err_buf_detach; } diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c index 33d3871d..880be07 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c @@ -719,7 +719,7 @@ static int vb2_dc_map_dmabuf(void *mem_priv) /* get the associated scatterlist for this buffer */ sgt = dma_buf_map_attachment(buf->db_attach, buf->dma_dir); - if (IS_ERR_OR_NULL(sgt)) { + if (IS_ERR(sgt)) { pr_err("Error getting dmabuf scatterlist\n"); return -EINVAL; } -- cgit v1.1 From a6607930b6cd829fc7f680b48a937d827e0bb931 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:02:54 -0500 Subject: kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag kernfs_deactivate() forgot to 
check whether KERNFS_LOCKDEP is set before performing lockdep annotations and ends up feeding uninitialized lockdep_map to lockdep triggering warning like the following on USB stick hotunplug. usb 1-2: USB disconnect, device number 2 INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 1 PID: 62 Comm: khubd Not tainted 3.13.0-work+ #82 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 ffff880065ca7f60 ffff88013a4ffa08 ffffffff81cfb6bd 0000000000000002 ffff88013a4ffac8 ffffffff810f8530 ffff88013a4fc710 0000000000000002 ffff880100000000 ffffffff82a3db50 0000000000000001 ffff88013a4fc710 Call Trace: [] dump_stack+0x4e/0x7a [] __lock_acquire+0x1910/0x1e70 [] lock_acquire+0x9a/0x1d0 [] kernfs_deactivate+0xee/0x130 [] kernfs_addrm_finish+0x38/0x60 [] kernfs_remove_by_name_ns+0x51/0xa0 [] remove_files.isra.1+0x41/0x80 [] sysfs_remove_group+0x47/0xa0 [] sysfs_remove_groups+0x33/0x50 [] device_remove_attrs+0x4d/0x80 [] device_del+0x12e/0x1d0 [] usb_disconnect+0x122/0x1a0 [] hub_thread+0x3c5/0x1290 [] kthread+0xed/0x110 [] ret_from_fork+0x7c/0xb0 Fix it by making kernfs_deactivate() perform lockdep annotations only if KERNFS_LOCKDEP is set. Signed-off-by: Tejun Heo Reported-by: Fabio Estevam Reported-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 5104cf5..bd6e18b 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -187,19 +187,23 @@ static void kernfs_deactivate(struct kernfs_node *kn) kn->u.completion = (void *)&wait; - rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) + rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); /* atomic_add_return() is a mb(), put_active() will always see * the updated kn->u.completion. 
*/ v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active); if (v != KN_DEACTIVATED_BIAS) { - lock_contended(&kn->dep_map, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) + lock_contended(&kn->dep_map, _RET_IP_); wait_for_completion(&wait); } - lock_acquired(&kn->dep_map, _RET_IP_); - rwsem_release(&kn->dep_map, 1, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) { + lock_acquired(&kn->dep_map, _RET_IP_); + rwsem_release(&kn->dep_map, 1, _RET_IP_); + } } /** -- cgit v1.1 From abd54f028ec30976d6e797e7474ec91d96186a0c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:02:55 -0500 Subject: kernfs: replace kernfs_node->u.completion with kernfs_root->deactivate_waitq kernfs_node->u.completion is used to notify deactivation completion from kernfs_put_active() to kernfs_deactivate(). We now allow multiple racing removals of the same node and the current removal scheme is no longer correct - kernfs_remove() invocation may return before the node is properly deactivated if it races against another removal. The removal path will be restructured to address the issue. To help such restructure which requires supporting multiple waiters, this patch replaces kernfs_node->u.completion with kernfs_root->deactivate_waitq. This makes deactivation event notifications share a per-root waitqueue_head; however, the wait path is quite cold and this will also allow shaving one pointer off kernfs_node. v2: Refreshed on top of ("kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag"). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 31 +++++++++++++------------------ include/linux/kernfs.h | 4 ++-- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index bd6e18b..2193d30 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -8,6 +8,7 @@ * This file is released under the GPLv2. 
*/ +#include #include #include #include @@ -151,6 +152,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) */ void kernfs_put_active(struct kernfs_node *kn) { + struct kernfs_root *root = kernfs_root(kn); int v; if (unlikely(!kn)) @@ -162,11 +164,7 @@ void kernfs_put_active(struct kernfs_node *kn) if (likely(v != KN_DEACTIVATED_BIAS)) return; - /* - * atomic_dec_return() is a mb(), we'll always see the updated - * kn->u.completion. - */ - complete(kn->u.completion); + wake_up_all(&root->deactivate_waitq); } /** @@ -177,28 +175,24 @@ void kernfs_put_active(struct kernfs_node *kn) */ static void kernfs_deactivate(struct kernfs_node *kn) { - DECLARE_COMPLETION_ONSTACK(wait); - int v; + struct kernfs_root *root = kernfs_root(kn); BUG_ON(!(kn->flags & KERNFS_REMOVED)); if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF)) return; - kn->u.completion = (void *)&wait; - if (kn->flags & KERNFS_LOCKDEP) rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); - /* atomic_add_return() is a mb(), put_active() will always see - * the updated kn->u.completion. 
- */ - v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active); - if (v != KN_DEACTIVATED_BIAS) { - if (kn->flags & KERNFS_LOCKDEP) - lock_contended(&kn->dep_map, _RET_IP_); - wait_for_completion(&wait); - } + atomic_add(KN_DEACTIVATED_BIAS, &kn->active); + + if ((kn->flags & KERNFS_LOCKDEP) && + atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) + lock_contended(&kn->dep_map, _RET_IP_); + + wait_event(root->deactivate_waitq, + atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); if (kn->flags & KERNFS_LOCKDEP) { lock_acquired(&kn->dep_map, _RET_IP_); @@ -630,6 +624,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) root->dir_ops = kdops; root->kn = kn; + init_waitqueue_head(&root->deactivate_waitq); return root; } diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5be9f02..295a3bf 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include struct file; struct dentry; @@ -92,7 +92,6 @@ struct kernfs_node { struct rb_node rb; union { - struct completion *completion; struct kernfs_node *removed_list; } u; @@ -133,6 +132,7 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct ida ino_ida; struct kernfs_dir_ops *dir_ops; + wait_queue_head_t deactivate_waitq; }; struct kernfs_open_file { -- cgit v1.1 From 35beab0635f3cdd475e3c11a304b866c25b76fcf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:02:56 -0500 Subject: kernfs: restructure removal path to fix possible premature return The recursive nature of kernfs_remove() means that, even if kernfs_remove() is not allowed to be called multiple times on the same node, there may be race conditions between removal of parent and its descendants. While we can claim that kernfs_remove() shouldn't be called on one of the descendants while the removal of an ancestor is in progress, such rule is unnecessarily restrictive and very difficult to enforce. 
It's better to simply allow invoking kernfs_remove() as the caller sees fit as long as the caller ensures that the node is accessible. The current behavior in such situations is broken. Whoever enters removal path first takes the node off the hierarchy and then deactivates. Subsequent removers either return as soon as they notice that they are not the first one or can't even find the target node as it has already been removed from the hierarchy. In both cases, the subsequent removers may finish prematurely while the nodes which should be removed and drained are still being processed by the first one. This patch restructures the removal path so that multiple removers, whether through recursion or direct invocation, always obey the following rules. * When there are multiple concurrent removers, only one puts the base ref. * Regardless of which one puts the base ref, all removers are blocked until the target node is fully deactivated and removed. To achieve the above, removal path now first marks all descendants including self REMOVED and then deactivates and unlinks leftmost descendant one-by-one. kernfs_deactivate() is called directly from __kernfs_remove() and drops and regrabs kernfs_mutex for each descendant to drain active refs. As this means that multiple removers can enter kernfs_deactivate() for the same node, the function is updated so that it can handle multiple deactivators of the same node - only one actually deactivates but all wait till drain completion. The restructured removal path guarantees that a removed node gets unlinked only after the node is deactivated and drained. Combined with proper multiple deactivator handling, this guarantees that any invocation of kernfs_remove() returns only after the node itself and all its descendants are deactivated, drained and removed. v2: Draining separated into a separate loop (used to be in the same loop as unlink) and done from __kernfs_deactivate(). This is to allow exposing deactivation as a separate interface later. 
Root node removal was broken in v1 patch. Fixed. v3: Revert most of v2 except for root node removal fix and simplification of KERNFS_REMOVED setting loop. v4: Refreshed on top of ("kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag"). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 129 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 68 insertions(+), 61 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 2193d30..3ac9373 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -106,18 +106,24 @@ static int kernfs_link_sibling(struct kernfs_node *kn) * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree * @kn: kernfs_node of interest * - * Unlink @kn from its sibling rbtree which starts from - * kn->parent->dir.children. + * Try to unlink @kn from its sibling rbtree which starts from + * kn->parent->dir.children. Returns %true if @kn was actually + * removed, %false if @kn wasn't on the rbtree. * * Locking: * mutex_lock(kernfs_mutex) */ -static void kernfs_unlink_sibling(struct kernfs_node *kn) +static bool kernfs_unlink_sibling(struct kernfs_node *kn) { + if (RB_EMPTY_NODE(&kn->rb)) + return false; + if (kernfs_type(kn) == KERNFS_DIR) kn->parent->dir.subdirs--; rb_erase(&kn->rb, &kn->parent->dir.children); + RB_CLEAR_NODE(&kn->rb); + return true; } /** @@ -171,26 +177,34 @@ void kernfs_put_active(struct kernfs_node *kn) * kernfs_deactivate - deactivate kernfs_node * @kn: kernfs_node to deactivate * - * Deny new active references and drain existing ones. + * Deny new active references and drain existing ones. Mutiple + * removers may invoke this function concurrently on @kn and all will + * return after deactivation and draining are complete. 
*/ static void kernfs_deactivate(struct kernfs_node *kn) + __releases(&kernfs_mutex) __acquires(&kernfs_mutex) { struct kernfs_root *root = kernfs_root(kn); + lockdep_assert_held(&kernfs_mutex); BUG_ON(!(kn->flags & KERNFS_REMOVED)); if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF)) return; - if (kn->flags & KERNFS_LOCKDEP) - rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); + /* only the first invocation on @kn should deactivate it */ + if (atomic_read(&kn->active) >= 0) + atomic_add(KN_DEACTIVATED_BIAS, &kn->active); - atomic_add(KN_DEACTIVATED_BIAS, &kn->active); + mutex_unlock(&kernfs_mutex); - if ((kn->flags & KERNFS_LOCKDEP) && - atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) - lock_contended(&kn->dep_map, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) { + rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); + if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) + lock_contended(&kn->dep_map, _RET_IP_); + } + /* but everyone should wait for draining */ wait_event(root->deactivate_waitq, atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); @@ -198,6 +212,8 @@ static void kernfs_deactivate(struct kernfs_node *kn) lock_acquired(&kn->dep_map, _RET_IP_); rwsem_release(&kn->dep_map, 1, _RET_IP_); } + + mutex_lock(&kernfs_mutex); } /** @@ -347,6 +363,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, atomic_set(&kn->count, 1); atomic_set(&kn->active, 0); + RB_CLEAR_NODE(&kn->rb); kn->name = name; kn->mode = mode; @@ -454,49 +471,6 @@ int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) } /** - * kernfs_remove_one - remove kernfs_node from parent - * @acxt: addrm context to use - * @kn: kernfs_node to be removed - * - * Mark @kn removed and drop nlink of parent inode if @kn is a - * directory. @kn is unlinked from the children list. - * - * This function should be called between calls to - * kernfs_addrm_start() and kernfs_addrm_finish() and should be - * passed the same @acxt as passed to kernfs_addrm_start(). 
- * - * LOCKING: - * Determined by kernfs_addrm_start(). - */ -static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt, - struct kernfs_node *kn) -{ - struct kernfs_iattrs *ps_iattr; - - /* - * Removal can be called multiple times on the same node. Only the - * first invocation is effective and puts the base ref. - */ - if (kn->flags & KERNFS_REMOVED) - return; - - if (kn->parent) { - kernfs_unlink_sibling(kn); - - /* Update timestamps on the parent */ - ps_iattr = kn->parent->iattr; - if (ps_iattr) { - ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; - ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; - } - } - - kn->flags |= KERNFS_REMOVED; - kn->u.removed_list = acxt->removed; - acxt->removed = kn; -} - -/** * kernfs_addrm_finish - finish up kernfs_node add/remove * @acxt: addrm context to finish up * @@ -519,7 +493,6 @@ void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt) acxt->removed = kn->u.removed_list; - kernfs_deactivate(kn); kernfs_unmap_bin_file(kn); kernfs_put(kn); } @@ -828,20 +801,54 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) { - struct kernfs_node *pos, *next; + struct kernfs_node *pos; + + lockdep_assert_held(&kernfs_mutex); if (!kn) return; pr_debug("kernfs %s: removing\n", kn->name); - next = NULL; + /* disable lookup and node creation under @kn */ + pos = NULL; + while ((pos = kernfs_next_descendant_post(pos, kn))) + pos->flags |= KERNFS_REMOVED; + + /* deactivate and unlink the subtree node-by-node */ do { - pos = next; - next = kernfs_next_descendant_post(pos, kn); - if (pos) - kernfs_remove_one(acxt, pos); - } while (next); + pos = kernfs_leftmost_descendant(kn); + + /* + * kernfs_deactivate() drops kernfs_mutex temporarily and + * @pos's base ref could have been put by someone else by + * the time the function returns. Make sure it doesn't go + * away underneath us. 
+ */ + kernfs_get(pos); + + kernfs_deactivate(pos); + + /* + * kernfs_unlink_sibling() succeeds once per node. Use it + * to decide who's responsible for cleanups. + */ + if (!pos->parent || kernfs_unlink_sibling(pos)) { + struct kernfs_iattrs *ps_iattr = + pos->parent ? pos->parent->iattr : NULL; + + /* update timestamps on the parent */ + if (ps_iattr) { + ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; + ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; + } + + pos->u.removed_list = acxt->removed; + acxt->removed = pos; + } + + kernfs_put(pos); + } while (pos != kn); } /** -- cgit v1.1 From ccf02aaf8167bb8bfb3c17c01c843d309b872671 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:02:57 -0500 Subject: kernfs: invoke kernfs_unmap_bin_file() directly from kernfs_deactivate() kernfs_unmap_bin_file() is supposed to unmap all memory mappings of the target file before kernfs_remove() finishes; however, it currently is being called from kernfs_addrm_finish() and has the same race problem as the original implementation of deactivation when there are multiple removers - only the remover which snatches the node to its addrm_cxt->removed list is guaranteed to wait for its completion before returning. It can be easily fixed by moving kernfs_unmap_bin_file() invocation from kernfs_addrm_finish() to kernfs_deactivate(). The function may be called multiple times but that shouldn't do any harm. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 3ac9373..9603c06 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -177,9 +177,10 @@ void kernfs_put_active(struct kernfs_node *kn) * kernfs_deactivate - deactivate kernfs_node * @kn: kernfs_node to deactivate * - * Deny new active references and drain existing ones. 
Mutiple - * removers may invoke this function concurrently on @kn and all will - * return after deactivation and draining are complete. + * Deny new active references, drain existing ones and nuke all + * existing mmaps. Mutiple removers may invoke this function + * concurrently on @kn and all will return after deactivation and + * draining are complete. */ static void kernfs_deactivate(struct kernfs_node *kn) __releases(&kernfs_mutex) __acquires(&kernfs_mutex) @@ -213,6 +214,8 @@ static void kernfs_deactivate(struct kernfs_node *kn) rwsem_release(&kn->dep_map, 1, _RET_IP_); } + kernfs_unmap_bin_file(kn); + mutex_lock(&kernfs_mutex); } @@ -493,7 +496,6 @@ void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt) acxt->removed = kn->u.removed_list; - kernfs_unmap_bin_file(kn); kernfs_put(kn); } } -- cgit v1.1 From 988cd7afb3f37598891ca70b4c6eb914c338c46a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:02:58 -0500 Subject: kernfs: remove kernfs_addrm_cxt kernfs_addrm_cxt and the accompanying kernfs_addrm_start/finish() were added because there were operations which should be performed outside kernfs_mutex after adding and removing kernfs_nodes. The necessary operations were recorded in kernfs_addrm_cxt and performed by kernfs_addrm_finish(); however, after the recent changes which relocated deactivation and unmapping so that they're performed directly during removal, the only operation kernfs_addrm_finish() performs is kernfs_put(), which can be moved inside the removal path too. This patch moves the kernfs_put() of the base ref to __kernfs_remove() and remove kernfs_addrm_cxt and kernfs_addrm_start/finish(). * kernfs_add_one() is updated to grab and release kernfs_mutex itself. sysfs_addrm_start/finish() invocations around it are removed from all users. * __kernfs_remove() puts an unlinked node directly instead of chaining it to kernfs_addrm_cxt. 
Its callers are updated to grab and release kernfs_mutex instead of calling kernfs_addrm_start/finish() around it. v2: Rebased on top of "kernfs: associate a new kernfs_node with its parent on creation" which dropped @parent from kernfs_add_one(). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 109 ++++++++++---------------------------------- fs/kernfs/file.c | 6 +-- fs/kernfs/kernfs-internal.h | 11 +---- fs/kernfs/symlink.c | 6 +-- include/linux/kernfs.h | 4 -- 5 files changed, 28 insertions(+), 108 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 9603c06..948551d 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -396,69 +396,44 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, } /** - * kernfs_addrm_start - prepare for kernfs_node add/remove - * @acxt: pointer to kernfs_addrm_cxt to be used - * - * This function is called when the caller is about to add or remove - * kernfs_node. This function acquires kernfs_mutex. @acxt is used - * to keep and pass context to other addrm functions. - * - * LOCKING: - * Kernel thread context (may sleep). kernfs_mutex is locked on - * return. - */ -void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt) - __acquires(kernfs_mutex) -{ - memset(acxt, 0, sizeof(*acxt)); - - mutex_lock(&kernfs_mutex); -} - -/** * kernfs_add_one - add kernfs_node to parent without warning - * @acxt: addrm context to use * @kn: kernfs_node to be added * * The caller must already have initialized @kn->parent. This * function increments nlink of the parent's inode if @kn is a * directory and link into the children list of the parent. * - * This function should be called between calls to - * kernfs_addrm_start() and kernfs_addrm_finish() and should be passed - * the same @acxt as passed to kernfs_addrm_start(). - * - * LOCKING: - * Determined by kernfs_addrm_start(). - * * RETURNS: * 0 on success, -EEXIST if entry with the given name already * exists. 
*/ -int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) +int kernfs_add_one(struct kernfs_node *kn) { struct kernfs_node *parent = kn->parent; - bool has_ns = kernfs_ns_enabled(parent); struct kernfs_iattrs *ps_iattr; + bool has_ns; int ret; - if (has_ns != (bool)kn->ns) { - WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, kn->name); - return -EINVAL; - } + mutex_lock(&kernfs_mutex); + + ret = -EINVAL; + has_ns = kernfs_ns_enabled(parent); + if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", + has_ns ? "required" : "invalid", parent->name, kn->name)) + goto out_unlock; if (kernfs_type(parent) != KERNFS_DIR) - return -EINVAL; + goto out_unlock; + ret = -ENOENT; if (parent->flags & KERNFS_REMOVED) - return -ENOENT; + goto out_unlock; kn->hash = kernfs_name_hash(kn->name, kn->ns); ret = kernfs_link_sibling(kn); if (ret) - return ret; + goto out_unlock; /* Update timestamps on the parent */ ps_iattr = parent->iattr; @@ -469,35 +444,10 @@ int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) /* Mark the entry added into directory tree */ kn->flags &= ~KERNFS_REMOVED; - - return 0; -} - -/** - * kernfs_addrm_finish - finish up kernfs_node add/remove - * @acxt: addrm context to finish up - * - * Finish up kernfs_node add/remove. Resources acquired by - * kernfs_addrm_start() are released and removed kernfs_nodes are - * cleaned up. - * - * LOCKING: - * kernfs_mutex is released. 
- */ -void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt) - __releases(kernfs_mutex) -{ - /* release resources acquired by kernfs_addrm_start() */ + ret = 0; +out_unlock: mutex_unlock(&kernfs_mutex); - - /* kill removed kernfs_nodes */ - while (acxt->removed) { - struct kernfs_node *kn = acxt->removed; - - acxt->removed = kn->u.removed_list; - - kernfs_put(kn); - } + return ret; } /** @@ -630,7 +580,6 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, void *priv, const void *ns) { - struct kernfs_addrm_cxt acxt; struct kernfs_node *kn; int rc; @@ -644,10 +593,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, kn->priv = priv; /* link in */ - kernfs_addrm_start(&acxt); - rc = kernfs_add_one(&acxt, kn); - kernfs_addrm_finish(&acxt); - + rc = kernfs_add_one(kn); if (!rc) return kn; @@ -800,8 +746,7 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, return pos->parent; } -static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, - struct kernfs_node *kn) +static void __kernfs_remove(struct kernfs_node *kn) { struct kernfs_node *pos; @@ -845,8 +790,7 @@ static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; } - pos->u.removed_list = acxt->removed; - acxt->removed = pos; + kernfs_put(pos); } kernfs_put(pos); @@ -861,11 +805,9 @@ static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, */ void kernfs_remove(struct kernfs_node *kn) { - struct kernfs_addrm_cxt acxt; - - kernfs_addrm_start(&acxt); - __kernfs_remove(&acxt, kn); - kernfs_addrm_finish(&acxt); + mutex_lock(&kernfs_mutex); + __kernfs_remove(kn); + mutex_unlock(&kernfs_mutex); } /** @@ -880,7 +822,6 @@ void kernfs_remove(struct kernfs_node *kn) int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns) { - struct kernfs_addrm_cxt acxt; struct kernfs_node *kn; if (!parent) { @@ -889,13 +830,13 @@ int 
kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, return -ENOENT; } - kernfs_addrm_start(&acxt); + mutex_lock(&kernfs_mutex); kn = kernfs_find_ns(parent, name, ns); if (kn) - __kernfs_remove(&acxt, kn); + __kernfs_remove(kn); - kernfs_addrm_finish(&acxt); + mutex_unlock(&kernfs_mutex); if (kn) return 0; diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index dbf397b..10a8c91 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -820,7 +820,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, bool name_is_static, struct lock_class_key *key) { - struct kernfs_addrm_cxt acxt; struct kernfs_node *kn; unsigned flags; int rc; @@ -855,10 +854,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, if (ops->mmap) kn->flags |= KERNFS_HAS_MMAP; - kernfs_addrm_start(&acxt); - rc = kernfs_add_one(&acxt, kn); - kernfs_addrm_finish(&acxt); - + rc = kernfs_add_one(kn); if (rc) { kernfs_put(kn); return ERR_PTR(rc); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index eb536b7..46b58de 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -45,13 +45,6 @@ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) } /* - * Context structure to be used while adding/removing nodes. 
- */ -struct kernfs_addrm_cxt { - struct kernfs_node *removed; -}; - -/* * mount.c */ struct kernfs_super_info { @@ -100,9 +93,7 @@ extern const struct inode_operations kernfs_dir_iops; struct kernfs_node *kernfs_get_active(struct kernfs_node *kn); void kernfs_put_active(struct kernfs_node *kn); -void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt); -int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn); -void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt); +int kernfs_add_one(struct kernfs_node *kn); struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, unsigned flags); diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 4d45705..8a19889 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -27,7 +27,6 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, struct kernfs_node *target) { struct kernfs_node *kn; - struct kernfs_addrm_cxt acxt; int error; kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK); @@ -39,10 +38,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, kn->symlink.target_kn = target; kernfs_get(target); /* ref owned by symlink */ - kernfs_addrm_start(&acxt); - error = kernfs_add_one(&acxt, kn); - kernfs_addrm_finish(&acxt); - + error = kernfs_add_one(kn); if (!error) return kn; diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 295a3bf..38646f6 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -91,10 +91,6 @@ struct kernfs_node { struct rb_node rb; - union { - struct kernfs_node *removed_list; - } u; - const void *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ union { -- cgit v1.1 From 182fd64b66342219d6fcf2b84d337529d120d95c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:02:59 -0500 Subject: kernfs: remove KERNFS_ACTIVE_REF and add kernfs_lockdep() There currently are two mechanisms gating active ref lockdep annotations - KERNFS_LOCKDEP flag and 
KERNFS_ACTIVE_REF type mask. The former disables lockdep annotations in kernfs_get/put_active() while the latter disables all of kernfs_deactivate(). While KERNFS_ACTIVE_REF also behaves as an optimization to skip the deactivation step for non-file nodes, the benefit is marginal and it needlessly diverges code paths. Let's drop KERNFS_ACTIVE_REF. While at it, add a test helper kernfs_lockdep() to test KERNFS_LOCKDEP flag so that it's more convenient and the related code can be compiled out when not enabled. v2: Refreshed on top of ("kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag"). As the earlier patch already added KERNFS_LOCKDEP tests to kernfs_deactivate(), those additions are dropped from this patch and the existing ones are simply converted to kernfs_lockdep(). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 20 +++++++++++++------- include/linux/kernfs.h | 1 - 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 948551d..5cf137b 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -22,6 +22,15 @@ DEFINE_MUTEX(kernfs_mutex); #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) +static bool kernfs_lockdep(struct kernfs_node *kn) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + return kn->flags & KERNFS_LOCKDEP; +#else + return false; +#endif +} + /** * kernfs_name_hash * @name: Null terminated string to hash @@ -144,7 +153,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) if (!atomic_inc_unless_negative(&kn->active)) return NULL; - if (kn->flags & KERNFS_LOCKDEP) + if (kernfs_lockdep(kn)) rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); return kn; } @@ -164,7 +173,7 @@ void kernfs_put_active(struct kernfs_node *kn) if (unlikely(!kn)) return; - if (kn->flags & KERNFS_LOCKDEP) + if (kernfs_lockdep(kn)) rwsem_release(&kn->dep_map, 1, _RET_IP_); v = atomic_dec_return(&kn->active); if (likely(v != KN_DEACTIVATED_BIAS)) @@ -190,16 +199,13 @@ static void 
kernfs_deactivate(struct kernfs_node *kn) lockdep_assert_held(&kernfs_mutex); BUG_ON(!(kn->flags & KERNFS_REMOVED)); - if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF)) - return; - /* only the first invocation on @kn should deactivate it */ if (atomic_read(&kn->active) >= 0) atomic_add(KN_DEACTIVATED_BIAS, &kn->active); mutex_unlock(&kernfs_mutex); - if (kn->flags & KERNFS_LOCKDEP) { + if (kernfs_lockdep(kn)) { rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) lock_contended(&kn->dep_map, _RET_IP_); @@ -209,7 +215,7 @@ static void kernfs_deactivate(struct kernfs_node *kn) wait_event(root->deactivate_waitq, atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); - if (kn->flags & KERNFS_LOCKDEP) { + if (kernfs_lockdep(kn)) { lock_acquired(&kn->dep_map, _RET_IP_); rwsem_release(&kn->dep_map, 1, _RET_IP_); } diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 38646f6..dc4cd6c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -35,7 +35,6 @@ enum kernfs_node_type { }; #define KERNFS_TYPE_MASK 0x000f -#define KERNFS_ACTIVE_REF KERNFS_FILE #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK enum kernfs_node_flag { -- cgit v1.1 From 81c173cb5e87fbb47ccd80630faefe39bbf68449 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:03:00 -0500 Subject: kernfs: remove KERNFS_REMOVED KERNFS_REMOVED is used to mark half-initialized and dying nodes so that they don't show up in lookups and deny adding new nodes under or renaming it; however, its role overlaps that of deactivation. It's necessary to deny addition of new children while removal is in progress; however, this role considerably intersects with deactivation - KERNFS_REMOVED prevents new children while deactivation prevents new file operations. There's no reason to have them separate making things more complex than necessary. This patch removes KERNFS_REMOVED. * Instead of KERNFS_REMOVED, each node now starts its life deactivated. 
This means that we now use both atomic_add() and atomic_sub() on KN_DEACTIVATED_BIAS, which is INT_MIN. The compiler generates overflow warnings when negating INT_MIN as the negation can't be represented as a positive number. Nothing is actually broken but let's bump BIAS by one to avoid the warnings for archs which negate the subtrahend. * A new helper kernfs_active() which tests whether kn->active >= 0 is added for convenience and lockdep annotation. All KERNFS_REMOVED tests are replaced with negated kernfs_active() tests. * __kernfs_remove() is updated to deactivate, but not drain, all nodes in the subtree instead of setting KERNFS_REMOVED. This removes deactivation from kernfs_deactivate(), which is now renamed to kernfs_drain(). * Sanity check on KERNFS_REMOVED in kernfs_put() is replaced with checks on the active ref. * Some comment style updates in the affected area. v2: Reordered before removal path restructuring. kernfs_active() dropped and kernfs_get/put_active() used instead. RB_EMPTY_NODE() used in the lookup paths. v3: Reverted most of v2 except for creating a new node with KN_DEACTIVATED_BIAS. 
Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 66 ++++++++++++++++++++++++--------------------- fs/kernfs/kernfs-internal.h | 3 ++- include/linux/kernfs.h | 1 - 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 5cf137b..d0fd739 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -22,6 +22,12 @@ DEFINE_MUTEX(kernfs_mutex); #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) +static bool kernfs_active(struct kernfs_node *kn) +{ + lockdep_assert_held(&kernfs_mutex); + return atomic_read(&kn->active) >= 0; +} + static bool kernfs_lockdep(struct kernfs_node *kn) { #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -183,25 +189,20 @@ void kernfs_put_active(struct kernfs_node *kn) } /** - * kernfs_deactivate - deactivate kernfs_node - * @kn: kernfs_node to deactivate + * kernfs_drain - drain kernfs_node + * @kn: kernfs_node to drain * - * Deny new active references, drain existing ones and nuke all - * existing mmaps. Mutiple removers may invoke this function - * concurrently on @kn and all will return after deactivation and - * draining are complete. + * Drain existing usages and nuke all existing mmaps of @kn. Mutiple + * removers may invoke this function concurrently on @kn and all will + * return after draining is complete. */ -static void kernfs_deactivate(struct kernfs_node *kn) +static void kernfs_drain(struct kernfs_node *kn) __releases(&kernfs_mutex) __acquires(&kernfs_mutex) { struct kernfs_root *root = kernfs_root(kn); lockdep_assert_held(&kernfs_mutex); - BUG_ON(!(kn->flags & KERNFS_REMOVED)); - - /* only the first invocation on @kn should deactivate it */ - if (atomic_read(&kn->active) >= 0) - atomic_add(KN_DEACTIVATED_BIAS, &kn->active); + WARN_ON_ONCE(kernfs_active(kn)); mutex_unlock(&kernfs_mutex); @@ -253,13 +254,15 @@ void kernfs_put(struct kernfs_node *kn) return; root = kernfs_root(kn); repeat: - /* Moving/renaming is always done while holding reference. 
+ /* + * Moving/renaming is always done while holding reference. * kn->parent won't change beneath us. */ parent = kn->parent; - WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n", - parent ? parent->name : "", kn->name); + WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, + "kernfs_put: %s/%s: released with incorrect active_ref %d\n", + parent ? parent->name : "", kn->name, atomic_read(&kn->active)); if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); @@ -301,8 +304,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) kn = dentry->d_fsdata; mutex_lock(&kernfs_mutex); - /* The kernfs node has been deleted */ - if (kn->flags & KERNFS_REMOVED) + /* The kernfs node has been deactivated */ + if (!kernfs_active(kn)) goto out_bad; /* The kernfs node has been moved? */ @@ -371,12 +374,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, kn->ino = ret; atomic_set(&kn->count, 1); - atomic_set(&kn->active, 0); + atomic_set(&kn->active, KN_DEACTIVATED_BIAS); RB_CLEAR_NODE(&kn->rb); kn->name = name; kn->mode = mode; - kn->flags = flags | KERNFS_REMOVED; + kn->flags = flags; return kn; @@ -432,7 +435,7 @@ int kernfs_add_one(struct kernfs_node *kn) goto out_unlock; ret = -ENOENT; - if (parent->flags & KERNFS_REMOVED) + if (!kernfs_active(parent)) goto out_unlock; kn->hash = kernfs_name_hash(kn->name, kn->ns); @@ -449,7 +452,7 @@ int kernfs_add_one(struct kernfs_node *kn) } /* Mark the entry added into directory tree */ - kn->flags &= ~KERNFS_REMOVED; + atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); ret = 0; out_unlock: mutex_unlock(&kernfs_mutex); @@ -549,7 +552,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) return ERR_PTR(-ENOMEM); } - kn->flags &= ~KERNFS_REMOVED; + atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); kn->priv = priv; kn->dir.root = root; @@ -763,24 +766,25 @@ static void __kernfs_remove(struct kernfs_node *kn) pr_debug("kernfs 
%s: removing\n", kn->name); - /* disable lookup and node creation under @kn */ + /* prevent any new usage under @kn by deactivating all nodes */ pos = NULL; while ((pos = kernfs_next_descendant_post(pos, kn))) - pos->flags |= KERNFS_REMOVED; + if (kernfs_active(pos)) + atomic_add(KN_DEACTIVATED_BIAS, &pos->active); /* deactivate and unlink the subtree node-by-node */ do { pos = kernfs_leftmost_descendant(kn); /* - * kernfs_deactivate() drops kernfs_mutex temporarily and - * @pos's base ref could have been put by someone else by - * the time the function returns. Make sure it doesn't go - * away underneath us. + * kernfs_drain() drops kernfs_mutex temporarily and @pos's + * base ref could have been put by someone else by the time + * the function returns. Make sure it doesn't go away + * underneath us. */ kernfs_get(pos); - kernfs_deactivate(pos); + kernfs_drain(pos); /* * kernfs_unlink_sibling() succeeds once per node. Use it @@ -865,7 +869,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, mutex_lock(&kernfs_mutex); error = -ENOENT; - if ((kn->flags | new_parent->flags) & KERNFS_REMOVED) + if (!kernfs_active(kn) || !kernfs_active(new_parent)) goto out; error = 0; @@ -925,7 +929,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) { if (pos) { - int valid = !(pos->flags & KERNFS_REMOVED) && + int valid = kernfs_active(pos) && pos->parent == parent && hash == pos->hash; kernfs_put(pos); if (!valid) diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 46b58de..a91d7a1 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -26,7 +26,8 @@ struct kernfs_iattrs { struct simple_xattrs xattrs; }; -#define KN_DEACTIVATED_BIAS INT_MIN +/* +1 to avoid triggering overflow warning when negating it */ +#define KN_DEACTIVATED_BIAS (INT_MIN + 1) /* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ diff --git 
a/include/linux/kernfs.h b/include/linux/kernfs.h index dc4cd6c..917bc6c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -38,7 +38,6 @@ enum kernfs_node_type { #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK enum kernfs_node_flag { - KERNFS_REMOVED = 0x0010, KERNFS_NS = 0x0020, KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, -- cgit v1.1 From 6b0afc2a21726b2d6b6aa441af40cafaf5405cc8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:03:01 -0500 Subject: kernfs, sysfs, driver-core: implement kernfs_remove_self() and its wrappers Sometimes it's necessary to implement a node which wants to delete nodes including itself. This isn't straightforward because of kernfs active reference. While a file operation is in progress, an active reference is held and kernfs_remove() waits for all such references to drain before completing. For a self-deleting node, this is a deadlock as kernfs_remove() ends up waiting for an active reference that itself is sitting on top of. This currently is worked around in the sysfs layer using sysfs_schedule_callback() which makes such removals asynchronous. While it works, it's rather cumbersome and inherently breaks synchronicity of the operation - the file operation which triggered the operation may complete before the removal is finished (or even started) and the removal may fail asynchronously. If a removal operation is immediately followed by another operation which expects the specific name to be available (e.g. removal followed by rename onto the same name), there's no way to make the latter operation reliable. The thing is there's no inherent reason for this to be asynchronous. All that's necessary to do this synchronously is a dedicated operation which drops its own active ref and deactivates self. This patch implements kernfs_remove_self() and its wrappers in sysfs and driver core. 
kernfs_remove_self() is to be called from one of the file operations, drops the active ref the task is holding, removes the self node, and restores active ref to the dead node so that the ref is balanced afterwards. __kernfs_remove() is updated so that it takes an early exit if the target node is already fully removed so that the active ref restored by kernfs_remove_self() after removal doesn't confuse the deactivation path. This makes implementing self-deleting nodes very easy. The normal removal path doesn't even need to be changed to use kernfs_remove_self() for the self-deleting node. The method can invoke kernfs_remove_self() on itself before proceeding with the normal removal path. kernfs_remove() invoked on the node by the normal deletion path will simply be ignored. This will replace sysfs_schedule_callback(). A subtle feature of sysfs_schedule_callback() is that it collapses multiple invocations - even if multiple removals are triggered, the removal callback is run only once. An equivalent effect can be achieved by testing the return value of kernfs_remove_self() - only the one which gets %true return value should proceed with actual deletion. All other instances of kernfs_remove_self() will wait till the enclosing kernfs operation which invoked the winning instance of kernfs_remove_self() finishes and then return %false. This trivially makes all users of kernfs_remove_self() automatically show correct synchronous behavior even when there are multiple concurrent operations - all "echo 1 > delete" instances will finish only after the whole operation is completed by one of the instances. Note that manipulation of active ref is implemented in separate public functions - kernfs_[un]break_active_protection(). kernfs_remove_self() is the only user at the moment but this will be used to cater to more complex cases. v2: For !CONFIG_SYSFS, dummy version kernfs_remove_self() was missing and sysfs_remove_file_self() had incorrect return type. Fix it. 
Reported by kbuild test bot. v3: kernfs_[un]break_active_protection() separated out from kernfs_remove_self() and exposed as public API. Signed-off-by: Tejun Heo Cc: Alan Stern Cc: kbuild test robot Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 17 ++++++ fs/kernfs/dir.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/sysfs/file.c | 23 +++++++++ include/linux/device.h | 2 + include/linux/kernfs.h | 8 +++ include/linux/sysfs.h | 7 +++ 6 files changed, 194 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 2b56717..9db57af 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -571,6 +571,23 @@ void device_remove_file(struct device *dev, EXPORT_SYMBOL_GPL(device_remove_file); /** + * device_remove_file_self - remove sysfs attribute file from its own method. + * @dev: device. + * @attr: device attribute descriptor. + * + * See kernfs_remove_self() for details. + */ +bool device_remove_file_self(struct device *dev, + const struct device_attribute *attr) +{ + if (dev) + return sysfs_remove_file_self(&dev->kobj, &attr->attr); + else + return false; +} +EXPORT_SYMBOL_GPL(device_remove_file_self); + +/** * device_create_bin_file - create sysfs binary attribute file for device. * @dev: device. * @attr: device binary attribute descriptor. diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index d0fd739..8c63ae1 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -761,7 +761,12 @@ static void __kernfs_remove(struct kernfs_node *kn) lockdep_assert_held(&kernfs_mutex); - if (!kn) + /* + * Short-circuit if non-root @kn has already finished removal. + * This is for kernfs_remove_self() which plays with active ref + * after removal. 
+ */ + if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb))) return; pr_debug("kernfs %s: removing\n", kn->name); @@ -821,6 +826,137 @@ void kernfs_remove(struct kernfs_node *kn) } /** + * kernfs_break_active_protection - break out of active protection + * @kn: the self kernfs_node + * + * The caller must be running off of a kernfs operation which is invoked + * with an active reference - e.g. one of kernfs_ops. Each invocation of + * this function must also be matched with an invocation of + * kernfs_unbreak_active_protection(). + * + * This function releases the active reference of @kn the caller is + * holding. Once this function is called, @kn may be removed at any point + * and the caller is solely responsible for ensuring that the objects it + * dereferences are accessible. + */ +void kernfs_break_active_protection(struct kernfs_node *kn) +{ + /* + * Take out ourself out of the active ref dependency chain. If + * we're called without an active ref, lockdep will complain. + */ + kernfs_put_active(kn); +} + +/** + * kernfs_unbreak_active_protection - undo kernfs_break_active_protection() + * @kn: the self kernfs_node + * + * If kernfs_break_active_protection() was called, this function must be + * invoked before finishing the kernfs operation. Note that while this + * function restores the active reference, it doesn't and can't actually + * restore the active protection - @kn may already or be in the process of + * being removed. Once kernfs_break_active_protection() is invoked, that + * protection is irreversibly gone for the kernfs operation instance. + * + * While this function may be called at any point after + * kernfs_break_active_protection() is invoked, its most useful location + * would be right before the enclosing kernfs operation returns. 
+ */ +void kernfs_unbreak_active_protection(struct kernfs_node *kn) +{ + /* + * @kn->active could be in any state; however, the increment we do + * here will be undone as soon as the enclosing kernfs operation + * finishes and this temporary bump can't break anything. If @kn + * is alive, nothing changes. If @kn is being deactivated, the + * soon-to-follow put will either finish deactivation or restore + * deactivated state. If @kn is already removed, the temporary + * bump is guaranteed to be gone before @kn is released. + */ + atomic_inc(&kn->active); + if (kernfs_lockdep(kn)) + rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_); +} + +/** + * kernfs_remove_self - remove a kernfs_node from its own method + * @kn: the self kernfs_node to remove + * + * The caller must be running off of a kernfs operation which is invoked + * with an active reference - e.g. one of kernfs_ops. This can be used to + * implement a file operation which deletes itself. + * + * For example, the "delete" file for a sysfs device directory can be + * implemented by invoking kernfs_remove_self() on the "delete" file + * itself. This function breaks the circular dependency of trying to + * deactivate self while holding an active ref itself. It isn't necessary + * to modify the usual removal path to use kernfs_remove_self(). The + * "delete" implementation can simply invoke kernfs_remove_self() on self + * before proceeding with the usual removal path. kernfs will ignore later + * kernfs_remove() on self. + * + * kernfs_remove_self() can be called multiple times concurrently on the + * same kernfs_node. Only the first one actually performs removal and + * returns %true. All others will wait until the kernfs operation which + * won self-removal finishes and return %false. Note that the losers wait + * for the completion of not only the winning kernfs_remove_self() but also + * the whole kernfs_ops which won the arbitration. 
This can be used to + * guarantee, for example, all concurrent writes to a "delete" file to + * finish only after the whole operation is complete. + */ +bool kernfs_remove_self(struct kernfs_node *kn) +{ + bool ret; + + mutex_lock(&kernfs_mutex); + kernfs_break_active_protection(kn); + + /* + * SUICIDAL is used to arbitrate among competing invocations. Only + * the first one will actually perform removal. When the removal + * is complete, SUICIDED is set and the active ref is restored + * while holding kernfs_mutex. The ones which lost arbitration + * waits for SUICDED && drained which can happen only after the + * enclosing kernfs operation which executed the winning instance + * of kernfs_remove_self() finished. + */ + if (!(kn->flags & KERNFS_SUICIDAL)) { + kn->flags |= KERNFS_SUICIDAL; + __kernfs_remove(kn); + kn->flags |= KERNFS_SUICIDED; + ret = true; + } else { + wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq; + DEFINE_WAIT(wait); + + while (true) { + prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE); + + if ((kn->flags & KERNFS_SUICIDED) && + atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) + break; + + mutex_unlock(&kernfs_mutex); + schedule(); + mutex_lock(&kernfs_mutex); + } + finish_wait(waitq, &wait); + WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); + ret = false; + } + + /* + * This must be done while holding kernfs_mutex; otherwise, waiting + * for SUICIDED && deactivated could finish prematurely. 
+ */ + kernfs_unbreak_active_protection(kn); + + mutex_unlock(&kernfs_mutex); + return ret; +} + +/** * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it * @parent: parent of the target * @name: name of the kernfs_node to remove diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 810cf6e..1b8b91b 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -372,6 +372,29 @@ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, } EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); +/** + * sysfs_remove_file_self - remove an object attribute from its own method + * @kobj: object we're acting for + * @attr: attribute descriptor + * + * See kernfs_remove_self() for details. + */ +bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) +{ + struct kernfs_node *parent = kobj->sd; + struct kernfs_node *kn; + bool ret; + + kn = kernfs_find_and_get(parent, attr->name); + if (WARN_ON_ONCE(!kn)) + return false; + + ret = kernfs_remove_self(kn); + + kernfs_put(kn); + return ret; +} + void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) { int i; diff --git a/include/linux/device.h b/include/linux/device.h index 952b010..1ff3f16 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -560,6 +560,8 @@ extern int device_create_file(struct device *device, const struct device_attribute *entry); extern void device_remove_file(struct device *dev, const struct device_attribute *attr); +extern bool device_remove_file_self(struct device *dev, + const struct device_attribute *attr); extern int __must_check device_create_bin_file(struct device *dev, const struct bin_attribute *attr); extern void device_remove_bin_file(struct device *dev, diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 917bc6c..02ac334 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -43,6 +43,8 @@ enum kernfs_node_flag { KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, KERNFS_STATIC_NAME = 0x0200, + 
KERNFS_SUICIDAL = 0x0400, + KERNFS_SUICIDED = 0x0800, }; /* type-specific structures for kernfs_node union members */ @@ -234,6 +236,9 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); void kernfs_remove(struct kernfs_node *kn); +void kernfs_break_active_protection(struct kernfs_node *kn); +void kernfs_unbreak_active_protection(struct kernfs_node *kn); +bool kernfs_remove_self(struct kernfs_node *kn); int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns); int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, @@ -291,6 +296,9 @@ kernfs_create_link(struct kernfs_node *parent, const char *name, static inline void kernfs_remove(struct kernfs_node *kn) { } +static inline bool kernfs_remove_self(struct kernfs_node *kn) +{ return false; } + static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn, const char *name, const void *ns) { return -ENOSYS; } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 30b2ebe..bd96c60 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -198,6 +198,7 @@ int __must_check sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); +bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr); int __must_check sysfs_create_bin_file(struct kobject *kobj, @@ -301,6 +302,12 @@ static inline void sysfs_remove_file_ns(struct kobject *kobj, { } +static inline bool sysfs_remove_file_self(struct kobject *kobj, + const struct attribute *attr) +{ + return false; +} + static inline void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr) { -- cgit v1.1 From bc6caf02cccedd30c9458e26dfdd8118af574ae5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 
14:03:02 -0500 Subject: pci: use device_remove_file_self() instead of device_schedule_callback() driver-core now supports synchronous self-deletion of attributes and the asynchronous removal mechanism is scheduled for removal. Use it instead of device_schedule_callback(). This makes "remove" behave synchronously. Signed-off-by: Tejun Heo Cc: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-sysfs.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 276ef9c..4e0acef 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -351,28 +351,17 @@ static struct device_attribute dev_rescan_attr = __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store); -static void remove_callback(struct device *dev) -{ - pci_stop_and_remove_bus_device_locked(to_pci_dev(dev)); -} - static ssize_t -remove_store(struct device *dev, struct device_attribute *dummy, +remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int ret = 0; unsigned long val; if (kstrtoul(buf, 0, &val) < 0) return -EINVAL; - /* An attribute cannot be unregistered by one of its own methods, - * so we have to use this roundabout approach. - */ - if (val) - ret = device_schedule_callback(dev, remove_callback); - if (ret) - count = ret; + if (val && device_remove_file_self(dev, attr)) + pci_stop_and_remove_bus_device_locked(to_pci_dev(dev)); return count; } static struct device_attribute dev_remove_attr = __ATTR(remove, -- cgit v1.1 From ac0ece9174aca9aa895ce0accc54f1f8ff12d117 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:03:03 -0500 Subject: scsi: use device_remove_file_self() instead of device_schedule_callback() driver-core now supports synchronous self-deletion of attributes and the asynchronous removal mechanism is scheduled for removal. Use it instead of device_schedule_callback(). 
This makes "delete" behave synchronously. Signed-off-by: Tejun Heo Cc: "James E.J. Bottomley" Cc: linux-scsi@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_sysfs.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 9117d0b..8ead24c 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -649,23 +649,12 @@ store_rescan_field (struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field); -static void sdev_store_delete_callback(struct device *dev) -{ - scsi_remove_device(to_scsi_device(dev)); -} - static ssize_t sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int rc; - - /* An attribute cannot be unregistered by one of its own methods, - * so we have to use this roundabout approach. - */ - rc = device_schedule_callback(dev, sdev_store_delete_callback); - if (rc) - count = rc; + if (device_remove_file_self(dev, attr)) + scsi_remove_device(to_scsi_device(dev)); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); -- cgit v1.1 From 0b60f9ead5d4816e7e3d6e28f4a0d22d4a1b2513 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:03:04 -0500 Subject: s390: use device_remove_file_self() instead of device_schedule_callback() driver-core now supports synchronous self-deletion of attributes and the asynchronous removal mechanism is scheduled for removal. Use it instead of device_schedule_callback(). * Conversions in arch/s390/pci/pci_sysfs.c and drivers/s390/block/dcssblk.c are straightforward. * drivers/s390/cio/ccwgroup.c is a bit more tricky because ccwgroup_notifier() was (ab)using device_schedule_callback() to purely obtain a process context to kick off ungroup operation which may block from a notifier callback. 
Rename ccwgroup_ungroup_callback() to ccwgroup_ungroup() and make it take ccwgroup_device * instead. The new function is now called directly from ccwgroup_ungroup_store(). ccwgroup_notifier() chain is updated to explicitly bounce through ccwgroup_device->ungroup_work. This also removes possible failure from memory pressure. Only compile-tested. Signed-off-by: Tejun Heo Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux390@de.ibm.com Cc: linux-s390@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/ccwgroup.h | 1 + arch/s390/pci/pci_sysfs.c | 18 ++++++++---------- drivers/s390/block/dcssblk.c | 14 +++++++------- drivers/s390/cio/ccwgroup.c | 26 ++++++++++++++++---------- 4 files changed, 32 insertions(+), 27 deletions(-) diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 23723ce..6e670f8 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -23,6 +23,7 @@ struct ccwgroup_device { unsigned int count; struct device dev; struct ccw_device *cdev[0]; + struct work_struct ungroup_work; }; /** diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index cf8a12f..ab4a913 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -48,29 +48,27 @@ static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); -static void recover_callback(struct device *dev) +static ssize_t store_recover(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = get_zdev(pdev); int ret; + if (!device_remove_file_self(dev, attr)) + return count; + pci_stop_and_remove_bus_device(pdev); ret = zpci_disable_device(zdev); if (ret) - return; + return ret; ret = zpci_enable_device(zdev); if (ret) - return; + return ret; pci_rescan_bus(zdev->bus); -} - -static ssize_t store_recover(struct device *dev, struct 
device_attribute *attr, - const char *buf, size_t count) -{ - int rc = device_schedule_callback(dev, recover_callback); - return rc ? rc : count; + return count; } static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index ebf41e2..ee0e85a 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -304,12 +304,6 @@ dcssblk_load_segment(char *name, struct segment_info **seg_info) return rc; } -static void dcssblk_unregister_callback(struct device *dev) -{ - device_unregister(dev); - put_device(dev); -} - /* * device attribute for switching shared/nonshared (exclusive) * operation (show + store) @@ -397,7 +391,13 @@ removeseg: blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); - rc = device_schedule_callback(dev, dcssblk_unregister_callback); + up_write(&dcssblk_devices_sem); + + if (device_remove_file_self(dev, attr)) { + device_unregister(dev); + put_device(dev); + } + return rc; out: up_write(&dcssblk_devices_sem); return rc; diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index fd3367a1..dfd7bc6 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -168,14 +168,12 @@ static ssize_t ccwgroup_online_show(struct device *dev, * Provide an 'ungroup' attribute so the user can remove group devices no * longer needed or accidentially created. 
Saves memory :) */ -static void ccwgroup_ungroup_callback(struct device *dev) +static void ccwgroup_ungroup(struct ccwgroup_device *gdev) { - struct ccwgroup_device *gdev = to_ccwgroupdev(dev); - mutex_lock(&gdev->reg_mutex); if (device_is_registered(&gdev->dev)) { __ccwgroup_remove_symlinks(gdev); - device_unregister(dev); + device_unregister(&gdev->dev); __ccwgroup_remove_cdev_refs(gdev); } mutex_unlock(&gdev->reg_mutex); @@ -195,10 +193,9 @@ static ssize_t ccwgroup_ungroup_store(struct device *dev, rc = -EINVAL; goto out; } - /* Note that we cannot unregister the device from one of its - * attribute methods, so we have to use this roundabout approach. - */ - rc = device_schedule_callback(dev, ccwgroup_ungroup_callback); + + if (device_remove_file_self(dev, attr)) + ccwgroup_ungroup(gdev); out: if (rc) { if (rc != -EAGAIN) @@ -224,6 +221,14 @@ static const struct attribute_group *ccwgroup_attr_groups[] = { NULL, }; +static void ccwgroup_ungroup_workfn(struct work_struct *work) +{ + struct ccwgroup_device *gdev = + container_of(work, struct ccwgroup_device, ungroup_work); + + ccwgroup_ungroup(gdev); +} + static void ccwgroup_release(struct device *dev) { kfree(to_ccwgroupdev(dev)); @@ -323,6 +328,7 @@ int ccwgroup_create_dev(struct device *parent, struct ccwgroup_driver *gdrv, atomic_set(&gdev->onoff, 0); mutex_init(&gdev->reg_mutex); mutex_lock(&gdev->reg_mutex); + INIT_WORK(&gdev->ungroup_work, ccwgroup_ungroup_workfn); gdev->count = num_devices; gdev->dev.bus = &ccwgroup_bus_type; gdev->dev.parent = parent; @@ -404,10 +410,10 @@ EXPORT_SYMBOL(ccwgroup_create_dev); static int ccwgroup_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct device *dev = data; + struct ccwgroup_device *gdev = to_ccwgroupdev(data); if (action == BUS_NOTIFY_UNBIND_DRIVER) - device_schedule_callback(dev, ccwgroup_ungroup_callback); + schedule_work(&gdev->ungroup_work); return NOTIFY_OK; } -- cgit v1.1 From ce8b04aa6c9bdf211b921fdd18c040ea29516b97 Mon Sep 17 
00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:03:05 -0500 Subject: sysfs, driver-core: remove unused {sysfs|device}_schedule_callback_owner() All device_schedule_callback_owner() users are converted to use device_remove_file_self(). Remove now unused {sysfs|device}_schedule_callback_owner(). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 33 ------------------ fs/sysfs/file.c | 92 -------------------------------------------------- include/linux/device.h | 11 +----- include/linux/sysfs.h | 9 ----- 4 files changed, 1 insertion(+), 144 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 9db57af..4195364 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -615,39 +615,6 @@ void device_remove_bin_file(struct device *dev, } EXPORT_SYMBOL_GPL(device_remove_bin_file); -/** - * device_schedule_callback_owner - helper to schedule a callback for a device - * @dev: device. - * @func: callback function to invoke later. - * @owner: module owning the callback routine - * - * Attribute methods must not unregister themselves or their parent device - * (which would amount to the same thing). Attempts to do so will deadlock, - * since unregistration is mutually exclusive with driver callbacks. - * - * Instead methods can call this routine, which will attempt to allocate - * and schedule a workqueue request to call back @func with @dev as its - * argument in the workqueue's process context. @dev will be pinned until - * @func returns. - * - * This routine is usually called via the inline device_schedule_callback(), - * which automatically sets @owner to THIS_MODULE. - * - * Returns 0 if the request was submitted, -ENOMEM if storage could not - * be allocated, -ENODEV if a reference to @owner isn't available. - * - * NOTE: This routine won't work if CONFIG_SYSFS isn't set! 
It uses an - * underlying sysfs routine (since it is intended for use by attribute - * methods), and if sysfs isn't available you'll get nothing but -ENOSYS. - */ -int device_schedule_callback_owner(struct device *dev, - void (*func)(struct device *), struct module *owner) -{ - return sysfs_schedule_callback(&dev->kobj, - (void (*)(void *)) func, dev, owner); -} -EXPORT_SYMBOL_GPL(device_schedule_callback_owner); - static void klist_children_get(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1b8b91b..28cc1acd 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -453,95 +453,3 @@ void sysfs_remove_bin_file(struct kobject *kobj, kernfs_remove_by_name(kobj->sd, attr->attr.name); } EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); - -struct sysfs_schedule_callback_struct { - struct list_head workq_list; - struct kobject *kobj; - void (*func)(void *); - void *data; - struct module *owner; - struct work_struct work; -}; - -static struct workqueue_struct *sysfs_workqueue; -static DEFINE_MUTEX(sysfs_workq_mutex); -static LIST_HEAD(sysfs_workq); -static void sysfs_schedule_callback_work(struct work_struct *work) -{ - struct sysfs_schedule_callback_struct *ss = container_of(work, - struct sysfs_schedule_callback_struct, work); - - (ss->func)(ss->data); - kobject_put(ss->kobj); - module_put(ss->owner); - mutex_lock(&sysfs_workq_mutex); - list_del(&ss->workq_list); - mutex_unlock(&sysfs_workq_mutex); - kfree(ss); -} - -/** - * sysfs_schedule_callback - helper to schedule a callback for a kobject - * @kobj: object we're acting for. - * @func: callback function to invoke later. - * @data: argument to pass to @func. - * @owner: module owning the callback code - * - * sysfs attribute methods must not unregister themselves or their parent - * kobject (which would amount to the same thing). Attempts to do so will - * deadlock, since unregistration is mutually exclusive with driver - * callbacks. 
- * - * Instead methods can call this routine, which will attempt to allocate - * and schedule a workqueue request to call back @func with @data as its - * argument in the workqueue's process context. @kobj will be pinned - * until @func returns. - * - * Returns 0 if the request was submitted, -ENOMEM if storage could not - * be allocated, -ENODEV if a reference to @owner isn't available, - * -EAGAIN if a callback has already been scheduled for @kobj. - */ -int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), - void *data, struct module *owner) -{ - struct sysfs_schedule_callback_struct *ss, *tmp; - - if (!try_module_get(owner)) - return -ENODEV; - - mutex_lock(&sysfs_workq_mutex); - list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list) - if (ss->kobj == kobj) { - module_put(owner); - mutex_unlock(&sysfs_workq_mutex); - return -EAGAIN; - } - mutex_unlock(&sysfs_workq_mutex); - - if (sysfs_workqueue == NULL) { - sysfs_workqueue = create_singlethread_workqueue("sysfsd"); - if (sysfs_workqueue == NULL) { - module_put(owner); - return -ENOMEM; - } - } - - ss = kmalloc(sizeof(*ss), GFP_KERNEL); - if (!ss) { - module_put(owner); - return -ENOMEM; - } - kobject_get(kobj); - ss->kobj = kobj; - ss->func = func; - ss->data = data; - ss->owner = owner; - INIT_WORK(&ss->work, sysfs_schedule_callback_work); - INIT_LIST_HEAD(&ss->workq_list); - mutex_lock(&sysfs_workq_mutex); - list_add_tail(&ss->workq_list, &sysfs_workq); - mutex_unlock(&sysfs_workq_mutex); - queue_work(sysfs_workqueue, &ss->work); - return 0; -} -EXPORT_SYMBOL_GPL(sysfs_schedule_callback); diff --git a/include/linux/device.h b/include/linux/device.h index 1ff3f16..fb1ba13 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -566,12 +566,6 @@ extern int __must_check device_create_bin_file(struct device *dev, const struct bin_attribute *attr); extern void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); -extern int 
device_schedule_callback_owner(struct device *dev, - void (*func)(struct device *dev), struct module *owner); - -/* This is a macro to avoid include problems with THIS_MODULE */ -#define device_schedule_callback(dev, func) \ - device_schedule_callback_owner(dev, func, THIS_MODULE) /* device resource management */ typedef void (*dr_release_t)(struct device *dev, void *res); @@ -931,10 +925,7 @@ extern int device_online(struct device *dev); extern struct device *__root_device_register(const char *name, struct module *owner); -/* - * This is a macro to avoid include problems with THIS_MODULE, - * just as per what is done for device_schedule_callback() above. - */ +/* This is a macro to avoid include problems with THIS_MODULE */ #define root_device_register(name) \ __root_device_register(name, THIS_MODULE) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index bd96c60..14df054 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -178,9 +178,6 @@ struct sysfs_ops { #ifdef CONFIG_SYSFS -int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), - void *data, struct module *owner); - int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, @@ -249,12 +246,6 @@ int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ -static inline int sysfs_schedule_callback(struct kobject *kobj, - void (*func)(void *), void *data, struct module *owner) -{ - return -ENOSYS; -} - static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { return 0; -- cgit v1.1 From 07c7530dd46728e25e938d0eb291f8085435c365 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:08 -0500 Subject: kernfs: invoke dir_ops while holding active ref of the target node kernfs_dir_ops are currently being invoked without any active reference, which makes it tricky for the invoked operations to determine 
whether the objects associated with those nodes are safe to access and will remain that way for the duration of such operations. kernfs already has active_ref mechanism to deal with this which makes the removal of a given node the synchronization point for gating the file operations. There's no reason for dir_ops to be any different. Update the dir_ops handling so that active_ref is held while the dir_ops are executing. This guarantees that while a dir_ops is executing the target nodes stay alive. As kernfs_dir_ops doesn't have any in-kernel user at this point, this doesn't affect anybody. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 33 ++++++++++++++++++++++++++++++--- include/linux/kernfs.h | 3 ++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 8c63ae1..bfbfb48 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -654,22 +654,36 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, { struct kernfs_node *parent = dir->i_private; struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops; + int ret; if (!kdops || !kdops->mkdir) return -EPERM; - return kdops->mkdir(parent, dentry->d_name.name, mode); + if (!kernfs_get_active(parent)) + return -ENODEV; + + ret = kdops->mkdir(parent, dentry->d_name.name, mode); + + kernfs_put_active(parent); + return ret; } static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) { struct kernfs_node *kn = dentry->d_fsdata; struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; + int ret; if (!kdops || !kdops->rmdir) return -EPERM; - return kdops->rmdir(kn); + if (!kernfs_get_active(kn)) + return -ENODEV; + + ret = kdops->rmdir(kn); + + kernfs_put_active(kn); + return ret; } static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -678,11 +692,24 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernfs_node *kn = old_dentry->d_fsdata; struct 
kernfs_node *new_parent = new_dir->i_private; struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; + int ret; if (!kdops || !kdops->rename) return -EPERM; - return kdops->rename(kn, new_parent, new_dentry->d_name.name); + if (!kernfs_get_active(kn)) + return -ENODEV; + + if (!kernfs_get_active(new_parent)) { + kernfs_put_active(kn); + return -ENODEV; + } + + ret = kdops->rename(kn, new_parent, new_dentry->d_name.name); + + kernfs_put_active(new_parent); + kernfs_put_active(kn); + return ret; } const struct inode_operations kernfs_dir_iops = { diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 02ac334..58a131d 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -111,7 +111,8 @@ struct kernfs_node { * kernfs_dir_ops may be specified on kernfs_create_root() to support * directory manipulation syscalls. These optional callbacks are invoked * on the matching syscalls and can perform any kernfs operations which - * don't necessarily have to be the exact operation requested. + * don't necessarily have to be the exact operation requested. An active + * reference is held for each kernfs_node parameter. */ struct kernfs_dir_ops { int (*mkdir)(struct kernfs_node *parent, const char *name, -- cgit v1.1 From 90c07c895c87d38db100b6afcb686ab3ef0d6a64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:09 -0500 Subject: kernfs: rename kernfs_dir_ops to kernfs_syscall_ops We're gonna need non-dir syscall callbacks, which will make dir_ops a misnomer. Let's rename kernfs_dir_ops to kernfs_syscall_ops. This is pure rename. 
Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 25 +++++++++++++------------ include/linux/kernfs.h | 18 +++++++++--------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index bfbfb48..f58d2f1 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -527,13 +527,14 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); /** * kernfs_create_root - create a new kernfs hierarchy - * @kdops: optional directory syscall operations for the hierarchy + * @scops: optional syscall operations for the hierarchy * @priv: opaque data associated with the new directory * * Returns the root of the new hierarchy on success, ERR_PTR() value on * failure. */ -struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) +struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, + void *priv) { struct kernfs_root *root; struct kernfs_node *kn; @@ -556,7 +557,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) kn->priv = priv; kn->dir.root = root; - root->dir_ops = kdops; + root->syscall_ops = scops; root->kn = kn; init_waitqueue_head(&root->deactivate_waitq); @@ -653,16 +654,16 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct kernfs_node *parent = dir->i_private; - struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops; + struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; int ret; - if (!kdops || !kdops->mkdir) + if (!scops || !scops->mkdir) return -EPERM; if (!kernfs_get_active(parent)) return -ENODEV; - ret = kdops->mkdir(parent, dentry->d_name.name, mode); + ret = scops->mkdir(parent, dentry->d_name.name, mode); kernfs_put_active(parent); return ret; @@ -671,16 +672,16 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) { struct kernfs_node *kn = dentry->d_fsdata; - struct kernfs_dir_ops 
*kdops = kernfs_root(kn)->dir_ops; + struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; int ret; - if (!kdops || !kdops->rmdir) + if (!scops || !scops->rmdir) return -EPERM; if (!kernfs_get_active(kn)) return -ENODEV; - ret = kdops->rmdir(kn); + ret = scops->rmdir(kn); kernfs_put_active(kn); return ret; @@ -691,10 +692,10 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, { struct kernfs_node *kn = old_dentry->d_fsdata; struct kernfs_node *new_parent = new_dir->i_private; - struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; + struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; int ret; - if (!kdops || !kdops->rename) + if (!scops || !scops->rename) return -EPERM; if (!kernfs_get_active(kn)) @@ -705,7 +706,7 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, return -ENODEV; } - ret = kdops->rename(kn, new_parent, new_dentry->d_name.name); + ret = scops->rename(kn, new_parent, new_dentry->d_name.name); kernfs_put_active(new_parent); kernfs_put_active(kn); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 58a131d..5ddc474 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -108,13 +108,13 @@ struct kernfs_node { }; /* - * kernfs_dir_ops may be specified on kernfs_create_root() to support - * directory manipulation syscalls. These optional callbacks are invoked - * on the matching syscalls and can perform any kernfs operations which - * don't necessarily have to be the exact operation requested. An active - * reference is held for each kernfs_node parameter. + * kernfs_syscall_ops may be specified on kernfs_create_root() to support + * syscalls. These optional callbacks are invoked on the matching syscalls + * and can perform any kernfs operations which don't necessarily have to be + * the exact operation requested. An active reference is held for each + * kernfs_node parameter. 
*/ -struct kernfs_dir_ops { +struct kernfs_syscall_ops { int (*mkdir)(struct kernfs_node *parent, const char *name, umode_t mode); int (*rmdir)(struct kernfs_node *kn); @@ -128,7 +128,7 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct ida ino_ida; - struct kernfs_dir_ops *dir_ops; + struct kernfs_syscall_ops *syscall_ops; wait_queue_head_t deactivate_waitq; }; @@ -219,7 +219,7 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); -struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, +struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, void *priv); void kernfs_destroy_root(struct kernfs_root *root); @@ -273,7 +273,7 @@ static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } static inline struct kernfs_root * -kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) +kernfs_create_root(struct kernfs_syscall_ops *scops, void *priv) { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } -- cgit v1.1 From 6a7fed4eefddad48224f1c9d534b4e262f0897f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:10 -0500 Subject: kernfs: implement kernfs_syscall_ops->remount_fs() and ->show_options() Add two super_block related syscall callbacks ->remount_fs() and ->show_options() to kernfs_syscall_ops. These simply forward the matching super_operations. 
Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 23 +++++++++++++++++++++++ include/linux/kernfs.h | 3 +++ 2 files changed, 26 insertions(+) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 0d6ce89..70cc698 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -19,10 +19,33 @@ struct kmem_cache *kernfs_node_cache; +static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct kernfs_root *root = kernfs_info(sb)->root; + struct kernfs_syscall_ops *scops = root->syscall_ops; + + if (scops && scops->remount_fs) + return scops->remount_fs(root, flags, data); + return 0; +} + +static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) +{ + struct kernfs_root *root = kernfs_root(dentry->d_fsdata); + struct kernfs_syscall_ops *scops = root->syscall_ops; + + if (scops && scops->show_options) + return scops->show_options(sf, root); + return 0; +} + static const struct super_operations kernfs_sops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .evict_inode = kernfs_evict_inode, + + .remount_fs = kernfs_sop_remount_fs, + .show_options = kernfs_sop_show_options, }; static int kernfs_fill_super(struct super_block *sb) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5ddc474..5d5b7e9 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -115,6 +115,9 @@ struct kernfs_node { * kernfs_node parameter. 
 */ struct kernfs_syscall_ops { + int (*remount_fs)(struct kernfs_root *root, int *flags, char *data); + int (*show_options)(struct seq_file *sf, struct kernfs_root *root); + int (*mkdir)(struct kernfs_node *parent, const char *name, umode_t mode); int (*rmdir)(struct kernfs_node *kn); -- cgit v1.1 From b9c9dad0c457d32cf8c7d2e413463c8414c7a7a7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:11 -0500 Subject: kernfs: add missing kernfs_active() checks in directory operations kernfs_iop_lookup(), kernfs_dir_pos() and kernfs_dir_next_pos() were missing kernfs_active() tests before using the found kernfs_node. As deactivated state is currently visible only while a node is being removed, this doesn't pose an actual problem. e.g. lookup succeeding on a deactivated node doesn't harm anything as the eventual file operations are gonna fail and those failures are indistinguishable from the cases in which the lookups had happened before the node was deactivated. However, we're gonna allow new nodes to be created deactivated and then activated explicitly by the kernfs user when it sees fit. This is to support atomically making multiple nodes visible to userland and thus those nodes must not be visible to userland before activated. Let's plug the lookup and readdir holes so that deactivated nodes are invisible to userland. 
Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index f58d2f1..89f8462 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -629,7 +629,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, kn = kernfs_find_ns(parent, dentry->d_name.name, ns); /* no such entry */ - if (!kn) { + if (!kn || !kernfs_active(kn)) { ret = NULL; goto out_unlock; } @@ -1112,8 +1112,8 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, break; } } - /* Skip over entries in the wrong namespace */ - while (pos && pos->ns != ns) { + /* Skip over entries which are dying/dead or in the wrong namespace */ + while (pos && (!kernfs_active(pos) || pos->ns != ns)) { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; @@ -1127,14 +1127,15 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns, struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) { pos = kernfs_dir_pos(ns, parent, ino, pos); - if (pos) + if (pos) { do { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; else pos = rb_to_kn(node); - } while (pos && pos->ns != ns); + } while (pos && (!kernfs_active(pos) || pos->ns != ns)); + } return pos; } -- cgit v1.1 From d35258ef702cca0c4e66d799f8e38b78c02ce8a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:12 -0500 Subject: kernfs: allow nodes to be created in the deactivated state Currently, kernfs_nodes are made visible to userland on creation, which makes it difficult for kernfs users to atomically succeed or fail creation of multiple nodes. In addition, if something fails after creating some nodes, the created nodes might already be in use and their active refs need to be drained for removal, which has the potential to introduce tricky reverse locking dependency on active_ref depending on how the error path is synchronized. 
This patch introduces per-root flag KERNFS_ROOT_CREATE_DEACTIVATED. If set, all nodes under the root are created in the deactivated state and stay invisible to userland until explicitly enabled by the new kernfs_activate() API. Also, nodes which have never been activated are guaranteed to bypass draining on removal thus allowing error paths to not worry about locking dependency on active_ref draining. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 71 +++++++++++++++++++++++++++++++++++++++++++++----- fs/sysfs/mount.c | 2 +- include/linux/kernfs.h | 15 +++++++++-- 3 files changed, 78 insertions(+), 10 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 89f8462..3cff0a2 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -435,7 +435,7 @@ int kernfs_add_one(struct kernfs_node *kn) goto out_unlock; ret = -ENOENT; - if (!kernfs_active(parent)) + if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) goto out_unlock; kn->hash = kernfs_name_hash(kn->name, kn->ns); @@ -451,9 +451,19 @@ int kernfs_add_one(struct kernfs_node *kn) ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; } - /* Mark the entry added into directory tree */ - atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); - ret = 0; + mutex_unlock(&kernfs_mutex); + + /* + * Activate the new node unless CREATE_DEACTIVATED is requested. + * If not activated here, the kernfs user is responsible for + * activating the node with kernfs_activate(). A node which hasn't + * been activated is not visible to userland and its removal won't + * trigger deactivation. 
+ */ + if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) + kernfs_activate(kn); + return 0; + out_unlock: mutex_unlock(&kernfs_mutex); return ret; @@ -528,13 +538,14 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); /** * kernfs_create_root - create a new kernfs hierarchy * @scops: optional syscall operations for the hierarchy + * @flags: KERNFS_ROOT_* flags * @priv: opaque data associated with the new directory * * Returns the root of the new hierarchy on success, ERR_PTR() value on * failure. */ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, - void *priv) + unsigned int flags, void *priv) { struct kernfs_root *root; struct kernfs_node *kn; @@ -553,14 +564,17 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, return ERR_PTR(-ENOMEM); } - atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); kn->priv = priv; kn->dir.root = root; root->syscall_ops = scops; + root->flags = flags; root->kn = kn; init_waitqueue_head(&root->deactivate_waitq); + if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) + kernfs_activate(kn); + return root; } @@ -783,6 +797,40 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, return pos->parent; } +/** + * kernfs_activate - activate a node which started deactivated + * @kn: kernfs_node whose subtree is to be activated + * + * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node + * needs to be explicitly activated. A node which hasn't been activated + * isn't visible to userland and deactivation is skipped during its + * removal. This is useful to construct atomic init sequences where + * creation of multiple nodes should either succeed or fail atomically. + * + * The caller is responsible for ensuring that this function is not called + * after kernfs_remove*() is invoked on @kn. 
+ */ +void kernfs_activate(struct kernfs_node *kn) +{ + struct kernfs_node *pos; + + mutex_lock(&kernfs_mutex); + + pos = NULL; + while ((pos = kernfs_next_descendant_post(pos, kn))) { + if (!pos || (pos->flags & KERNFS_ACTIVATED)) + continue; + + WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb)); + WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS); + + atomic_sub(KN_DEACTIVATED_BIAS, &pos->active); + pos->flags |= KERNFS_ACTIVATED; + } + + mutex_unlock(&kernfs_mutex); +} + static void __kernfs_remove(struct kernfs_node *kn) { struct kernfs_node *pos; @@ -817,7 +865,16 @@ static void __kernfs_remove(struct kernfs_node *kn) */ kernfs_get(pos); - kernfs_drain(pos); + /* + * Drain iff @kn was activated. This avoids draining and + * its lockdep annotations for nodes which have never been + * activated and allows embedding kernfs_remove() in create + * error paths without worrying about draining. + */ + if (kn->flags & KERNFS_ACTIVATED) + kernfs_drain(pos); + else + WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); /* * kernfs_unlink_sibling() succeeds once per node. 
Use it diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 6211230..5c7fdd9 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -62,7 +62,7 @@ int __init sysfs_init(void) { int err; - sysfs_root = kernfs_create_root(NULL, NULL); + sysfs_root = kernfs_create_root(NULL, 0, NULL); if (IS_ERR(sysfs_root)) return PTR_ERR(sysfs_root); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5d5b7e9..4520c86 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -38,6 +38,7 @@ enum kernfs_node_type { #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK enum kernfs_node_flag { + KERNFS_ACTIVATED = 0x0010, KERNFS_NS = 0x0020, KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, @@ -47,6 +48,11 @@ enum kernfs_node_flag { KERNFS_SUICIDED = 0x0800, }; +/* @flags for kernfs_create_root() */ +enum kernfs_root_flag { + KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, +}; + /* type-specific structures for kernfs_node union members */ struct kernfs_elem_dir { unsigned long subdirs; @@ -128,6 +134,7 @@ struct kernfs_syscall_ops { struct kernfs_root { /* published fields */ struct kernfs_node *kn; + unsigned int flags; /* KERNFS_ROOT_* flags */ /* private fields, do not use outside kernfs proper */ struct ida ino_ida; @@ -223,7 +230,7 @@ void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, - void *priv); + unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, @@ -239,6 +246,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); +void kernfs_activate(struct kernfs_node *kn); void kernfs_remove(struct kernfs_node *kn); void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); 
@@ -276,7 +284,8 @@ static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } static inline struct kernfs_root * -kernfs_create_root(struct kernfs_syscall_ops *scops, void *priv) +kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, + void *priv) { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } @@ -298,6 +307,8 @@ kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target) { return ERR_PTR(-ENOSYS); } +static inline void kernfs_activate(struct kernfs_node *kn) { } + static inline void kernfs_remove(struct kernfs_node *kn) { } static inline bool kernfs_remove_self(struct kernfs_node *kn) -- cgit v1.1 From 4d3773c4bb41ed5228f1ab7a4a52b79e17b10515 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:13 -0500 Subject: kernfs: implement kernfs_ops->atomic_write_len A write to a kernfs_node is buffered through a kernel buffer. Writes <= PAGE_SIZE are performed atomically, while larger ones are executed in PAGE_SIZE chunks. While this is enough for sysfs, cgroup which is scheduled to be converted to use kernfs needs a bit more control over it. This patch adds kernfs_ops->atomic_write_len. If not set (zero), the behavior stays the same. If set, writes upto the size are executed atomically and larger writes are rejected with -E2BIG. A different implementation strategy would be allowing configuring chunking size while making the original write size available to the write method; however, such strategy, while being more complicated, doesn't really buy anything. If the write implementation has to handle chunking, the specific chunk size shouldn't matter all that much. 
Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 49 +++++++++++++++++++++++++++++++------------------ include/linux/kernfs.h | 8 ++++++-- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 10a8c91..ddcb471 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -252,19 +252,9 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct kernfs_open_file *of = kernfs_of(file); - ssize_t len = min_t(size_t, count, PAGE_SIZE); const struct kernfs_ops *ops; - char *buf; - - buf = kmalloc(len + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, user_buf, len)) { - len = -EFAULT; - goto out_free; - } - buf[len] = '\0'; /* guarantee string termination */ + char *buf = NULL; + ssize_t len; /* * @of->mutex nests outside active ref and is just to ensure that @@ -273,22 +263,45 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { mutex_unlock(&of->mutex); - len = -ENODEV; - goto out_free; + return -ENODEV; } ops = kernfs_ops(of->kn); - if (ops->write) - len = ops->write(of, buf, len, *ppos); - else + if (!ops->write) { len = -EINVAL; + goto out_unlock; + } + + if (ops->atomic_write_len) { + len = count; + if (len > ops->atomic_write_len) { + len = -E2BIG; + goto out_unlock; + } + } else { + len = min_t(size_t, count, PAGE_SIZE); + } + + buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) { + len = -ENOMEM; + goto out_unlock; + } + if (copy_from_user(buf, user_buf, len)) { + len = -EFAULT; + goto out_unlock; + } + buf[len] = '\0'; /* guarantee string termination */ + + len = ops->write(of, buf, len, *ppos); +out_unlock: kernfs_put_active(of->kn); mutex_unlock(&of->mutex); if (len > 0) *ppos += len; -out_free: + kfree(buf); return len; } diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 4520c86..47f5235 100644 
--- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -178,9 +178,13 @@ struct kernfs_ops { loff_t off); /* - * write() is bounced through kernel buffer and a write larger than - * PAGE_SIZE results in partial operation of PAGE_SIZE. + * write() is bounced through kernel buffer. If atomic_write_len + * is not set, a write larger than PAGE_SIZE results in partial + * operations of PAGE_SIZE chunks. If atomic_write_len is set, + * writes upto the specified size are executed atomically but + * larger ones are rejected with -E2BIG. */ + size_t atomic_write_len; ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); -- cgit v1.1 From 2536390da0d300b2734c721235c082498879841d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:14 -0500 Subject: kernfs: add kernfs_open_file->priv Add a private data field to be used by kernfs file operations. This generally makes sense and will be used by cgroup. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 47f5235..9ca0f09 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -146,6 +146,7 @@ struct kernfs_open_file { /* published fields */ struct kernfs_node *kn; struct file *file; + void *priv; /* private fields, do not use outside kernfs proper */ struct mutex mutex; -- cgit v1.1 From 0c23b2259a4850494e2c53e864ea840597c6cdd3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:15 -0500 Subject: kernfs: implement kernfs_node_from_dentry(), kernfs_root_from_sb() and kernfs_rename() Implement helpers to determine node from dentry and root from super_block. Also add a kernfs_rename_ns() wrapper which assumes NULL namespace. These generally make sense and will be used by cgroup. v2: Some dummy implementations for !CONFIG_SYSFS was missing. Fixed. Reported by kbuild test robot. 
Signed-off-by: Tejun Heo Cc: kbuild test robot Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 18 ++++++++++++++++++ fs/kernfs/mount.c | 14 ++++++++++++++ include/linux/kernfs.h | 16 ++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 3cff0a2..42a250f 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -350,6 +350,24 @@ const struct dentry_operations kernfs_dops = { .d_release = kernfs_dop_release, }; +/** + * kernfs_node_from_dentry - determine kernfs_node associated with a dentry + * @dentry: the dentry in question + * + * Return the kernfs_node associated with @dentry. If @dentry is not a + * kernfs one, %NULL is returned. + * + * While the returned kernfs_node will stay accessible as long as @dentry + * is accessible, the returned node can be in any state and the caller is + * fully responsible for determining what's accessible. + */ +struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) +{ + if (dentry->d_op == &kernfs_dops) + return dentry->d_fsdata; + return NULL; +} + static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, const char *name, umode_t mode, unsigned flags) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 70cc698..e5b28b0 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -48,6 +48,20 @@ static const struct super_operations kernfs_sops = { .show_options = kernfs_sop_show_options, }; +/** + * kernfs_root_from_sb - determine kernfs_root associated with a super_block + * @sb: the super_block in question + * + * Return the kernfs_root associated with @sb. If @sb is not a kernfs one, + * %NULL is returned. 
+ */ +struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) +{ + if (sb->s_op == &kernfs_sops) + return kernfs_info(sb)->root; + return NULL; +} + static int kernfs_fill_super(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 9ca0f09..9c89904 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -234,6 +234,9 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); +struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); +struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); + struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); @@ -288,6 +291,12 @@ kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } +static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) +{ return NULL; } + +static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) +{ return NULL; } + static inline struct kernfs_root * kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) @@ -388,6 +397,13 @@ static inline int kernfs_remove_by_name(struct kernfs_node *parent, return kernfs_remove_by_name_ns(parent, name, NULL); } +static inline int kernfs_rename(struct kernfs_node *kn, + struct kernfs_node *new_parent, + const char *new_name) +{ + return kernfs_rename_ns(kn, new_parent, new_name, NULL); +} + static inline struct dentry * kernfs_mount(struct file_system_type *fs_type, int flags, struct kernfs_root *root) -- cgit v1.1 From 3eef34ad7dc369b7183ec383908aff3da2f6e5ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Feb 2014 13:32:07 -0500 Subject: kernfs: 
implement kernfs_get_parent(), kernfs_name/path() and friends kernfs_node->parent and ->name are currently marked as "published" indicating that kernfs users may access them directly; however, those fields may get updated by kernfs_rename[_ns]() and unrestricted access may lead to erroneous values or oops. Protect ->parent and ->name updates with a irq-safe spinlock kernfs_rename_lock and implement the following accessors for these fields. * kernfs_name() - format the node's name into the specified buffer * kernfs_path() - format the node's path into the specified buffer * pr_cont_kernfs_name() - pr_cont a node's name (doesn't need buffer) * pr_cont_kernfs_path() - pr_cont a node's path (doesn't need buffer) * kernfs_get_parent() - pin and return a node's parent All can be called under any context. The recursive sysfs_pathname() in fs/sysfs/dir.c is replaced with kernfs_path() and sysfs_rename_dir_ns() is updated to use kernfs_get_parent() instead of dereferencing parent directly. v2: Dummy definition of kernfs_path() for !CONFIG_KERNFS was missing static inline making it cause a lot of build warnings. Add it. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/sysfs/dir.c | 44 ++++--------- include/linux/kernfs.h | 26 +++++++- 3 files changed, 203 insertions(+), 42 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 42a250f..a347792 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -19,6 +19,8 @@ #include "kernfs-internal.h" DEFINE_MUTEX(kernfs_mutex); +static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ +static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) @@ -37,6 +39,141 @@ static bool kernfs_lockdep(struct kernfs_node *kn) #endif } +static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) +{ + return strlcpy(buf, kn->parent ? 
kn->name : "/", buflen); +} + +static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf, + size_t buflen) +{ + char *p = buf + buflen; + int len; + + *--p = '\0'; + + do { + len = strlen(kn->name); + if (p - buf < len + 1) { + buf[0] = '\0'; + p = NULL; + break; + } + p -= len; + memcpy(p, kn->name, len); + *--p = '/'; + kn = kn->parent; + } while (kn && kn->parent); + + return p; +} + +/** + * kernfs_name - obtain the name of a given node + * @kn: kernfs_node of interest + * @buf: buffer to copy @kn's name into + * @buflen: size of @buf + * + * Copies the name of @kn into @buf of @buflen bytes. The behavior is + * similar to strlcpy(). It returns the length of @kn's name and if @buf + * isn't long enough, it's filled upto @buflen-1 and nul terminated. + * + * This function can be called from any context. + */ +int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + ret = kernfs_name_locked(kn, buf, buflen); + spin_unlock_irqrestore(&kernfs_rename_lock, flags); + return ret; +} + +/** + * kernfs_path - build full path of a given node + * @kn: kernfs_node of interest + * @buf: buffer to copy @kn's name into + * @buflen: size of @buf + * + * Builds and returns the full path of @kn in @buf of @buflen bytes. The + * path is built from the end of @buf so the returned pointer usually + * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated + * and %NULL is returned. + */ +char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) +{ + unsigned long flags; + char *p; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + p = kernfs_path_locked(kn, buf, buflen); + spin_unlock_irqrestore(&kernfs_rename_lock, flags); + return p; +} + +/** + * pr_cont_kernfs_name - pr_cont name of a kernfs_node + * @kn: kernfs_node of interest + * + * This function can be called from any context. 
+ */ +void pr_cont_kernfs_name(struct kernfs_node *kn) +{ + unsigned long flags; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + + kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); + pr_cont("%s", kernfs_pr_cont_buf); + + spin_unlock_irqrestore(&kernfs_rename_lock, flags); +} + +/** + * pr_cont_kernfs_path - pr_cont path of a kernfs_node + * @kn: kernfs_node of interest + * + * This function can be called from any context. + */ +void pr_cont_kernfs_path(struct kernfs_node *kn) +{ + unsigned long flags; + char *p; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + + p = kernfs_path_locked(kn, kernfs_pr_cont_buf, + sizeof(kernfs_pr_cont_buf)); + if (p) + pr_cont("%s", p); + else + pr_cont(""); + + spin_unlock_irqrestore(&kernfs_rename_lock, flags); +} + +/** + * kernfs_get_parent - determine the parent node and pin it + * @kn: kernfs_node of interest + * + * Determines @kn's parent, pins and returns it. This function can be + * called from any context. + */ +struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) +{ + struct kernfs_node *parent; + unsigned long flags; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + parent = kn->parent; + kernfs_get(parent); + spin_unlock_irqrestore(&kernfs_rename_lock, flags); + + return parent; +} + /** * kernfs_name_hash * @name: Null terminated string to hash @@ -1103,8 +1240,14 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) { + struct kernfs_node *old_parent; + const char *old_name = NULL; int error; + /* can't move or rename root */ + if (!kn->parent) + return -EINVAL; + mutex_lock(&kernfs_mutex); error = -ENOENT; @@ -1126,13 +1269,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, new_name = kstrdup(new_name, GFP_KERNEL); if (!new_name) goto out; - - if (kn->flags & KERNFS_STATIC_NAME) - kn->flags &= 
~KERNFS_STATIC_NAME; - else - kfree(kn->name); - - kn->name = new_name; + } else { + new_name = NULL; } /* @@ -1140,12 +1278,29 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, */ kernfs_unlink_sibling(kn); kernfs_get(new_parent); - kernfs_put(kn->parent); - kn->ns = new_ns; - kn->hash = kernfs_name_hash(kn->name, kn->ns); + + /* rename_lock protects ->parent and ->name accessors */ + spin_lock_irq(&kernfs_rename_lock); + + old_parent = kn->parent; kn->parent = new_parent; + + kn->ns = new_ns; + if (new_name) { + if (!(kn->flags & KERNFS_STATIC_NAME)) + old_name = kn->name; + kn->flags &= ~KERNFS_STATIC_NAME; + kn->name = new_name; + } + + spin_unlock_irq(&kernfs_rename_lock); + + kn->hash = kernfs_name_hash(new_name, new_ns); kernfs_link_sibling(kn); + kernfs_put(old_parent); + kfree(old_name); + error = 0; out: mutex_unlock(&kernfs_mutex); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index ee0d761..0b45ff4 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -19,39 +19,18 @@ DEFINE_SPINLOCK(sysfs_symlink_target_lock); -/** - * sysfs_pathname - return full path to sysfs dirent - * @kn: kernfs_node whose path we want - * @path: caller allocated buffer of size PATH_MAX - * - * Gives the name "/" to the sysfs_root entry; any path returned - * is relative to wherever sysfs is mounted. - */ -static char *sysfs_pathname(struct kernfs_node *kn, char *path) -{ - if (kn->parent) { - sysfs_pathname(kn->parent, path); - strlcat(path, "/", PATH_MAX); - } - strlcat(path, kn->name, PATH_MAX); - return path; -} - void sysfs_warn_dup(struct kernfs_node *parent, const char *name) { - char *path; + char *buf, *path = NULL; - path = kzalloc(PATH_MAX, GFP_KERNEL); - if (path) { - sysfs_pathname(parent, path); - strlcat(path, "/", PATH_MAX); - strlcat(path, name, PATH_MAX); - } + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (buf) + path = kernfs_path(parent, buf, PATH_MAX); - WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", - path ? 
path : name); + WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n", + path, name); - kfree(path); + kfree(buf); } /** @@ -122,9 +101,13 @@ void sysfs_remove_dir(struct kobject *kobj) int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, const void *new_ns) { - struct kernfs_node *parent = kobj->sd->parent; + struct kernfs_node *parent; + int ret; - return kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); + parent = kernfs_get_parent(kobj->sd); + ret = kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); + kernfs_put(parent); + return ret; } int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, @@ -133,7 +116,6 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, struct kernfs_node *kn = kobj->sd; struct kernfs_node *new_parent; - BUG_ON(!kn->parent); new_parent = new_parent_kobj && new_parent_kobj->sd ? new_parent_kobj->sd : sysfs_root_kn; diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 9c89904..8736ee8 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -91,7 +91,12 @@ struct kernfs_node { #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif - /* the following two fields are published */ + /* + * Use kernfs_get_parent() and kernfs_name/path() instead of + * accessing the following two fields directly. If the node is + * never moved to a different parent, it is safe to access the + * parent directly. 
+ */ struct kernfs_node *parent; const char *name; @@ -229,6 +234,12 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) return kn->flags & KERNFS_NS; } +int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); +char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, + size_t buflen); +void pr_cont_kernfs_name(struct kernfs_node *kn); +void pr_cont_kernfs_path(struct kernfs_node *kn); +struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns); void kernfs_get(struct kernfs_node *kn); @@ -283,6 +294,19 @@ static inline void kernfs_enable_ns(struct kernfs_node *kn) { } static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return false; } +static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) +{ return -ENOSYS; } + +static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, + size_t buflen) +{ return NULL; } + +static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { } +static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { } + +static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) +{ return NULL; } + static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) -- cgit v1.1 From fa4cd451cceb77e97432b91fcf50a7e4a7361e29 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Feb 2014 13:32:07 -0500 Subject: sysfs, kobject: add sysfs wrapper for kernfs_enable_ns() Currently, kobject is invoking kernfs_enable_ns() directly. This is fine now as sysfs and kernfs are enabled and disabled together. 
If sysfs is disabled, kernfs_enable_ns() is switched to dummy implementation too and everything is fine; however, kernfs will soon have its own config option CONFIG_KERNFS and !SYSFS && KERNFS will be possible, which can make kobject call into non-dummy kernfs_enable_ns() with NULL kernfs_node pointers leading to an oops. Introduce sysfs_enable_ns() which is a wrapper around kernfs_enable_ns() so that it can be made a noop depending only on CONFIG_SYSFS regardless of the planned CONFIG_KERNFS. Signed-off-by: Tejun Heo Reported-by: Fengguang Wu Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 9 +++++++++ lib/kobject.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 14df054..fdaa0c6 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -244,6 +244,11 @@ void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); int __must_check sysfs_init(void); +static inline void sysfs_enable_ns(struct kernfs_node *kn) +{ + return kernfs_enable_ns(kn); +} + #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) @@ -416,6 +421,10 @@ static inline int __must_check sysfs_init(void) return 0; } +static inline void sysfs_enable_ns(struct kernfs_node *kn) +{ +} + #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, diff --git a/lib/kobject.c b/lib/kobject.c index cb14aea..58751bb 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -94,7 +94,7 @@ static int create_dir(struct kobject *kobj) BUG_ON(ops->type >= KOBJ_NS_TYPES); BUG_ON(!kobj_ns_type_registered(ops->type)); - kernfs_enable_ns(kobj->sd); + sysfs_enable_ns(kobj->sd); } return 0; -- cgit v1.1 From ba341d55a420ab4fdd1a53fd395fd59bd65de880 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:17 -0500 Subject: kernfs: add CONFIG_KERNFS As sysfs was kernfs's only user, kernfs has been piggybacking on CONFIG_SYSFS; 
however, kernfs is scheduled to grow a new user very soon. Introduce a separate config option CONFIG_KERNFS which is to be selected by kernfs users. Signed-off-by: Tejun Heo Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/Kconfig | 1 + fs/Makefile | 3 ++- fs/kernfs/Kconfig | 7 +++++++ fs/sysfs/Kconfig | 1 + include/linux/kernfs.h | 6 +++--- 5 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 fs/kernfs/Kconfig diff --git a/fs/Kconfig b/fs/Kconfig index 7385e54..312393f 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -96,6 +96,7 @@ endif # BLOCK menu "Pseudo filesystems" source "fs/proc/Kconfig" +source "fs/kernfs/Kconfig" source "fs/sysfs/Kconfig" config TMPFS diff --git a/fs/Makefile b/fs/Makefile index 47ac07b..f9cb987 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -52,7 +52,8 @@ obj-$(CONFIG_FHANDLE) += fhandle.o obj-y += quota/ obj-$(CONFIG_PROC_FS) += proc/ -obj-$(CONFIG_SYSFS) += sysfs/ kernfs/ +obj-$(CONFIG_KERNFS) += kernfs/ +obj-$(CONFIG_SYSFS) += sysfs/ obj-$(CONFIG_CONFIGFS_FS) += configfs/ obj-y += devpts/ diff --git a/fs/kernfs/Kconfig b/fs/kernfs/Kconfig new file mode 100644 index 0000000..397b5f7 --- /dev/null +++ b/fs/kernfs/Kconfig @@ -0,0 +1,7 @@ +# +# KERNFS should be selected by its users +# + +config KERNFS + bool + default n diff --git a/fs/sysfs/Kconfig b/fs/sysfs/Kconfig index 8c41fea..b275601 100644 --- a/fs/sysfs/Kconfig +++ b/fs/sysfs/Kconfig @@ -1,6 +1,7 @@ config SYSFS bool "sysfs file system support" if EXPERT default y + select KERNFS help The sysfs filesystem is a virtual filesystem that the kernel uses to export internal kernel objects, their attributes, and their diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 8736ee8..649497a 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -201,7 +201,7 @@ struct kernfs_ops { #endif }; -#ifdef CONFIG_SYSFS +#ifdef CONFIG_KERNFS static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { @@ -284,7 +284,7 @@ 
void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); -#else /* CONFIG_SYSFS */ +#else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return 0; } /* whatever */ @@ -379,7 +379,7 @@ static inline void kernfs_kill_sb(struct super_block *sb) { } static inline void kernfs_init(void) { } -#endif /* CONFIG_SYSFS */ +#endif /* CONFIG_KERNFS */ static inline struct kernfs_node * kernfs_find_and_get(struct kernfs_node *kn, const char *name) -- cgit v1.1 From 9561a8961c708ff6ba3e71a817af0f16bdc1d885 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Feb 2014 17:57:09 -0500 Subject: kernfs: fix hash calculation in kernfs_rename_ns() 3eef34ad7dc3 ("kernfs: implement kernfs_get_parent(), kernfs_name/path() and friends") restructured kernfs_rename_ns() such that new name assignment happens under kernfs_rename_lock; unfortunately, it mistakenly passed NULL to kernfs_name_hash() to calculate the new hash if the name hasn't changed, which can lead to oops. Fix it by using kn->name and kn->ns when calculating the new hash. Signed-off-by: Tejun Heo Reported-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index a347792..a0f99b7 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1295,7 +1295,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, spin_unlock_irq(&kernfs_rename_lock); - kn->hash = kernfs_name_hash(new_name, new_ns); + kn->hash = kernfs_name_hash(kn->name, kn->ns); kernfs_link_sibling(kn); kernfs_put(old_parent); -- cgit v1.1 From 9383f4c6b66256c039c65ddc141f0caeeae51847 Mon Sep 17 00:00:00 2001 From: Josh Cartwright Date: Thu, 13 Feb 2014 22:00:43 -0600 Subject: ACPI / platform: drop redundant ACPI_HANDLE check The acpi_dev_pm_attach/_detach functions perform their own checks to ensure the device has an ACPI companion. 
It is not necessary for the caller to do so. This mirrors what other busses with ACPI dev PM support do (i2c, spi, sdio). Cc: Len Brown Acked-by: Rafael J. Wysocki Signed-off-by: Josh Cartwright Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index bc78848..e714709 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -481,11 +481,10 @@ static int platform_drv_probe(struct device *_dev) struct platform_device *dev = to_platform_device(_dev); int ret; - if (ACPI_HANDLE(_dev)) - acpi_dev_pm_attach(_dev, true); + acpi_dev_pm_attach(_dev, true); ret = drv->probe(dev); - if (ret && ACPI_HANDLE(_dev)) + if (ret) acpi_dev_pm_detach(_dev, true); if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) { @@ -508,8 +507,7 @@ static int platform_drv_remove(struct device *_dev) int ret; ret = drv->remove(dev); - if (ACPI_HANDLE(_dev)) - acpi_dev_pm_detach(_dev, true); + acpi_dev_pm_detach(_dev, true); return ret; } @@ -520,8 +518,7 @@ static void platform_drv_shutdown(struct device *_dev) struct platform_device *dev = to_platform_device(_dev); drv->shutdown(dev); - if (ACPI_HANDLE(_dev)) - acpi_dev_pm_detach(_dev, true); + acpi_dev_pm_detach(_dev, true); } /** -- cgit v1.1 From f41c593454943b80a2017c8a2a3d6b1d0b0a6f89 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 14 Feb 2014 16:57:27 +0800 Subject: kernfs: fix kernfs_node_from_dentry() Currently kernfs_node_from_dentry() returns NULL for root dentry, because root_dentry->d_op == NULL. Due to this bug cgroupstats_build() returns -EINVAL for root cgroup. 
# mount -t cgroup -o cpuacct /cgroup # Documentation/accounting/getdelays -C /cgroup fatal reply error, errno -22 With this fix: # Documentation/accounting/getdelays -C /cgroup sleeping 305, blocked 0, running 1, stopped 0, uninterruptible 1 Signed-off-by: Li Zefan Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 2 +- fs/kernfs/kernfs-internal.h | 1 + fs/kernfs/mount.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index a0f99b7..8245d3b 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -500,7 +500,7 @@ const struct dentry_operations kernfs_dops = { */ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) { - if (dentry->d_op == &kernfs_dops) + if (dentry->d_sb->s_op == &kernfs_sops) return dentry->d_fsdata; return NULL; } diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index a91d7a1..8be13b2 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -65,6 +65,7 @@ struct kernfs_super_info { }; #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) +extern const struct super_operations kernfs_sops; extern struct kmem_cache *kernfs_node_cache; /* diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index e5b28b0..405279b 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -39,7 +39,7 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) return 0; } -static const struct super_operations kernfs_sops = { +const struct super_operations kernfs_sops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .evict_inode = kernfs_evict_inode, -- cgit v1.1 From 4272b9611c30f99f51590085998129480f2fe45e Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 21 Jan 2014 16:23:10 -0500 Subject: drivers/base: delete non-required instances of include <linux/init.h> None of these files are actually using any __init type directives and hence don't need to include <linux/init.h>.
Most are just a left over from __devinit and __cpuinit removal, or simply due to code getting copied from one driver to the next. Cc: Len Brown Signed-off-by: Paul Gortmaker Acked-by: Pavel Machek Acked-by: Rafael J. Wysocki Acked-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/attribute_container.c | 1 - drivers/base/power/clock_ops.c | 1 - drivers/base/power/common.c | 1 - drivers/base/power/domain.c | 1 - drivers/base/power/domain_governor.c | 1 - drivers/base/power/opp.c | 1 - drivers/base/regmap/regmap-i2c.c | 1 - drivers/base/regmap/regmap-mmio.c | 1 - drivers/base/regmap/regmap-spi.c | 1 - drivers/base/topology.c | 1 - 10 files changed, 10 deletions(-) diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c index ecc1929..b84ca8f 100644 --- a/drivers/base/attribute_container.c +++ b/drivers/base/attribute_container.c @@ -12,7 +12,6 @@ */ #include -#include #include #include #include diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index e870bbe..b99e6c0 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -6,7 +6,6 @@ * This file is released under the GPLv2. */ -#include #include #include #include diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index 5da9140..df2e5ee 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -6,7 +6,6 @@ * This file is released under the GPLv2. */ -#include #include #include #include diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index bfb8955..921b192 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -6,7 +6,6 @@ * This file is released under the GPLv2. 
*/ -#include #include #include #include diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 28dee30..a089e3b 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -6,7 +6,6 @@ * This file is released under the GPLv2. */ -#include #include #include #include diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index fa41874..2553867 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c index fa6bf52..ebd1895 100644 --- a/drivers/base/regmap/regmap-i2c.c +++ b/drivers/base/regmap/regmap-i2c.c @@ -13,7 +13,6 @@ #include #include #include -#include static int regmap_i2c_write(void *context, const void *data, size_t count) { diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c index 81f9775..4410cb2 100644 --- a/drivers/base/regmap/regmap-mmio.c +++ b/drivers/base/regmap/regmap-mmio.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c index 37f12ae..0eb3097 100644 --- a/drivers/base/regmap/regmap-spi.c +++ b/drivers/base/regmap/regmap-spi.c @@ -12,7 +12,6 @@ #include #include -#include #include #include "internal.h" diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 94ffee3..ad9d177 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -23,7 +23,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
* */ -#include #include #include #include -- cgit v1.1 From b12b73f1ca7763a94e0949a0d86eea40b5f69ea6 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 28 Jan 2014 05:06:21 -0800 Subject: firmware: fix google/gsmi duplicate efivars_sysfs_init() Starting in commit e14ab23dde12b80db4c94b684a2e485b72b16af3, efivars_sysfs_init() is called both by itself as an init function, and by drivers/firmware/google/gsmi.c gsmi_init(). This results in runtime warnings such as the following: [ 5.651330] WARNING: at fs/sysfs/dir.c:530 sysfs_add_one+0xbd/0xe0() [ 5.657699] sysfs: cannot create duplicate filename '/firmware/gsmi/vars' Fixing this by removing the redundant efivars_sysfs_init() call in gsmi_init(). Tested: booted, checked that /firmware/gsmi/vars was still present and showed the expected contents. Signed-off-by: Michel Lespinasse Acked-by: Matt Fleming Acked-by: Mike Waychison Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/gsmi.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index e5a67b2..f1ab05e 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c @@ -892,13 +892,6 @@ static __init int gsmi_init(void) goto out_remove_sysfs_files; } - ret = efivars_sysfs_init(); - if (ret) { - printk(KERN_INFO "gsmi: Failed to create efivars files\n"); - efivars_unregister(&efivars); - goto out_remove_sysfs_files; - } - register_reboot_notifier(&gsmi_reboot_notifier); register_die_notifier(&gsmi_die_notifier); atomic_notifier_chain_register(&panic_notifier_list, -- cgit v1.1 From cb88759107292b4bf36d13c73129add580d4e26e Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 28 Jan 2014 05:06:22 -0800 Subject: firmware: google memconsole driver fixes The google memconsole driver is currently broken upstream, as it tries to read memory that is described as reserved in /proc/iomem, by dereferencing a pointer obtained through phys_to_virt(). 
This triggers a kernel fault as such regions are unmapped after early boot. The proper workaround is to use ioremap_cache() / iounmap() around such accesses. As some unrelated changes, I also converted some printks to use pr_info() and added some missing __init annotations. Tested: booted dbg build, verified I could read /sys/firmware/log Signed-off-by: Michel Lespinasse Acked-by: Mike Waychison Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/memconsole.c | 47 ++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/firmware/google/memconsole.c b/drivers/firmware/google/memconsole.c index 2a90ba6..2f569aa 100644 --- a/drivers/firmware/google/memconsole.c +++ b/drivers/firmware/google/memconsole.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #define BIOS_MEMCONSOLE_V1_MAGIC 0xDEADBABE @@ -41,15 +42,25 @@ struct biosmemcon_ebda { }; } __packed; -static char *memconsole_baseaddr; +static u32 memconsole_baseaddr; static size_t memconsole_length; static ssize_t memconsole_read(struct file *filp, struct kobject *kobp, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { - return memory_read_from_buffer(buf, count, &pos, memconsole_baseaddr, - memconsole_length); + char *memconsole; + ssize_t ret; + + memconsole = ioremap_cache(memconsole_baseaddr, memconsole_length); + if (!memconsole) { + pr_err("memconsole: ioremap_cache failed\n"); + return -ENOMEM; + } + ret = memory_read_from_buffer(buf, count, &pos, memconsole, + memconsole_length); + iounmap(memconsole); + return ret; } static struct bin_attribute memconsole_bin_attr = { @@ -58,43 +69,42 @@ static struct bin_attribute memconsole_bin_attr = { }; -static void found_v1_header(struct biosmemcon_ebda *hdr) +static void __init found_v1_header(struct biosmemcon_ebda *hdr) { - printk(KERN_INFO "BIOS console v1 EBDA structure found at %p\n", hdr); - printk(KERN_INFO "BIOS console buffer at 0x%.8x, " + pr_info("BIOS console 
v1 EBDA structure found at %p\n", hdr); + pr_info("BIOS console buffer at 0x%.8x, " "start = %d, end = %d, num = %d\n", hdr->v1.buffer_addr, hdr->v1.start, hdr->v1.end, hdr->v1.num_chars); memconsole_length = hdr->v1.num_chars; - memconsole_baseaddr = phys_to_virt(hdr->v1.buffer_addr); + memconsole_baseaddr = hdr->v1.buffer_addr; } -static void found_v2_header(struct biosmemcon_ebda *hdr) +static void __init found_v2_header(struct biosmemcon_ebda *hdr) { - printk(KERN_INFO "BIOS console v2 EBDA structure found at %p\n", hdr); - printk(KERN_INFO "BIOS console buffer at 0x%.8x, " + pr_info("BIOS console v2 EBDA structure found at %p\n", hdr); + pr_info("BIOS console buffer at 0x%.8x, " "start = %d, end = %d, num_bytes = %d\n", hdr->v2.buffer_addr, hdr->v2.start, hdr->v2.end, hdr->v2.num_bytes); memconsole_length = hdr->v2.end - hdr->v2.start; - memconsole_baseaddr = phys_to_virt(hdr->v2.buffer_addr - + hdr->v2.start); + memconsole_baseaddr = hdr->v2.buffer_addr + hdr->v2.start; } /* * Search through the EBDA for the BIOS Memory Console, and * set the global variables to point to it. Return true if found. 
*/ -static bool found_memconsole(void) +static bool __init found_memconsole(void) { unsigned int address; size_t length, cur; address = get_bios_ebda(); if (!address) { - printk(KERN_INFO "BIOS EBDA non-existent.\n"); + pr_info("BIOS EBDA non-existent.\n"); return false; } @@ -122,7 +132,7 @@ static bool found_memconsole(void) } } - printk(KERN_INFO "BIOS console EBDA structure not found!\n"); + pr_info("BIOS console EBDA structure not found!\n"); return false; } @@ -139,8 +149,6 @@ MODULE_DEVICE_TABLE(dmi, memconsole_dmi_table); static int __init memconsole_init(void) { - int ret; - if (!dmi_check_system(memconsole_dmi_table)) return -ENODEV; @@ -148,10 +156,7 @@ static int __init memconsole_init(void) return -ENODEV; memconsole_bin_attr.size = memconsole_length; - - ret = sysfs_create_bin_file(firmware_kobj, &memconsole_bin_attr); - - return ret; + return sysfs_create_bin_file(firmware_kobj, &memconsole_bin_attr); } static void __exit memconsole_exit(void) -- cgit v1.1 From 2b1278cb651786648ba6dad285a6c0873c6788e1 Mon Sep 17 00:00:00 2001 From: zhang jun Date: Thu, 13 Feb 2014 15:18:47 +0800 Subject: firmware: give a protection when map page failed so, we need give a protection and return a error value. [ 7341.474236] [drm:do_intel_finish_page_flip] *ERROR* invalid or inactive unpin_work! [ 7341.494464] atomisp-css2400b0_v21 0000:00:03.0: unhandled css stored event: 0x20 [ 7341.503627] vmap allocation for size 208896 failed: use vmalloc= to increase size.<=================== map failed [ 7341.507135] [drm:do_intel_finish_page_flip] *ERROR* invalid or inactive unpin_work! [ 7341.503848] BUG: unable to handle kernel NULL pointer dereference at (null) [ 7341.520394] IP: [] sst_load_all_modules_elf+0x1bb/0x850 [ 7341.527216] *pdpt = 0000000030dfe001 *pde = 0000000000000000 [ 7341.533640] Oops: 0000 [#1] PREEMPT SMP [ 7341.540360] [drm:do_intel_finish_page_flip] *ERROR* invalid or inactive unpin_work! 
[ 7341.538037] Modules linked in: atomisp_css2400b0_v21 lm3554 ov2722 imx1x5 atmel_mxt_ts vxd392 videobuf_vmalloc videobuf_core lm_dump(O) bcm_bt_lpm hdmi_audio bcm4334x(O) [ 7341.563531] CPU: 1 PID: 525 Comm: mediaserver Tainted: G W O 3.10.20-262518-ga83c053 #1 [ 7341.573253] task: f0994ec0 ti: f09f0000 task.ti: f09f0000 [ 7341.579284] EIP: 0060:[] EFLAGS: 00010246 CPU: 1 [ 7341.585415] EIP is at sst_load_all_modules_elf+0x1bb/0x850 [ 7341.591541] EAX: 00000000 EBX: e3595ba0 ECX: 00000000 EDX: 00031c1c [ 7341.598541] ESI: e04a0000 EDI: 00000000 EBP: f09f1d80 ESP: f09f1cf4 [ 7341.605542] DS: 007b ES: 007b FS: 00d8 GS: 003b SS: 0068 [ 7341.611573] CR0: 80050033 CR2: 00000000 CR3: 30db4000 CR4: 001007f0 [ 7341.618573] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 [ 7341.625575] DR6: ffff0ff0 DR7: 00000400 [ 7341.629856] Stack: [ 7341.632097] f09f1d57 00000019 c1d656d7 c1d658d3 c1d56409 00000f28 c1d64af9 18000103 [ 7341.640766] 01000001 00080000 c1f910a0 f326f4c8 00000034 f326f520 00000002 e04a02bc [ 7341.649465] 00000001 f326e014 c1f910b0 e04a0000 c0080100 00031c1c e3595ba0 c0080100 [ 7341.658149] Call Trace: [ 7341.660888] [] sst_post_download_byt+0x58/0xb0 [ 7341.666925] [] sst_load_fw+0xdc/0x510 [ 7341.672086] [] ? __mutex_lock_slowpath+0x250/0x370 [ 7341.678507] [] ? sub_preempt_count+0x55/0xe0 [ 7341.684346] [] sst_download_fw+0x14/0x60 [ 7341.689796] [] ? mutex_lock+0x23/0x30 [ 7341.694954] [] intel_sst_check_device+0x6c/0x120 [ 7341.701181] [] sst_set_generic_params+0x1b8/0x4a0 [ 7341.707504] [] ? sub_preempt_count+0x55/0xe0 [ 7341.713341] [] ? sub_preempt_count+0x55/0xe0 [ 7341.719178] [] ? __mutex_lock_slowpath+0x250/0x370 [ 7341.725600] [] ? __kmalloc_track_caller+0xe4/0x1d0 [ 7341.732022] [] sst_set_mixer_param+0x25/0x40 [ 7341.737859] [] lpe_mixer_ihf_set+0xb3/0x160 [ 7341.743602] [] snd_ctl_ioctl+0xa89/0xb40 [ 7341.749052] [] ? path_openat+0xa5/0x3d0 [ 7341.754409] [] ? avc_has_perm_flags+0xc7/0x170 [ 7341.760441] [] ? 
snd_ctl_elem_add_user+0x540/0x540 [ 7341.766862] [] do_vfs_ioctl+0x77/0x5e0 [ 7341.772117] [] ? inode_has_perm.isra.42.constprop.79+0x3a/0x50 [ 7341.779705] [] ? file_has_perm+0xa0/0xb0 [ 7341.785155] [] ? selinux_file_ioctl+0x48/0xe0 [ 7341.791090] [] SyS_ioctl+0x78/0x90 [ 7341.795958] [] syscall_call+0x7/0xb [ 7341.800925] [] ? mm_fault_error+0x13c/0x198 Signed-off-by: zhang jun Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 8a97ddf..e5808eb 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -649,7 +649,9 @@ static ssize_t firmware_loading_store(struct device *dev, * see the mapped 'buf->data' once the loading * is completed. * */ - fw_map_pages_buf(fw_buf); + if (fw_map_pages_buf(fw_buf)) + dev_err(dev, "%s: map pages failed\n", + __func__); list_del_init(&fw_buf->pending_list); complete_all(&fw_buf->completion); break; @@ -908,6 +910,8 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, wait_for_completion(&buf->completion); cancel_delayed_work_sync(&fw_priv->timeout_work); + if (!buf->data) + retval = -ENOMEM; device_remove_file(f_dev, &dev_attr_loading); err_del_bin_attr: -- cgit v1.1 From bce6618a11ca4ca3cb5d6a2a1d43c00f8301d2b2 Mon Sep 17 00:00:00 2001 From: Shaibal Dutta Date: Fri, 31 Jan 2014 15:44:58 -0800 Subject: firmware: use power efficient workqueue for unloading and aborting fw load Allow the scheduler to select the most appropriate CPU for running the firmware load timeout routine and delayed routine for firmware unload. This extends idle residency times and conserves power. This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected. Cc: Ming Lei Cc: Greg Kroah-Hartman Signed-off-by: Shaibal Dutta [zoran.markovic@linaro.org: Rebased to latest kernel, added commit message. Fixed code alignment.] 
Signed-off-by: Zoran Markovic Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index e5808eb..cb8b779 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -902,7 +902,8 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, dev_set_uevent_suppress(f_dev, false); dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id); if (timeout != MAX_SCHEDULE_TIMEOUT) - schedule_delayed_work(&fw_priv->timeout_work, timeout); + queue_delayed_work(system_power_efficient_wq, + &fw_priv->timeout_work, timeout); kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD); } @@ -1574,8 +1575,8 @@ static void device_uncache_fw_images_work(struct work_struct *work) */ static void device_uncache_fw_images_delay(unsigned long delay) { - schedule_delayed_work(&fw_cache.work, - msecs_to_jiffies(delay)); + queue_delayed_work(system_power_efficient_wq, &fw_cache.work, + msecs_to_jiffies(delay)); } static int fw_pm_notify(struct notifier_block *notify_block, -- cgit v1.1 From 5c764dfaefc582d9c3e2bbd52ae2e30747af8a37 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Sat, 8 Feb 2014 09:59:02 +0800 Subject: driver core: unexport static function create_syslog_header Function create_syslog_header() is defined as static, so it should not be exported. 
Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 4195364..80b6a9b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2042,7 +2042,6 @@ create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) return pos; } -EXPORT_SYMBOL(create_syslog_header); int dev_vprintk_emit(int level, const struct device *dev, const char *fmt, va_list args) -- cgit v1.1 From aabaf4c2050d21d39fe11eec889c508e84d6a328 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 14 Feb 2014 14:02:07 -0800 Subject: sysfs: create bin_attributes under the requested group bin_attributes created/updated in create_files() (such as those listed via (struct device).attribute_groups) were not placed under the specified group, and instead appeared in the base kobj directory. Fix this by making bin_attributes use creating code similar to normal attributes. A quick grep shows that no one is using bin_attrs in a named attribute group yet, so we can do this without breaking anything in userspace. Note that I do not add is_visible() support to bin_attributes, though that could be done as well. 
Signed-off-by: Cody P Schafer Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/group.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 6b57938..aa04068 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -70,8 +70,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, if (grp->bin_attrs) { for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { if (update) - sysfs_remove_bin_file(kobj, *bin_attr); - error = sysfs_create_bin_file(kobj, *bin_attr); + kernfs_remove_by_name(parent, + (*bin_attr)->attr.name); + error = sysfs_add_file_mode_ns(parent, + &(*bin_attr)->attr, true, + (*bin_attr)->attr.mode, NULL); if (error) break; } -- cgit v1.1 From 67bad2fdb754dbef14596c0b5d28b3a12c8dfe84 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 8 Feb 2014 13:34:09 +0100 Subject: cpu: add generic support for CPU feature based module autoloading This patch adds support for advertising optional CPU features over udev using the modalias, and for declaring compatibility with/dependency upon such a feature in a module. The mapping between feature numbers and actual features should be provided by the architecture in a file called <asm/cpufeature.h> which exports the following functions/macros: - cpu_feature(FEAT), a preprocessor macro that maps token FEAT to a numeric index; - bool cpu_have_feature(n), returning whether this CPU has support for feature #n; - MAX_CPU_FEATURES, an upper bound for 'n' in the previous function. The feature can then be enabled by setting CONFIG_GENERIC_CPU_AUTOPROBE for the architecture. For instance, a module that registers its module init function using module_cpu_feature_match(FEAT_X, module_init_function) will be probed automatically when the CPU's support for the 'FEAT_X' feature is advertised over udev, and will only allow the module to be loaded by hand if the 'FEAT_X' feature is supported. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/base/Kconfig | 8 ++++++ drivers/base/cpu.c | 50 ++++++++++++++++++++++++++++---- include/linux/cpufeature.h | 60 +++++++++++++++++++++++++++++++++++++++ include/linux/mod_devicetable.h | 9 ++++++ scripts/mod/devicetable-offsets.c | 3 ++ scripts/mod/file2alias.c | 10 +++++++ 6 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 include/linux/cpufeature.h diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index ec36e77..3f0d373 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -185,6 +185,14 @@ config GENERIC_CPU_DEVICES bool default n +config HAVE_CPU_AUTOPROBE + def_bool ARCH_HAS_CPU_AUTOPROBE + +config GENERIC_CPU_AUTOPROBE + bool + depends on !ARCH_HAS_CPU_AUTOPROBE + select HAVE_CPU_AUTOPROBE + config SOC_BUS bool diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index f48370d..8a38bf8 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "base.h" @@ -286,6 +287,45 @@ static void cpu_device_release(struct device *dev) */ } +#ifdef CONFIG_HAVE_CPU_AUTOPROBE +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE +static ssize_t print_cpu_modalias(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t n; + u32 i; + + n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", + CPU_FEATURE_TYPEVAL); + + for (i = 0; i < MAX_CPU_FEATURES; i++) + if (cpu_have_feature(i)) { + if (PAGE_SIZE < n + sizeof(",XXXX\n")) { + WARN(1, "CPU features overflow page\n"); + break; + } + n += sprintf(&buf[n], ",%04X", i); + } + buf[n++] = '\n'; + return n; +} +#else +#define print_cpu_modalias arch_print_cpu_modalias +#endif + +static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (buf) { + print_cpu_modalias(NULL, NULL, buf); + add_uevent_var(env, "MODALIAS=%s", buf); + kfree(buf); + } + return 0; +} +#endif + /* * 
register_cpu - Setup a sysfs device for a CPU. * @cpu - cpu->hotpluggable field set to 1 will generate a control file in @@ -306,8 +346,8 @@ int register_cpu(struct cpu *cpu, int num) cpu->dev.offline_disabled = !cpu->hotpluggable; cpu->dev.offline = !cpu_online(num); cpu->dev.of_node = of_get_cpu_node(num, NULL); -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE - cpu->dev.bus->uevent = arch_cpu_uevent; +#ifdef CONFIG_HAVE_CPU_AUTOPROBE + cpu->dev.bus->uevent = cpu_uevent; #endif cpu->dev.groups = common_cpu_attr_groups; if (cpu->hotpluggable) @@ -330,8 +370,8 @@ struct device *get_cpu_device(unsigned cpu) } EXPORT_SYMBOL_GPL(get_cpu_device); -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE -static DEVICE_ATTR(modalias, 0444, arch_print_cpu_modalias, NULL); +#ifdef CONFIG_HAVE_CPU_AUTOPROBE +static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL); #endif static struct attribute *cpu_root_attrs[] = { @@ -344,7 +384,7 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE +#ifdef CONFIG_HAVE_CPU_AUTOPROBE &dev_attr_modalias.attr, #endif NULL diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h new file mode 100644 index 0000000..c4d4eb8 --- /dev/null +++ b/include/linux/cpufeature.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __LINUX_CPUFEATURE_H +#define __LINUX_CPUFEATURE_H + +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE + +#include +#include + +/* + * Macros imported from : + * - cpu_feature(x) ordinal value of feature called 'x' + * - cpu_have_feature(u32 n) whether feature #n is available + * - MAX_CPU_FEATURES upper bound for feature ordinal values + * Optional: + * - CPU_FEATURE_TYPEFMT format string fragment for printing the cpu type + * - CPU_FEATURE_TYPEVAL set of values matching the format string above + */ + +#ifndef CPU_FEATURE_TYPEFMT +#define CPU_FEATURE_TYPEFMT "%s" +#endif + +#ifndef CPU_FEATURE_TYPEVAL +#define CPU_FEATURE_TYPEVAL ELF_PLATFORM +#endif + +/* + * Use module_cpu_feature_match(feature, module_init_function) to + * declare that + * a) the module shall be probed upon discovery of CPU feature 'feature' + * (typically at boot time using udev) + * b) the module must not be loaded if CPU feature 'feature' is not present + * (not even by manual insmod). + * + * For a list of legal values for 'feature', please consult the file + * 'asm/cpufeature.h' of your favorite architecture. + */ +#define module_cpu_feature_match(x, __init) \ +static struct cpu_feature const cpu_feature_match_ ## x[] = \ + { { .feature = cpu_feature(x) }, { } }; \ +MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \ + \ +static int cpu_feature_match_ ## x ## _init(void) \ +{ \ + if (!cpu_have_feature(cpu_feature(x))) \ + return -ENODEV; \ + return __init(); \ +} \ +module_init(cpu_feature_match_ ## x ## _init) + +#endif +#endif diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 45e9214..f2ac87c 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -564,6 +564,15 @@ struct x86_cpu_id { #define X86_MODEL_ANY 0 #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ +/* + * Generic table type for matching CPU features. 
+ * @feature: the bit number of the feature (0 - 65535) + */ + +struct cpu_feature { + __u16 feature; +}; + #define IPACK_ANY_FORMAT 0xff #define IPACK_ANY_ID (~0) struct ipack_device_id { diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index bb5d115..f282516 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -174,6 +174,9 @@ int main(void) DEVID_FIELD(x86_cpu_id, model); DEVID_FIELD(x86_cpu_id, vendor); + DEVID(cpu_feature); + DEVID_FIELD(cpu_feature, feature); + DEVID(mei_cl_device_id); DEVID_FIELD(mei_cl_device_id, name); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 25e5cb0..506146e 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1135,6 +1135,16 @@ static int do_x86cpu_entry(const char *filename, void *symval, } ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry); +/* LOOKS like cpu:type:*:feature:*FEAT* */ +static int do_cpu_entry(const char *filename, void *symval, char *alias) +{ + DEF_FIELD(symval, cpu_feature, feature); + + sprintf(alias, "cpu:type:*:feature:*%04X*", feature); + return 1; +} +ADD_TO_DEVTABLE("cpu", cpu_feature, do_cpu_entry); + /* Looks like: mei:S */ static int do_mei_entry(const char *filename, void *symval, char *alias) -- cgit v1.1 From 2b9c1f03278ab7cd421f14ce24dee39091ecb064 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 8 Feb 2014 13:34:10 +0100 Subject: x86: align x86 arch with generic CPU modalias handling The x86 CPU feature modalias handling existed before it was reimplemented generically. This patch aligns the x86 handling so that it (a) reuses some more code that is now generic; (b) uses the generic format for the modalias module metadata entry, i.e., it now uses 'cpu:type:x86,venVVVVfamFFFFmodMMMM:feature:,XXXX,YYYY' instead of the 'x86cpu:vendor:VVVV:family:FFFF:model:MMMM:feature:,XXXX,YYYY' that was used before. Signed-off-by: Ard Biesheuvel Acked-by: H. 
Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 4 +--- arch/x86/include/asm/cpufeature.h | 7 +++++++ arch/x86/kernel/cpu/match.c | 42 --------------------------------------- drivers/base/Kconfig | 5 ----- drivers/base/cpu.c | 10 +++------- include/linux/cpu.h | 7 ------- scripts/mod/file2alias.c | 10 +++++----- 7 files changed, 16 insertions(+), 69 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0af5250..7fab7e0b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -127,6 +127,7 @@ config X86 select HAVE_DEBUG_STACKOVERFLOW select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 select HAVE_CC_STACKPROTECTOR + select GENERIC_CPU_AUTOPROBE config INSTRUCTION_DECODER def_bool y @@ -195,9 +196,6 @@ config ARCH_HAS_CPU_RELAX config ARCH_HAS_CACHE_LINE_SIZE def_bool y -config ARCH_HAS_CPU_AUTOPROBE - def_bool y - config HAVE_SETUP_PER_CPU_AREA def_bool y diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index e099f95..d86dc3d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -541,6 +541,13 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) #define static_cpu_has_bug(bit) static_cpu_has((bit)) #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) +#define MAX_CPU_FEATURES (NCAPINTS * 32) +#define cpu_have_feature boot_cpu_has + +#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" +#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ + boot_cpu_data.x86_model + #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ #endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 3656537..afa9f0d 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -47,45 +47,3 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) return NULL; } EXPORT_SYMBOL(x86_match_cpu); - -ssize_t arch_print_cpu_modalias(struct device *dev, - struct 
device_attribute *attr, - char *bufptr) -{ - int size = PAGE_SIZE; - int i, n; - char *buf = bufptr; - - n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:" - "model:%04X:feature:", - boot_cpu_data.x86_vendor, - boot_cpu_data.x86, - boot_cpu_data.x86_model); - size -= n; - buf += n; - size -= 1; - for (i = 0; i < NCAPINTS*32; i++) { - if (boot_cpu_has(i)) { - n = snprintf(buf, size, ",%04X", i); - if (n >= size) { - WARN(1, "x86 features overflow page\n"); - break; - } - size -= n; - buf += n; - } - } - *buf++ = '\n'; - return buf - bufptr; -} - -int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (buf) { - arch_print_cpu_modalias(NULL, NULL, buf); - add_uevent_var(env, "MODALIAS=%s", buf); - kfree(buf); - } - return 0; -} diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 3f0d373..8fa8dea 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -185,13 +185,8 @@ config GENERIC_CPU_DEVICES bool default n -config HAVE_CPU_AUTOPROBE - def_bool ARCH_HAS_CPU_AUTOPROBE - config GENERIC_CPU_AUTOPROBE bool - depends on !ARCH_HAS_CPU_AUTOPROBE - select HAVE_CPU_AUTOPROBE config SOC_BUS bool diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 8a38bf8..006b1bc 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -287,7 +287,6 @@ static void cpu_device_release(struct device *dev) */ } -#ifdef CONFIG_HAVE_CPU_AUTOPROBE #ifdef CONFIG_GENERIC_CPU_AUTOPROBE static ssize_t print_cpu_modalias(struct device *dev, struct device_attribute *attr, @@ -310,9 +309,6 @@ static ssize_t print_cpu_modalias(struct device *dev, buf[n++] = '\n'; return n; } -#else -#define print_cpu_modalias arch_print_cpu_modalias -#endif static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env) { @@ -346,7 +342,7 @@ int register_cpu(struct cpu *cpu, int num) cpu->dev.offline_disabled = !cpu->hotpluggable; cpu->dev.offline = !cpu_online(num); cpu->dev.of_node = of_get_cpu_node(num, NULL); 
-#ifdef CONFIG_HAVE_CPU_AUTOPROBE +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE cpu->dev.bus->uevent = cpu_uevent; #endif cpu->dev.groups = common_cpu_attr_groups; @@ -370,7 +366,7 @@ struct device *get_cpu_device(unsigned cpu) } EXPORT_SYMBOL_GPL(get_cpu_device); -#ifdef CONFIG_HAVE_CPU_AUTOPROBE +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL); #endif @@ -384,7 +380,7 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, -#ifdef CONFIG_HAVE_CPU_AUTOPROBE +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE &dev_attr_modalias.attr, #endif NULL diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 03e235ad..03e962e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -46,13 +46,6 @@ extern ssize_t arch_cpu_release(const char *, size_t); #endif struct notifier_block; -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE -extern int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env); -extern ssize_t arch_print_cpu_modalias(struct device *dev, - struct device_attribute *attr, - char *bufptr); -#endif - /* * CPU notifier priorities. */ diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 506146e..25f6f59 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1110,7 +1110,7 @@ static int do_amba_entry(const char *filename, } ADD_TO_DEVTABLE("amba", amba_id, do_amba_entry); -/* LOOKS like x86cpu:vendor:VVVV:family:FFFF:model:MMMM:feature:*,FEAT,* +/* LOOKS like cpu:type:x86,venVVVVfamFFFFmodMMMM:feature:*,FEAT,* * All fields are numbers. It would be nicer to use strings for vendor * and feature, but getting those out of the build system here is too * complicated. 
@@ -1124,10 +1124,10 @@ static int do_x86cpu_entry(const char *filename, void *symval, DEF_FIELD(symval, x86_cpu_id, model); DEF_FIELD(symval, x86_cpu_id, vendor); - strcpy(alias, "x86cpu:"); - ADD(alias, "vendor:", vendor != X86_VENDOR_ANY, vendor); - ADD(alias, ":family:", family != X86_FAMILY_ANY, family); - ADD(alias, ":model:", model != X86_MODEL_ANY, model); + strcpy(alias, "cpu:type:x86,"); + ADD(alias, "ven", vendor != X86_VENDOR_ANY, vendor); + ADD(alias, "fam", family != X86_FAMILY_ANY, family); + ADD(alias, "mod", model != X86_MODEL_ANY, model); strcat(alias, ":feature:*"); if (feature != X86_FEATURE_ANY) sprintf(alias + strlen(alias), "%04X*", feature); -- cgit v1.1 From 6d8b3e1ad3d3815d9c87b8553493301e243af76a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Feb 2014 13:29:31 -0500 Subject: kernfs: remove duplicate dir.c at the top dir a8fa94e0f2ab ("Merge branch 'master' into driver-core-next-test-merge-rc2") mistakenly introduced a duplicate dir.c at the top directory. Remove it. Signed-off-by: Tejun Heo Reported-by: Paul Gortmaker Signed-off-by: Greg Kroah-Hartman --- dir.c | 1248 ----------------------------------------------------------------- 1 file changed, 1248 deletions(-) delete mode 100644 dir.c diff --git a/dir.c b/dir.c deleted file mode 100644 index d8cfe0d..0000000 --- a/dir.c +++ /dev/null @@ -1,1248 +0,0 @@ -/* - * fs/kernfs/dir.c - kernfs directory implementation - * - * Copyright (c) 2001-3 Patrick Mochel - * Copyright (c) 2007 SUSE Linux Products GmbH - * Copyright (c) 2007, 2013 Tejun Heo - * - * This file is released under the GPLv2. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include "kernfs-internal.h" - -DEFINE_MUTEX(kernfs_mutex); - -#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) - -static bool kernfs_active(struct kernfs_node *kn) -{ - lockdep_assert_held(&kernfs_mutex); - return atomic_read(&kn->active) >= 0; -} - -static bool kernfs_lockdep(struct kernfs_node *kn) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - return kn->flags & KERNFS_LOCKDEP; -#else - return false; -#endif -} - -/** - * kernfs_name_hash - * @name: Null terminated string to hash - * @ns: Namespace tag to hash - * - * Returns 31 bit hash of ns + name (so it fits in an off_t ) - */ -static unsigned int kernfs_name_hash(const char *name, const void *ns) -{ - unsigned long hash = init_name_hash(); - unsigned int len = strlen(name); - while (len--) - hash = partial_name_hash(*name++, hash); - hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); - hash &= 0x7fffffffU; - /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ - if (hash < 1) - hash += 2; - if (hash >= INT_MAX) - hash = INT_MAX - 1; - return hash; -} - -static int kernfs_name_compare(unsigned int hash, const char *name, - const void *ns, const struct kernfs_node *kn) -{ - if (hash != kn->hash) - return hash - kn->hash; - if (ns != kn->ns) - return ns - kn->ns; - return strcmp(name, kn->name); -} - -static int kernfs_sd_compare(const struct kernfs_node *left, - const struct kernfs_node *right) -{ - return kernfs_name_compare(left->hash, left->name, left->ns, right); -} - -/** - * kernfs_link_sibling - link kernfs_node into sibling rbtree - * @kn: kernfs_node of interest - * - * Link @kn into its sibling rbtree which starts from - * @kn->parent->dir.children. - * - * Locking: - * mutex_lock(kernfs_mutex) - * - * RETURNS: - * 0 on susccess -EEXIST on failure. 
- */ -static int kernfs_link_sibling(struct kernfs_node *kn) -{ - struct rb_node **node = &kn->parent->dir.children.rb_node; - struct rb_node *parent = NULL; - - if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs++; - - while (*node) { - struct kernfs_node *pos; - int result; - - pos = rb_to_kn(*node); - parent = *node; - result = kernfs_sd_compare(kn, pos); - if (result < 0) - node = &pos->rb.rb_left; - else if (result > 0) - node = &pos->rb.rb_right; - else - return -EEXIST; - } - /* add new node and rebalance the tree */ - rb_link_node(&kn->rb, parent, node); - rb_insert_color(&kn->rb, &kn->parent->dir.children); - return 0; -} - -/** - * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree - * @kn: kernfs_node of interest - * - * Try to unlink @kn from its sibling rbtree which starts from - * kn->parent->dir.children. Returns %true if @kn was actually - * removed, %false if @kn wasn't on the rbtree. - * - * Locking: - * mutex_lock(kernfs_mutex) - */ -static bool kernfs_unlink_sibling(struct kernfs_node *kn) -{ - if (RB_EMPTY_NODE(&kn->rb)) - return false; - - if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs--; - - rb_erase(&kn->rb, &kn->parent->dir.children); - RB_CLEAR_NODE(&kn->rb); - return true; -} - -/** - * kernfs_get_active - get an active reference to kernfs_node - * @kn: kernfs_node to get an active reference to - * - * Get an active reference of @kn. This function is noop if @kn - * is NULL. - * - * RETURNS: - * Pointer to @kn on success, NULL on failure. - */ -struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) -{ - if (unlikely(!kn)) - return NULL; - - if (!atomic_inc_unless_negative(&kn->active)) - return NULL; - - if (kernfs_lockdep(kn)) - rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); - return kn; -} - -/** - * kernfs_put_active - put an active reference to kernfs_node - * @kn: kernfs_node to put an active reference to - * - * Put an active reference to @kn. This function is noop if @kn - * is NULL. 
- */ -void kernfs_put_active(struct kernfs_node *kn) -{ - struct kernfs_root *root = kernfs_root(kn); - int v; - - if (unlikely(!kn)) - return; - - if (kernfs_lockdep(kn)) - rwsem_release(&kn->dep_map, 1, _RET_IP_); - v = atomic_dec_return(&kn->active); - if (likely(v != KN_DEACTIVATED_BIAS)) - return; - - wake_up_all(&root->deactivate_waitq); -} - -/** - * kernfs_drain - drain kernfs_node - * @kn: kernfs_node to drain - * - * Drain existing usages and nuke all existing mmaps of @kn. Mutiple - * removers may invoke this function concurrently on @kn and all will - * return after draining is complete. - */ -static void kernfs_drain(struct kernfs_node *kn) - __releases(&kernfs_mutex) __acquires(&kernfs_mutex) -{ - struct kernfs_root *root = kernfs_root(kn); - - lockdep_assert_held(&kernfs_mutex); - WARN_ON_ONCE(kernfs_active(kn)); - - mutex_unlock(&kernfs_mutex); - - if (kernfs_lockdep(kn)) { - rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); - if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) - lock_contended(&kn->dep_map, _RET_IP_); - } - - /* but everyone should wait for draining */ - wait_event(root->deactivate_waitq, - atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); - - if (kernfs_lockdep(kn)) { - lock_acquired(&kn->dep_map, _RET_IP_); - rwsem_release(&kn->dep_map, 1, _RET_IP_); - } - - kernfs_unmap_bin_file(kn); - - mutex_lock(&kernfs_mutex); -} - -/** - * kernfs_get - get a reference count on a kernfs_node - * @kn: the target kernfs_node - */ -void kernfs_get(struct kernfs_node *kn) -{ - if (kn) { - WARN_ON(!atomic_read(&kn->count)); - atomic_inc(&kn->count); - } -} -EXPORT_SYMBOL_GPL(kernfs_get); - -/** - * kernfs_put - put a reference count on a kernfs_node - * @kn: the target kernfs_node - * - * Put a reference count of @kn and destroy it if it reached zero. 
- */ -void kernfs_put(struct kernfs_node *kn) -{ - struct kernfs_node *parent; - struct kernfs_root *root; - - if (!kn || !atomic_dec_and_test(&kn->count)) - return; - root = kernfs_root(kn); - repeat: - /* - * Moving/renaming is always done while holding reference. - * kn->parent won't change beneath us. - */ - parent = kn->parent; - - WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, - "kernfs_put: %s/%s: released with incorrect active_ref %d\n", - parent ? parent->name : "", kn->name, atomic_read(&kn->active)); - - if (kernfs_type(kn) == KERNFS_LINK) - kernfs_put(kn->symlink.target_kn); - if (!(kn->flags & KERNFS_STATIC_NAME)) - kfree(kn->name); - if (kn->iattr) { - if (kn->iattr->ia_secdata) - security_release_secctx(kn->iattr->ia_secdata, - kn->iattr->ia_secdata_len); - simple_xattrs_free(&kn->iattr->xattrs); - } - kfree(kn->iattr); - ida_simple_remove(&root->ino_ida, kn->ino); - kmem_cache_free(kernfs_node_cache, kn); - - kn = parent; - if (kn) { - if (atomic_dec_and_test(&kn->count)) - goto repeat; - } else { - /* just released the root kn, free @root too */ - ida_destroy(&root->ino_ida); - kfree(root); - } -} -EXPORT_SYMBOL_GPL(kernfs_put); - -static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) -{ - struct kernfs_node *kn; - - if (flags & LOOKUP_RCU) - return -ECHILD; - - /* Always perform fresh lookup for negatives */ - if (!dentry->d_inode) - goto out_bad_unlocked; - - kn = dentry->d_fsdata; - mutex_lock(&kernfs_mutex); - - /* The kernfs node has been deactivated */ - if (!kernfs_active(kn)) - goto out_bad; - - /* The kernfs node has been moved? 
*/ - if (dentry->d_parent->d_fsdata != kn->parent) - goto out_bad; - - /* The kernfs node has been renamed */ - if (strcmp(dentry->d_name.name, kn->name) != 0) - goto out_bad; - - /* The kernfs node has been moved to a different namespace */ - if (kn->parent && kernfs_ns_enabled(kn->parent) && - kernfs_info(dentry->d_sb)->ns != kn->ns) - goto out_bad; - - mutex_unlock(&kernfs_mutex); -out_valid: - return 1; -out_bad: - mutex_unlock(&kernfs_mutex); -out_bad_unlocked: - /* - * @dentry doesn't match the underlying kernfs node, drop the - * dentry and force lookup. If we have submounts we must allow the - * vfs caches to lie about the state of the filesystem to prevent - * leaks and other nasty things, so use check_submounts_and_drop() - * instead of d_drop(). - */ - if (check_submounts_and_drop(dentry) != 0) - goto out_valid; - - return 0; -} - -static void kernfs_dop_release(struct dentry *dentry) -{ - kernfs_put(dentry->d_fsdata); -} - -const struct dentry_operations kernfs_dops = { - .d_revalidate = kernfs_dop_revalidate, - .d_release = kernfs_dop_release, -}; - -static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, - const char *name, umode_t mode, - unsigned flags) -{ - char *dup_name = NULL; - struct kernfs_node *kn; - int ret; - - if (!(flags & KERNFS_STATIC_NAME)) { - name = dup_name = kstrdup(name, GFP_KERNEL); - if (!name) - return NULL; - } - - kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); - if (!kn) - goto err_out1; - - ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL); - if (ret < 0) - goto err_out2; - kn->ino = ret; - - atomic_set(&kn->count, 1); - atomic_set(&kn->active, KN_DEACTIVATED_BIAS); - RB_CLEAR_NODE(&kn->rb); - - kn->name = name; - kn->mode = mode; - kn->flags = flags; - - return kn; - - err_out2: - kmem_cache_free(kernfs_node_cache, kn); - err_out1: - kfree(dup_name); - return NULL; -} - -struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, - const char *name, umode_t mode, - unsigned flags) -{ - 
struct kernfs_node *kn; - - kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags); - if (kn) { - kernfs_get(parent); - kn->parent = parent; - } - return kn; -} - -/** - * kernfs_add_one - add kernfs_node to parent without warning - * @kn: kernfs_node to be added - * - * The caller must already have initialized @kn->parent. This - * function increments nlink of the parent's inode if @kn is a - * directory and link into the children list of the parent. - * - * RETURNS: - * 0 on success, -EEXIST if entry with the given name already - * exists. - */ -int kernfs_add_one(struct kernfs_node *kn) -{ - struct kernfs_node *parent = kn->parent; - struct kernfs_iattrs *ps_iattr; - bool has_ns; - int ret; - - mutex_lock(&kernfs_mutex); - - ret = -EINVAL; - has_ns = kernfs_ns_enabled(parent); - if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, kn->name)) - goto out_unlock; - - if (kernfs_type(parent) != KERNFS_DIR) - goto out_unlock; - - ret = -ENOENT; - if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) - goto out_unlock; - - kn->hash = kernfs_name_hash(kn->name, kn->ns); - - ret = kernfs_link_sibling(kn); - if (ret) - goto out_unlock; - - /* Update timestamps on the parent */ - ps_iattr = parent->iattr; - if (ps_iattr) { - struct iattr *ps_iattrs = &ps_iattr->ia_iattr; - ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; - } - - mutex_unlock(&kernfs_mutex); - - /* - * Activate the new node unless CREATE_DEACTIVATED is requested. - * If not activated here, the kernfs user is responsible for - * activating the node with kernfs_activate(). A node which hasn't - * been activated is not visible to userland and its removal won't - * trigger deactivation. 
- */ - if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) - kernfs_activate(kn); - return 0; - -out_unlock: - mutex_unlock(&kernfs_mutex); - return ret; -} - -/** - * kernfs_find_ns - find kernfs_node with the given name - * @parent: kernfs_node to search under - * @name: name to look for - * @ns: the namespace tag to use - * - * Look for kernfs_node with name @name under @parent. Returns pointer to - * the found kernfs_node on success, %NULL on failure. - */ -static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, - const unsigned char *name, - const void *ns) -{ - struct rb_node *node = parent->dir.children.rb_node; - bool has_ns = kernfs_ns_enabled(parent); - unsigned int hash; - - lockdep_assert_held(&kernfs_mutex); - - if (has_ns != (bool)ns) { - WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, name); - return NULL; - } - - hash = kernfs_name_hash(name, ns); - while (node) { - struct kernfs_node *kn; - int result; - - kn = rb_to_kn(node); - result = kernfs_name_compare(hash, name, ns, kn); - if (result < 0) - node = node->rb_left; - else if (result > 0) - node = node->rb_right; - else - return kn; - } - return NULL; -} - -/** - * kernfs_find_and_get_ns - find and get kernfs_node with the given name - * @parent: kernfs_node to search under - * @name: name to look for - * @ns: the namespace tag to use - * - * Look for kernfs_node with name @name under @parent and get a reference - * if found. This function may sleep and returns pointer to the found - * kernfs_node on success, %NULL on failure. 
- */ -struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, - const char *name, const void *ns) -{ - struct kernfs_node *kn; - - mutex_lock(&kernfs_mutex); - kn = kernfs_find_ns(parent, name, ns); - kernfs_get(kn); - mutex_unlock(&kernfs_mutex); - - return kn; -} -EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); - -/** - * kernfs_create_root - create a new kernfs hierarchy - * @scops: optional syscall operations for the hierarchy - * @priv: opaque data associated with the new directory - * - * Returns the root of the new hierarchy on success, ERR_PTR() value on - * failure. - */ -struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, - void *priv) -{ - struct kernfs_root *root; - struct kernfs_node *kn; - - root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) - return ERR_PTR(-ENOMEM); - - ida_init(&root->ino_ida); - - kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, - KERNFS_DIR); - if (!kn) { - ida_destroy(&root->ino_ida); - kfree(root); - return ERR_PTR(-ENOMEM); - } - - kernfs_activate(kn); - kn->priv = priv; - kn->dir.root = root; - - root->syscall_ops = scops; - root->kn = kn; - init_waitqueue_head(&root->deactivate_waitq); - - return root; -} - -/** - * kernfs_destroy_root - destroy a kernfs hierarchy - * @root: root of the hierarchy to destroy - * - * Destroy the hierarchy anchored at @root by removing all existing - * directories and destroying @root. - */ -void kernfs_destroy_root(struct kernfs_root *root) -{ - kernfs_remove(root->kn); /* will also free @root */ -} - -/** - * kernfs_create_dir_ns - create a directory - * @parent: parent in which to create a new directory - * @name: name of the new directory - * @mode: mode of the new directory - * @priv: opaque data associated with the new directory - * @ns: optional namespace tag of the directory - * - * Returns the created node on success, ERR_PTR() value on failure. 
- */ -struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, - const char *name, umode_t mode, - void *priv, const void *ns) -{ - struct kernfs_node *kn; - int rc; - - /* allocate */ - kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR); - if (!kn) - return ERR_PTR(-ENOMEM); - - kn->dir.root = parent->dir.root; - kn->ns = ns; - kn->priv = priv; - - /* link in */ - rc = kernfs_add_one(kn); - if (!rc) - return kn; - - kernfs_put(kn); - return ERR_PTR(rc); -} - -static struct dentry *kernfs_iop_lookup(struct inode *dir, - struct dentry *dentry, - unsigned int flags) -{ - struct dentry *ret; - struct kernfs_node *parent = dentry->d_parent->d_fsdata; - struct kernfs_node *kn; - struct inode *inode; - const void *ns = NULL; - - mutex_lock(&kernfs_mutex); - - if (kernfs_ns_enabled(parent)) - ns = kernfs_info(dir->i_sb)->ns; - - kn = kernfs_find_ns(parent, dentry->d_name.name, ns); - - /* no such entry */ - if (!kn || !kernfs_active(kn)) { - ret = NULL; - goto out_unlock; - } - kernfs_get(kn); - dentry->d_fsdata = kn; - - /* attach dentry and inode */ - inode = kernfs_get_inode(dir->i_sb, kn); - if (!inode) { - ret = ERR_PTR(-ENOMEM); - goto out_unlock; - } - - /* instantiate and hash dentry */ - ret = d_materialise_unique(dentry, inode); - out_unlock: - mutex_unlock(&kernfs_mutex); - return ret; -} - -static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, - umode_t mode) -{ - struct kernfs_node *parent = dir->i_private; - struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; - int ret; - - if (!scops || !scops->mkdir) - return -EPERM; - - if (!kernfs_get_active(parent)) - return -ENODEV; - - ret = scops->mkdir(parent, dentry->d_name.name, mode); - - kernfs_put_active(parent); - return ret; -} - -static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct kernfs_node *kn = dentry->d_fsdata; - struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; - int ret; - - if (!scops || 
!scops->rmdir) - return -EPERM; - - if (!kernfs_get_active(kn)) - return -ENODEV; - - ret = scops->rmdir(kn); - - kernfs_put_active(kn); - return ret; -} - -static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - struct kernfs_node *kn = old_dentry->d_fsdata; - struct kernfs_node *new_parent = new_dir->i_private; - struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; - int ret; - - if (!scops || !scops->rename) - return -EPERM; - - if (!kernfs_get_active(kn)) - return -ENODEV; - - if (!kernfs_get_active(new_parent)) { - kernfs_put_active(kn); - return -ENODEV; - } - - ret = scops->rename(kn, new_parent, new_dentry->d_name.name); - - kernfs_put_active(new_parent); - kernfs_put_active(kn); - return ret; -} - -const struct inode_operations kernfs_dir_iops = { - .lookup = kernfs_iop_lookup, - .permission = kernfs_iop_permission, - .setattr = kernfs_iop_setattr, - .getattr = kernfs_iop_getattr, - .setxattr = kernfs_iop_setxattr, - .removexattr = kernfs_iop_removexattr, - .getxattr = kernfs_iop_getxattr, - .listxattr = kernfs_iop_listxattr, - - .mkdir = kernfs_iop_mkdir, - .rmdir = kernfs_iop_rmdir, - .rename = kernfs_iop_rename, -}; - -static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) -{ - struct kernfs_node *last; - - while (true) { - struct rb_node *rbn; - - last = pos; - - if (kernfs_type(pos) != KERNFS_DIR) - break; - - rbn = rb_first(&pos->dir.children); - if (!rbn) - break; - - pos = rb_to_kn(rbn); - } - - return last; -} - -/** - * kernfs_next_descendant_post - find the next descendant for post-order walk - * @pos: the current position (%NULL to initiate traversal) - * @root: kernfs_node whose descendants to walk - * - * Find the next descendant to visit for post-order traversal of @root's - * descendants. @root is included in the iteration and the last node to be - * visited. 
- */ -static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, - struct kernfs_node *root) -{ - struct rb_node *rbn; - - lockdep_assert_held(&kernfs_mutex); - - /* if first iteration, visit leftmost descendant which may be root */ - if (!pos) - return kernfs_leftmost_descendant(root); - - /* if we visited @root, we're done */ - if (pos == root) - return NULL; - - /* if there's an unvisited sibling, visit its leftmost descendant */ - rbn = rb_next(&pos->rb); - if (rbn) - return kernfs_leftmost_descendant(rb_to_kn(rbn)); - - /* no sibling left, visit parent */ - return pos->parent; -} - -/** - * kernfs_activate - activate a node which started deactivated - * @kn: kernfs_node whose subtree is to be activated - * - * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node - * needs to be explicitly activated. A node which hasn't been activated - * isn't visible to userland and deactivation is skipped during its - * removal. This is useful to construct atomic init sequences where - * creation of multiple nodes should either succeed or fail atomically. - * - * The caller is responsible for ensuring that this function is not called - * after kernfs_remove*() is invoked on @kn. - */ -void kernfs_activate(struct kernfs_node *kn) -{ - struct kernfs_node *pos; - - mutex_lock(&kernfs_mutex); - - pos = NULL; - while ((pos = kernfs_next_descendant_post(pos, kn))) { - if (!pos || (pos->flags & KERNFS_ACTIVATED)) - continue; - - WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb)); - WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS); - - atomic_sub(KN_DEACTIVATED_BIAS, &pos->active); - pos->flags |= KERNFS_ACTIVATED; - } - - mutex_unlock(&kernfs_mutex); -} - -static void __kernfs_remove(struct kernfs_node *kn) -{ - struct kernfs_node *pos; - - lockdep_assert_held(&kernfs_mutex); - - /* - * Short-circuit if @kn has already finished removal. This is for - * kernfs_remove_self() which plays with active ref after removal. 
- */ - if (!kn || RB_EMPTY_NODE(&kn->rb)) - return; - - pr_debug("kernfs %s: removing\n", kn->name); - - /* prevent any new usage under @kn by deactivating all nodes */ - pos = NULL; - while ((pos = kernfs_next_descendant_post(pos, kn))) - if (kernfs_active(pos)) - atomic_add(KN_DEACTIVATED_BIAS, &pos->active); - - /* deactivate and unlink the subtree node-by-node */ - do { - pos = kernfs_leftmost_descendant(kn); - - /* - * kernfs_drain() drops kernfs_mutex temporarily and @pos's - * base ref could have been put by someone else by the time - * the function returns. Make sure it doesn't go away - * underneath us. - */ - kernfs_get(pos); - - /* - * Drain iff @kn was activated. This avoids draining and - * its lockdep annotations for nodes which have never been - * activated and allows embedding kernfs_remove() in create - * error paths without worrying about draining. - */ - if (kn->flags & KERNFS_ACTIVATED) - kernfs_drain(pos); - else - WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); - - /* - * kernfs_unlink_sibling() succeeds once per node. Use it - * to decide who's responsible for cleanups. - */ - if (!pos->parent || kernfs_unlink_sibling(pos)) { - struct kernfs_iattrs *ps_iattr = - pos->parent ? pos->parent->iattr : NULL; - - /* update timestamps on the parent */ - if (ps_iattr) { - ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; - ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; - } - - kernfs_put(pos); - } - - kernfs_put(pos); - } while (pos != kn); -} - -/** - * kernfs_remove - remove a kernfs_node recursively - * @kn: the kernfs_node to remove - * - * Remove @kn along with all its subdirectories and files. 
- */ -void kernfs_remove(struct kernfs_node *kn) -{ - mutex_lock(&kernfs_mutex); - __kernfs_remove(kn); - mutex_unlock(&kernfs_mutex); -} - -/** - * kernfs_break_active_protection - break out of active protection - * @kn: the self kernfs_node - * - * The caller must be running off of a kernfs operation which is invoked - * with an active reference - e.g. one of kernfs_ops. Each invocation of - * this function must also be matched with an invocation of - * kernfs_unbreak_active_protection(). - * - * This function releases the active reference of @kn the caller is - * holding. Once this function is called, @kn may be removed at any point - * and the caller is solely responsible for ensuring that the objects it - * dereferences are accessible. - */ -void kernfs_break_active_protection(struct kernfs_node *kn) -{ - /* - * Take out ourself out of the active ref dependency chain. If - * we're called without an active ref, lockdep will complain. - */ - kernfs_put_active(kn); -} - -/** - * kernfs_unbreak_active_protection - undo kernfs_break_active_protection() - * @kn: the self kernfs_node - * - * If kernfs_break_active_protection() was called, this function must be - * invoked before finishing the kernfs operation. Note that while this - * function restores the active reference, it doesn't and can't actually - * restore the active protection - @kn may already or be in the process of - * being removed. Once kernfs_break_active_protection() is invoked, that - * protection is irreversibly gone for the kernfs operation instance. - * - * While this function may be called at any point after - * kernfs_break_active_protection() is invoked, its most useful location - * would be right before the enclosing kernfs operation returns. 
- */ -void kernfs_unbreak_active_protection(struct kernfs_node *kn) -{ - /* - * @kn->active could be in any state; however, the increment we do - * here will be undone as soon as the enclosing kernfs operation - * finishes and this temporary bump can't break anything. If @kn - * is alive, nothing changes. If @kn is being deactivated, the - * soon-to-follow put will either finish deactivation or restore - * deactivated state. If @kn is already removed, the temporary - * bump is guaranteed to be gone before @kn is released. - */ - atomic_inc(&kn->active); - if (kernfs_lockdep(kn)) - rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_); -} - -/** - * kernfs_remove_self - remove a kernfs_node from its own method - * @kn: the self kernfs_node to remove - * - * The caller must be running off of a kernfs operation which is invoked - * with an active reference - e.g. one of kernfs_ops. This can be used to - * implement a file operation which deletes itself. - * - * For example, the "delete" file for a sysfs device directory can be - * implemented by invoking kernfs_remove_self() on the "delete" file - * itself. This function breaks the circular dependency of trying to - * deactivate self while holding an active ref itself. It isn't necessary - * to modify the usual removal path to use kernfs_remove_self(). The - * "delete" implementation can simply invoke kernfs_remove_self() on self - * before proceeding with the usual removal path. kernfs will ignore later - * kernfs_remove() on self. - * - * kernfs_remove_self() can be called multiple times concurrently on the - * same kernfs_node. Only the first one actually performs removal and - * returns %true. All others will wait until the kernfs operation which - * won self-removal finishes and return %false. Note that the losers wait - * for the completion of not only the winning kernfs_remove_self() but also - * the whole kernfs_ops which won the arbitration. 
This can be used to - * guarantee, for example, all concurrent writes to a "delete" file to - * finish only after the whole operation is complete. - */ -bool kernfs_remove_self(struct kernfs_node *kn) -{ - bool ret; - - mutex_lock(&kernfs_mutex); - kernfs_break_active_protection(kn); - - /* - * SUICIDAL is used to arbitrate among competing invocations. Only - * the first one will actually perform removal. When the removal - * is complete, SUICIDED is set and the active ref is restored - * while holding kernfs_mutex. The ones which lost arbitration - * waits for SUICDED && drained which can happen only after the - * enclosing kernfs operation which executed the winning instance - * of kernfs_remove_self() finished. - */ - if (!(kn->flags & KERNFS_SUICIDAL)) { - kn->flags |= KERNFS_SUICIDAL; - __kernfs_remove(kn); - kn->flags |= KERNFS_SUICIDED; - ret = true; - } else { - wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq; - DEFINE_WAIT(wait); - - while (true) { - prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE); - - if ((kn->flags & KERNFS_SUICIDED) && - atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) - break; - - mutex_unlock(&kernfs_mutex); - schedule(); - mutex_lock(&kernfs_mutex); - } - finish_wait(waitq, &wait); - WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); - ret = false; - } - - /* - * This must be done while holding kernfs_mutex; otherwise, waiting - * for SUICIDED && deactivated could finish prematurely. - */ - kernfs_unbreak_active_protection(kn); - - mutex_unlock(&kernfs_mutex); - return ret; -} - -/** - * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it - * @parent: parent of the target - * @name: name of the kernfs_node to remove - * @ns: namespace tag of the kernfs_node to remove - * - * Look for the kernfs_node with @name and @ns under @parent and remove it. - * Returns 0 on success, -ENOENT if such entry doesn't exist. 
- */ -int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, - const void *ns) -{ - struct kernfs_node *kn; - - if (!parent) { - WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n", - name); - return -ENOENT; - } - - mutex_lock(&kernfs_mutex); - - kn = kernfs_find_ns(parent, name, ns); - if (kn) - __kernfs_remove(kn); - - mutex_unlock(&kernfs_mutex); - - if (kn) - return 0; - else - return -ENOENT; -} - -/** - * kernfs_rename_ns - move and rename a kernfs_node - * @kn: target node - * @new_parent: new parent to put @sd under - * @new_name: new name - * @new_ns: new namespace tag - */ -int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns) -{ - int error; - - mutex_lock(&kernfs_mutex); - - error = -ENOENT; - if (!kernfs_active(kn) || !kernfs_active(new_parent)) - goto out; - - error = 0; - if ((kn->parent == new_parent) && (kn->ns == new_ns) && - (strcmp(kn->name, new_name) == 0)) - goto out; /* nothing to rename */ - - error = -EEXIST; - if (kernfs_find_ns(new_parent, new_name, new_ns)) - goto out; - - /* rename kernfs_node */ - if (strcmp(kn->name, new_name) != 0) { - error = -ENOMEM; - new_name = kstrdup(new_name, GFP_KERNEL); - if (!new_name) - goto out; - - if (kn->flags & KERNFS_STATIC_NAME) - kn->flags &= ~KERNFS_STATIC_NAME; - else - kfree(kn->name); - - kn->name = new_name; - } - - /* - * Move to the appropriate place in the appropriate directories rbtree. 
- */ - kernfs_unlink_sibling(kn); - kernfs_get(new_parent); - kernfs_put(kn->parent); - kn->ns = new_ns; - kn->hash = kernfs_name_hash(kn->name, kn->ns); - kn->parent = new_parent; - kernfs_link_sibling(kn); - - error = 0; - out: - mutex_unlock(&kernfs_mutex); - return error; -} - -/* Relationship between s_mode and the DT_xxx types */ -static inline unsigned char dt_type(struct kernfs_node *kn) -{ - return (kn->mode >> 12) & 15; -} - -static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) -{ - kernfs_put(filp->private_data); - return 0; -} - -static struct kernfs_node *kernfs_dir_pos(const void *ns, - struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) -{ - if (pos) { - int valid = kernfs_active(pos) && - pos->parent == parent && hash == pos->hash; - kernfs_put(pos); - if (!valid) - pos = NULL; - } - if (!pos && (hash > 1) && (hash < INT_MAX)) { - struct rb_node *node = parent->dir.children.rb_node; - while (node) { - pos = rb_to_kn(node); - - if (hash < pos->hash) - node = node->rb_left; - else if (hash > pos->hash) - node = node->rb_right; - else - break; - } - } - /* Skip over entries which are dying/dead or in the wrong namespace */ - while (pos && (!kernfs_active(pos) || pos->ns != ns)) { - struct rb_node *node = rb_next(&pos->rb); - if (!node) - pos = NULL; - else - pos = rb_to_kn(node); - } - return pos; -} - -static struct kernfs_node *kernfs_dir_next_pos(const void *ns, - struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) -{ - pos = kernfs_dir_pos(ns, parent, ino, pos); - if (pos) - do { - struct rb_node *node = rb_next(&pos->rb); - if (!node) - pos = NULL; - else - pos = rb_to_kn(node); - } while (pos && (!kernfs_active(pos) || pos->ns != ns)); - return pos; -} - -static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) -{ - struct dentry *dentry = file->f_path.dentry; - struct kernfs_node *parent = dentry->d_fsdata; - struct kernfs_node *pos = file->private_data; - const void *ns = NULL; - - 
if (!dir_emit_dots(file, ctx)) - return 0; - mutex_lock(&kernfs_mutex); - - if (kernfs_ns_enabled(parent)) - ns = kernfs_info(dentry->d_sb)->ns; - - for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos); - pos; - pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { - const char *name = pos->name; - unsigned int type = dt_type(pos); - int len = strlen(name); - ino_t ino = pos->ino; - - ctx->pos = pos->hash; - file->private_data = pos; - kernfs_get(pos); - - mutex_unlock(&kernfs_mutex); - if (!dir_emit(ctx, name, len, ino, type)) - return 0; - mutex_lock(&kernfs_mutex); - } - mutex_unlock(&kernfs_mutex); - file->private_data = NULL; - ctx->pos = INT_MAX; - return 0; -} - -static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset, - int whence) -{ - struct inode *inode = file_inode(file); - loff_t ret; - - mutex_lock(&inode->i_mutex); - ret = generic_file_llseek(file, offset, whence); - mutex_unlock(&inode->i_mutex); - - return ret; -} - -const struct file_operations kernfs_dir_fops = { - .read = generic_read_dir, - .iterate = kernfs_fop_readdir, - .release = kernfs_dir_fop_release, - .llseek = kernfs_dir_fop_llseek, -}; -- cgit v1.1 From 88391d49abb7d8dee91d405f96bd9e003cb6798d Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 5 Mar 2014 17:10:52 +0100 Subject: kernfs: fix off by one error. The hash values 0 and 1 are reserved for magic directory entries, but the code only prevents names hashing to 0. This patch fixes the test to also prevent hash value 1. Signed-off-by: Richard Cochran Cc: Reviewed-by: "Eric W. 
Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 8245d3b..0bd05ab 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -190,7 +190,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns) hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); hash &= 0x7fffffffU; /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ - if (hash < 1) + if (hash < 2) hash += 2; if (hash >= INT_MAX) hash = INT_MAX - 1; -- cgit v1.1 From aa0689b36b2144e91d2182605bed951565c4899b Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 4 Mar 2014 19:57:51 -0800 Subject: Revert "driver core: synchronize device shutdown" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 401097ea4b89846d66ac78f7f108d49c2e922d9c. The original changelog said: A patch series to make .shutdown execute asynchronously. Some drivers's shutdown can take a lot of time. The patches can help save some shutdown time. The patches use Arjan's async API. This patch: synchronize all tasks submitted by .shutdown However, I'm not able to find any evidence that any other patches from this series were applied, nor am I able to find any async tasks that are scheduled in a .shutdown context. On the other hand, we see occasional hangs on shutdown that appear to be caused by the async_synchronize_full() in device_shutdown() waiting forever for the async probing in sd if a SCSI disk shows up at just the wrong time — the system starts the probe, but begins shutting down and tears down too much of the SCSI driver to finish the probe. If we had any async shutdown tasks, I guess the right fix would be to create a "shutdown" async domain and have device_shutdown() only wait for that domain. But since there apparently are no async shutdown tasks, we can just revert the waiting. 
Signed-off-by: Roland Dreier Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 80b6a9b..20da3ad 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -1987,7 +1986,6 @@ void device_shutdown(void) spin_lock(&devices_kset->list_lock); } spin_unlock(&devices_kset->list_lock); - async_synchronize_full(); } /* -- cgit v1.1 From 92d585ef067da7a966d6ce78c601bd1562b62619 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Thu, 6 Mar 2014 17:18:21 +0800 Subject: numa: fix NULL pointer access and memory leak in unregister_one_node() When doing socket hot remove, "node_devices[nid]" is set to NULL; acpi_processor_remove() try_offline_node() unregister_one_node() Then hot add a socket, but do not echo 1 > /sys/devices/system/cpu/cpuXX/online, so register_one_node() will not be called, and "node_devices[nid]" is still NULL. If socket hot remove is done again, a NULL pointer access will happen. unregister_one_node() unregister_node() In addition, we should free the memory used by "node_devices[nid]" in unregister_one_node().
Signed-off-by: Xishi Qiu Signed-off-by: Greg Kroah-Hartman --- drivers/base/node.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/base/node.c b/drivers/base/node.c index bc9f43b..8f7ed99 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -599,7 +599,11 @@ int register_one_node(int nid) void unregister_one_node(int nid) { + if (!node_devices[nid]) + return; + unregister_node(node_devices[nid]); + kfree(node_devices[nid]); node_devices[nid] = NULL; } -- cgit v1.1 From b7ce40cff0b9f6597f8318fd761accd92727f61f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Mar 2014 15:38:46 -0500 Subject: kernfs: cache atomic_write_len in kernfs_open_file While implementing atomic_write_len, 4d3773c4bb41 ("kernfs: implement kernfs_ops->atomic_write_len") moved data copy from userland inside kernfs_get_active() and kernfs_open_file->mutex so that kernfs_ops->atomic_write_len can be accessed before copying buffer from userland; unfortunately, this could lead to locking order inversion involving mmap_sem if copy_from_user() takes a page fault. ====================================================== [ INFO: possible circular locking dependency detected ] 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 Tainted: G W ------------------------------------------------------- trinity-c236/10658 is trying to acquire lock: (&of->mutex#2){+.+.+.}, at: [] kernfs_fop_mmap+0x54/0x120 but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x6e/0xe0 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] validate_chain+0x6c5/0x7b0 [] __lock_acquire+0x4cd/0x5a0 [] lock_acquire+0x182/0x1d0 [] might_fault+0x7e/0xb0 [] kernfs_fop_write+0xd8/0x190 [] vfs_write+0xe3/0x1d0 [] SyS_write+0x5d/0xa0 [] tracesys+0xdd/0xe2 -> #0 (&of->mutex#2){+.+.+.}: [] check_prev_add+0x13f/0x560 [] validate_chain+0x6c5/0x7b0 [] __lock_acquire+0x4cd/0x5a0 [] lock_acquire+0x182/0x1d0 [] mutex_lock_nested+0x6a/0x510 [] kernfs_fop_mmap+0x54/0x120 [] mmap_region+0x310/0x5c0 [] do_mmap_pgoff+0x385/0x430 [] vm_mmap_pgoff+0x8f/0xe0 [] SyS_mmap_pgoff+0x1b0/0x210 [] SyS_mmap+0x1d/0x20 [] tracesys+0xdd/0xe2 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&of->mutex#2); lock(&mm->mmap_sem); lock(&of->mutex#2); *** DEADLOCK *** 1 lock held by trinity-c236/10658: #0: (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x6e/0xe0 stack backtrace: CPU: 2 PID: 10658 Comm: trinity-c236 Tainted: G W 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 0000000000000000 ffff88011911fa48 ffffffff8438e945 0000000000000000 0000000000000000 ffff88011911fa98 ffffffff811a0109 ffff88011911fab8 ffff88011911fab8 ffff88011911fa98 ffff880119128cc0 ffff880119128cf8 Call Trace: [] dump_stack+0x52/0x7f [] print_circular_bug+0x129/0x160 [] check_prev_add+0x13f/0x560 [] ? deactivate_slab+0x511/0x550 [] validate_chain+0x6c5/0x7b0 [] __lock_acquire+0x4cd/0x5a0 [] ? mmap_region+0x24a/0x5c0 [] lock_acquire+0x182/0x1d0 [] ? kernfs_fop_mmap+0x54/0x120 [] mutex_lock_nested+0x6a/0x510 [] ? kernfs_fop_mmap+0x54/0x120 [] ? get_parent_ip+0x11/0x50 [] ? kernfs_fop_mmap+0x54/0x120 [] kernfs_fop_mmap+0x54/0x120 [] mmap_region+0x310/0x5c0 [] do_mmap_pgoff+0x385/0x430 [] ? vm_mmap_pgoff+0x6e/0xe0 [] vm_mmap_pgoff+0x8f/0xe0 [] ? __rcu_read_unlock+0x44/0xb0 [] ? 
dup_fd+0x3c0/0x3c0 [] SyS_mmap_pgoff+0x1b0/0x210 [] SyS_mmap+0x1d/0x20 [] tracesys+0xdd/0xe2 Fix it by caching atomic_write_len in kernfs_open_file during open so that it can be determined without accessing kernfs_ops in kernfs_fop_write(). This restores the structure of kernfs_fop_write() before 4d3773c4bb41 with updated @len determination logic. Signed-off-by: Tejun Heo Reported-by: Sasha Levin References: http://lkml.kernel.org/g/53113485.2090407@oracle.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 63 +++++++++++++++++++++++++------------------------- include/linux/kernfs.h | 1 + 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index ddcb471..8034706 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -253,55 +253,50 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; - char *buf = NULL; - ssize_t len; - - /* - * @of->mutex nests outside active ref and is just to ensure that - * the ops aren't called concurrently for the same open file. 
- */ - mutex_lock(&of->mutex); - if (!kernfs_get_active(of->kn)) { - mutex_unlock(&of->mutex); - return -ENODEV; - } - - ops = kernfs_ops(of->kn); - if (!ops->write) { - len = -EINVAL; - goto out_unlock; - } + size_t len; + char *buf; - if (ops->atomic_write_len) { + if (of->atomic_write_len) { len = count; - if (len > ops->atomic_write_len) { - len = -E2BIG; - goto out_unlock; - } + if (len > of->atomic_write_len) + return -E2BIG; } else { len = min_t(size_t, count, PAGE_SIZE); } buf = kmalloc(len + 1, GFP_KERNEL); - if (!buf) { - len = -ENOMEM; - goto out_unlock; - } + if (!buf) + return -ENOMEM; if (copy_from_user(buf, user_buf, len)) { len = -EFAULT; - goto out_unlock; + goto out_free; } buf[len] = '\0'; /* guarantee string termination */ - len = ops->write(of, buf, len, *ppos); -out_unlock: + /* + * @of->mutex nests outside active ref and is just to ensure that + * the ops aren't called concurrently for the same open file. + */ + mutex_lock(&of->mutex); + if (!kernfs_get_active(of->kn)) { + mutex_unlock(&of->mutex); + len = -ENODEV; + goto out_free; + } + + ops = kernfs_ops(of->kn); + if (ops->write) + len = ops->write(of, buf, len, *ppos); + else + len = -EINVAL; + kernfs_put_active(of->kn); mutex_unlock(&of->mutex); if (len > 0) *ppos += len; - +out_free: kfree(buf); return len; } @@ -666,6 +661,12 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) of->file = file; /* + * Write path needs to atomic_write_len outside active reference. + * Cache it in open_file. See kernfs_fop_write() for details. + */ + of->atomic_write_len = ops->atomic_write_len; + + /* * Always instantiate seq_file even if read access doesn't use * seq_file or is not requested. This unifies private data access * and readable regular files are the vast majority anyway. 
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 09669d0..b0122dc 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -158,6 +158,7 @@ struct kernfs_open_file { int event; struct list_head list; + size_t atomic_write_len; bool mmapped; const struct vm_operations_struct *vm_ops; }; -- cgit v1.1 From 72099304eeb316c4b00df3ae83efe4375729bd78 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Mar 2014 20:54:57 -0700 Subject: Revert "sysfs, driver-core: remove unused {sysfs|device}_schedule_callback_owner()" This reverts commit d1ba277e79889085a2faec3b68b91ce89c63f888. As reported by Stephen, this patch breaks linux-next as a ppc patch suddenly (after 2 years) started using this old api call. So revert it for now, it will go away in 3.15-rc2 when we can change the PPC call to the new api. Reported-by: Stephen Rothwell Cc: Tejun Heo Cc: Stewart Smith Cc: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 33 ++++++++++++++++++ fs/sysfs/file.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/device.h | 11 +++++- include/linux/sysfs.h | 9 +++++ 4 files changed, 144 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 20da3ad..0dd6528 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -614,6 +614,39 @@ void device_remove_bin_file(struct device *dev, } EXPORT_SYMBOL_GPL(device_remove_bin_file); +/** + * device_schedule_callback_owner - helper to schedule a callback for a device + * @dev: device. + * @func: callback function to invoke later. + * @owner: module owning the callback routine + * + * Attribute methods must not unregister themselves or their parent device + * (which would amount to the same thing). Attempts to do so will deadlock, + * since unregistration is mutually exclusive with driver callbacks. 
+ * + * Instead methods can call this routine, which will attempt to allocate + * and schedule a workqueue request to call back @func with @dev as its + * argument in the workqueue's process context. @dev will be pinned until + * @func returns. + * + * This routine is usually called via the inline device_schedule_callback(), + * which automatically sets @owner to THIS_MODULE. + * + * Returns 0 if the request was submitted, -ENOMEM if storage could not + * be allocated, -ENODEV if a reference to @owner isn't available. + * + * NOTE: This routine won't work if CONFIG_SYSFS isn't set! It uses an + * underlying sysfs routine (since it is intended for use by attribute + * methods), and if sysfs isn't available you'll get nothing but -ENOSYS. + */ +int device_schedule_callback_owner(struct device *dev, + void (*func)(struct device *), struct module *owner) +{ + return sysfs_schedule_callback(&dev->kobj, + (void (*)(void *)) func, dev, owner); +} +EXPORT_SYMBOL_GPL(device_schedule_callback_owner); + static void klist_children_get(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 28cc1acd..1b8b91b 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -453,3 +453,95 @@ void sysfs_remove_bin_file(struct kobject *kobj, kernfs_remove_by_name(kobj->sd, attr->attr.name); } EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); + +struct sysfs_schedule_callback_struct { + struct list_head workq_list; + struct kobject *kobj; + void (*func)(void *); + void *data; + struct module *owner; + struct work_struct work; +}; + +static struct workqueue_struct *sysfs_workqueue; +static DEFINE_MUTEX(sysfs_workq_mutex); +static LIST_HEAD(sysfs_workq); +static void sysfs_schedule_callback_work(struct work_struct *work) +{ + struct sysfs_schedule_callback_struct *ss = container_of(work, + struct sysfs_schedule_callback_struct, work); + + (ss->func)(ss->data); + kobject_put(ss->kobj); + module_put(ss->owner); + 
mutex_lock(&sysfs_workq_mutex); + list_del(&ss->workq_list); + mutex_unlock(&sysfs_workq_mutex); + kfree(ss); +} + +/** + * sysfs_schedule_callback - helper to schedule a callback for a kobject + * @kobj: object we're acting for. + * @func: callback function to invoke later. + * @data: argument to pass to @func. + * @owner: module owning the callback code + * + * sysfs attribute methods must not unregister themselves or their parent + * kobject (which would amount to the same thing). Attempts to do so will + * deadlock, since unregistration is mutually exclusive with driver + * callbacks. + * + * Instead methods can call this routine, which will attempt to allocate + * and schedule a workqueue request to call back @func with @data as its + * argument in the workqueue's process context. @kobj will be pinned + * until @func returns. + * + * Returns 0 if the request was submitted, -ENOMEM if storage could not + * be allocated, -ENODEV if a reference to @owner isn't available, + * -EAGAIN if a callback has already been scheduled for @kobj. 
+ */ +int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), + void *data, struct module *owner) +{ + struct sysfs_schedule_callback_struct *ss, *tmp; + + if (!try_module_get(owner)) + return -ENODEV; + + mutex_lock(&sysfs_workq_mutex); + list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list) + if (ss->kobj == kobj) { + module_put(owner); + mutex_unlock(&sysfs_workq_mutex); + return -EAGAIN; + } + mutex_unlock(&sysfs_workq_mutex); + + if (sysfs_workqueue == NULL) { + sysfs_workqueue = create_singlethread_workqueue("sysfsd"); + if (sysfs_workqueue == NULL) { + module_put(owner); + return -ENOMEM; + } + } + + ss = kmalloc(sizeof(*ss), GFP_KERNEL); + if (!ss) { + module_put(owner); + return -ENOMEM; + } + kobject_get(kobj); + ss->kobj = kobj; + ss->func = func; + ss->data = data; + ss->owner = owner; + INIT_WORK(&ss->work, sysfs_schedule_callback_work); + INIT_LIST_HEAD(&ss->workq_list); + mutex_lock(&sysfs_workq_mutex); + list_add_tail(&ss->workq_list, &sysfs_workq); + mutex_unlock(&sysfs_workq_mutex); + queue_work(sysfs_workqueue, &ss->work); + return 0; +} +EXPORT_SYMBOL_GPL(sysfs_schedule_callback); diff --git a/include/linux/device.h b/include/linux/device.h index fb1ba13..1ff3f16 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -566,6 +566,12 @@ extern int __must_check device_create_bin_file(struct device *dev, const struct bin_attribute *attr); extern void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); +extern int device_schedule_callback_owner(struct device *dev, + void (*func)(struct device *dev), struct module *owner); + +/* This is a macro to avoid include problems with THIS_MODULE */ +#define device_schedule_callback(dev, func) \ + device_schedule_callback_owner(dev, func, THIS_MODULE) /* device resource management */ typedef void (*dr_release_t)(struct device *dev, void *res); @@ -925,7 +931,10 @@ extern int device_online(struct device *dev); extern struct device 
*__root_device_register(const char *name, struct module *owner); -/* This is a macro to avoid include problems with THIS_MODULE */ +/* + * This is a macro to avoid include problems with THIS_MODULE, + * just as per what is done for device_schedule_callback() above. + */ #define root_device_register(name) \ __root_device_register(name, THIS_MODULE) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fdaa0c6..e0bf210 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -178,6 +178,9 @@ struct sysfs_ops { #ifdef CONFIG_SYSFS +int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), + void *data, struct module *owner); + int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, @@ -251,6 +254,12 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) #else /* CONFIG_SYSFS */ +static inline int sysfs_schedule_callback(struct kobject *kobj, + void (*func)(void *), void *data, struct module *owner) +{ + return -ENOSYS; +} + static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { return 0; -- cgit v1.1