diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Makefile | 2 | ||||
-rw-r--r-- | fs/kernfs/Makefile | 5 | ||||
-rw-r--r-- | fs/kernfs/dir.c | 1020 | ||||
-rw-r--r-- | fs/kernfs/file.c | 813 | ||||
-rw-r--r-- | fs/kernfs/inode.c (renamed from fs/sysfs/inode.c) | 179 | ||||
-rw-r--r-- | fs/kernfs/kernfs-internal.h | 122 | ||||
-rw-r--r-- | fs/kernfs/mount.c | 165 | ||||
-rw-r--r-- | fs/kernfs/symlink.c | 152 | ||||
-rw-r--r-- | fs/namespace.c | 2 | ||||
-rw-r--r-- | fs/sysfs/Makefile | 2 | ||||
-rw-r--r-- | fs/sysfs/dir.c | 1030 | ||||
-rw-r--r-- | fs/sysfs/file.c | 929 | ||||
-rw-r--r-- | fs/sysfs/group.c | 59 | ||||
-rw-r--r-- | fs/sysfs/mount.c | 182 | ||||
-rw-r--r-- | fs/sysfs/symlink.c | 179 | ||||
-rw-r--r-- | fs/sysfs/sysfs.h | 228 |
16 files changed, 2654 insertions, 2415 deletions
diff --git a/fs/Makefile b/fs/Makefile index 4fe6df3..39a824f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -53,7 +53,7 @@ obj-$(CONFIG_FHANDLE) += fhandle.o obj-y += quota/ obj-$(CONFIG_PROC_FS) += proc/ -obj-$(CONFIG_SYSFS) += sysfs/ +obj-$(CONFIG_SYSFS) += sysfs/ kernfs/ obj-$(CONFIG_CONFIGFS_FS) += configfs/ obj-y += devpts/ diff --git a/fs/kernfs/Makefile b/fs/kernfs/Makefile new file mode 100644 index 0000000..674337c --- /dev/null +++ b/fs/kernfs/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the kernfs pseudo filesystem +# + +obj-y := mount.o inode.o dir.o file.o symlink.o diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c new file mode 100644 index 0000000..a441e3b --- /dev/null +++ b/fs/kernfs/dir.c @@ -0,0 +1,1020 @@ +/* + * fs/kernfs/dir.c - kernfs directory implementation + * + * Copyright (c) 2001-3 Patrick Mochel + * Copyright (c) 2007 SUSE Linux Products GmbH + * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> + * + * This file is released under the GPLv2. + */ + +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/idr.h> +#include <linux/slab.h> +#include <linux/security.h> +#include <linux/hash.h> + +#include "kernfs-internal.h" + +DEFINE_MUTEX(sysfs_mutex); + +#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb) + +/** + * sysfs_name_hash + * @name: Null terminated string to hash + * @ns: Namespace tag to hash + * + * Returns 31 bit hash of ns + name (so it fits in an off_t) + */ +static unsigned int sysfs_name_hash(const char *name, const void *ns) +{ + unsigned long hash = init_name_hash(); + unsigned int len = strlen(name); + while (len--) + hash = partial_name_hash(*name++, hash); + hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); + hash &= 0x7fffffffU; + /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ + if (hash < 2) + hash += 2; + if (hash >= INT_MAX) + hash = INT_MAX - 1; + return hash; +} + +static int sysfs_name_compare(unsigned int hash, const char *name, + const void *ns, const
struct sysfs_dirent *sd) +{ + if (hash != sd->s_hash) + return hash - sd->s_hash; + if (ns != sd->s_ns) + return ns - sd->s_ns; + return strcmp(name, sd->s_name); +} + +static int sysfs_sd_compare(const struct sysfs_dirent *left, + const struct sysfs_dirent *right) +{ + return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns, + right); +} + +/** + * sysfs_link_sibling - link sysfs_dirent into sibling rbtree + * @sd: sysfs_dirent of interest + * + * Link @sd into its sibling rbtree which starts from + * sd->s_parent->s_dir.children. + * + * Locking: + * mutex_lock(sysfs_mutex) + * + * RETURNS: + * 0 on success, -EEXIST on failure. + */ +static int sysfs_link_sibling(struct sysfs_dirent *sd) +{ + struct rb_node **node = &sd->s_parent->s_dir.children.rb_node; + struct rb_node *parent = NULL; + + while (*node) { + struct sysfs_dirent *pos; + int result; + + pos = to_sysfs_dirent(*node); + parent = *node; + result = sysfs_sd_compare(sd, pos); + if (result < 0) + node = &pos->s_rb.rb_left; + else if (result > 0) + node = &pos->s_rb.rb_right; + else + return -EEXIST; + } + /* add new node and rebalance the tree */ + rb_link_node(&sd->s_rb, parent, node); + rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children); + /* linked successfully, only now account @sd in the parent's subdirs */ + if (sysfs_type(sd) == SYSFS_DIR) + sd->s_parent->s_dir.subdirs++; + return 0; +} + +/** + * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree + * @sd: sysfs_dirent of interest + * + * Unlink @sd from its sibling rbtree which starts from + * sd->s_parent->s_dir.children. + * + * Locking: + * mutex_lock(sysfs_mutex) + */ +static void sysfs_unlink_sibling(struct sysfs_dirent *sd) +{ + if (sysfs_type(sd) == SYSFS_DIR) + sd->s_parent->s_dir.subdirs--; + + rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); +} + +/** + * sysfs_get_active - get an active reference to sysfs_dirent + * @sd: sysfs_dirent to get an active reference to + * + * Get an active reference of @sd. This function is noop if @sd + * is NULL.
+ * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ +struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) +{ + if (unlikely(!sd)) + return NULL; + + if (!atomic_inc_unless_negative(&sd->s_active)) + return NULL; + + if (sd->s_flags & SYSFS_FLAG_LOCKDEP) + rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_); + return sd; +} + +/** + * sysfs_put_active - put an active reference to sysfs_dirent + * @sd: sysfs_dirent to put an active reference to + * + * Put an active reference to @sd. This function is noop if @sd + * is NULL. + */ +void sysfs_put_active(struct sysfs_dirent *sd) +{ + int v; + + if (unlikely(!sd)) + return; + + if (sd->s_flags & SYSFS_FLAG_LOCKDEP) + rwsem_release(&sd->dep_map, 1, _RET_IP_); + v = atomic_dec_return(&sd->s_active); + if (likely(v != SD_DEACTIVATED_BIAS)) + return; + + /* atomic_dec_return() is a mb(), we'll always see the updated + * sd->u.completion. + */ + complete(sd->u.completion); +} + +/** + * sysfs_deactivate - deactivate sysfs_dirent + * @sd: sysfs_dirent to deactivate + * + * Deny new active references and drain existing ones. + */ +static void sysfs_deactivate(struct sysfs_dirent *sd) +{ + DECLARE_COMPLETION_ONSTACK(wait); + int v; + + BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED)); + + if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF)) + return; + + sd->u.completion = (void *)&wait; + + rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_); + /* atomic_add_return() is a mb(), put_active() will always see + * the updated sd->u.completion. 
+ */ + v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); + + if (v != SD_DEACTIVATED_BIAS) { + lock_contended(&sd->dep_map, _RET_IP_); + wait_for_completion(&wait); + } + + lock_acquired(&sd->dep_map, _RET_IP_); + rwsem_release(&sd->dep_map, 1, _RET_IP_); +} + +/** + * kernfs_get - get a reference count on a sysfs_dirent + * @sd: the target sysfs_dirent + */ +void kernfs_get(struct sysfs_dirent *sd) +{ + if (sd) { + WARN_ON(!atomic_read(&sd->s_count)); + atomic_inc(&sd->s_count); + } +} +EXPORT_SYMBOL_GPL(kernfs_get); + +/** + * kernfs_put - put a reference count on a sysfs_dirent + * @sd: the target sysfs_dirent + * + * Put a reference count of @sd and destroy it if it reached zero. + */ +void kernfs_put(struct sysfs_dirent *sd) +{ + struct sysfs_dirent *parent_sd; + struct kernfs_root *root; + + if (!sd || !atomic_dec_and_test(&sd->s_count)) + return; + root = kernfs_root(sd); + repeat: + /* Moving/renaming is always done while holding reference. + * sd->s_parent won't change beneath us. + */ + parent_sd = sd->s_parent; + + WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED), + "sysfs: free using entry: %s/%s\n", + parent_sd ? 
parent_sd->s_name : "", sd->s_name); + + if (sysfs_type(sd) == SYSFS_KOBJ_LINK) + kernfs_put(sd->s_symlink.target_sd); + if (sysfs_type(sd) & SYSFS_COPY_NAME) + kfree(sd->s_name); + if (sd->s_iattr) { + if (sd->s_iattr->ia_secdata) + security_release_secctx(sd->s_iattr->ia_secdata, + sd->s_iattr->ia_secdata_len); + simple_xattrs_free(&sd->s_iattr->xattrs); + } + kfree(sd->s_iattr); + ida_simple_remove(&root->ino_ida, sd->s_ino); + kmem_cache_free(sysfs_dir_cachep, sd); + + sd = parent_sd; + if (sd) { + if (atomic_dec_and_test(&sd->s_count)) + goto repeat; + } else { + /* just released the root sd, free @root too */ + ida_destroy(&root->ino_ida); + kfree(root); + } +} +EXPORT_SYMBOL_GPL(kernfs_put); + +static int sysfs_dentry_delete(const struct dentry *dentry) +{ + struct sysfs_dirent *sd = dentry->d_fsdata; + return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED)); +} + +static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct sysfs_dirent *sd; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + sd = dentry->d_fsdata; + mutex_lock(&sysfs_mutex); + + /* The sysfs dirent has been deleted */ + if (sd->s_flags & SYSFS_FLAG_REMOVED) + goto out_bad; + + /* The sysfs dirent has been moved? */ + if (dentry->d_parent->d_fsdata != sd->s_parent) + goto out_bad; + + /* The sysfs dirent has been renamed */ + if (strcmp(dentry->d_name.name, sd->s_name) != 0) + goto out_bad; + + /* The sysfs dirent has been moved to a different namespace */ + if (sd->s_parent && kernfs_ns_enabled(sd->s_parent) && + sysfs_info(dentry->d_sb)->ns != sd->s_ns) + goto out_bad; + + mutex_unlock(&sysfs_mutex); +out_valid: + return 1; +out_bad: + /* Remove the dentry from the dcache hashes. + * If this is a deleted dentry we use d_drop instead of d_delete + * so sysfs doesn't need to cope with negative dentries. + * + * If this is a dentry that has simply been renamed we + * use d_drop to remove it from the dcache lookup on its + * old parent. 
If this dentry persists later when a lookup + * is performed at its new name the dentry will be readded + * to the dcache hashes. + */ + mutex_unlock(&sysfs_mutex); + + /* If we have submounts we must allow the vfs caches + * to lie about the state of the filesystem to prevent + * leaks and other nasty things. + */ + if (check_submounts_and_drop(dentry) != 0) + goto out_valid; + + return 0; +} + +static void sysfs_dentry_release(struct dentry *dentry) +{ + kernfs_put(dentry->d_fsdata); +} + +const struct dentry_operations sysfs_dentry_ops = { + .d_revalidate = sysfs_dentry_revalidate, + .d_delete = sysfs_dentry_delete, + .d_release = sysfs_dentry_release, +}; + +struct sysfs_dirent *sysfs_new_dirent(struct kernfs_root *root, + const char *name, umode_t mode, int type) +{ + char *dup_name = NULL; + struct sysfs_dirent *sd; + int ret; + + if (type & SYSFS_COPY_NAME) { + name = dup_name = kstrdup(name, GFP_KERNEL); + if (!name) + return NULL; + } + + sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); + if (!sd) + goto err_out1; + + ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL); + if (ret < 0) + goto err_out2; + sd->s_ino = ret; + + atomic_set(&sd->s_count, 1); + atomic_set(&sd->s_active, 0); + + sd->s_name = name; + sd->s_mode = mode; + sd->s_flags = type | SYSFS_FLAG_REMOVED; + + return sd; + + err_out2: + kmem_cache_free(sysfs_dir_cachep, sd); + err_out1: + kfree(dup_name); + return NULL; +} + +/** + * sysfs_addrm_start - prepare for sysfs_dirent add/remove + * @acxt: pointer to sysfs_addrm_cxt to be used + * + * This function is called when the caller is about to add or remove + * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used + * to keep and pass context to other addrm functions. + * + * LOCKING: + * Kernel thread context (may sleep). sysfs_mutex is locked on + * return. 
+ */ +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) + __acquires(sysfs_mutex) +{ + memset(acxt, 0, sizeof(*acxt)); + + mutex_lock(&sysfs_mutex); +} + +/** + * sysfs_add_one - add sysfs_dirent to parent without warning + * @acxt: addrm context to use + * @sd: sysfs_dirent to be added + * @parent_sd: the parent sysfs_dirent to add @sd to + * + * Get @parent_sd and set @sd->s_parent to it and increment nlink of + * the parent inode if @sd is a directory and link into the children + * list of the parent. + * + * This function should be called between calls to + * sysfs_addrm_start() and sysfs_addrm_finish() and should be + * passed the same @acxt as passed to sysfs_addrm_start(). + * + * LOCKING: + * Determined by sysfs_addrm_start(). + * + * RETURNS: + * 0 on success, -EEXIST if entry with the given name already + * exists. + */ +int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd) +{ + bool has_ns = kernfs_ns_enabled(parent_sd); + struct sysfs_inode_attrs *ps_iattr; + int ret; + + if (has_ns != (bool)sd->s_ns) { + WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", + has_ns ? "required" : "invalid", + parent_sd->s_name, sd->s_name); + return -EINVAL; + } + + if (sysfs_type(parent_sd) != SYSFS_DIR) + return -EINVAL; + + sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); + sd->s_parent = parent_sd; + kernfs_get(parent_sd); + + ret = sysfs_link_sibling(sd); + if (ret) + return ret; + + /* Update timestamps on the parent */ + ps_iattr = parent_sd->s_iattr; + if (ps_iattr) { + struct iattr *ps_iattrs = &ps_iattr->ia_iattr; + ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; + } + + /* Mark the entry added into directory tree */ + sd->s_flags &= ~SYSFS_FLAG_REMOVED; + + return 0; +} + +/** + * sysfs_remove_one - remove sysfs_dirent from parent + * @acxt: addrm context to use + * @sd: sysfs_dirent to be removed + * + * Mark @sd removed and drop nlink of parent inode if @sd is a + * directory. 
@sd is unlinked from the children list. + * + * This function should be called between calls to + * sysfs_addrm_start() and sysfs_addrm_finish() and should be + * passed the same @acxt as passed to sysfs_addrm_start(). + * + * LOCKING: + * Determined by sysfs_addrm_start(). + */ +static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd) +{ + struct sysfs_inode_attrs *ps_iattr; + + /* + * Removal can be called multiple times on the same node. Only the + * first invocation is effective and puts the base ref. + */ + if (sd->s_flags & SYSFS_FLAG_REMOVED) + return; + + if (sd->s_parent) { + sysfs_unlink_sibling(sd); + + /* Update timestamps on the parent */ + ps_iattr = sd->s_parent->s_iattr; + if (ps_iattr) { + ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; + ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; + } + } + + sd->s_flags |= SYSFS_FLAG_REMOVED; + sd->u.removed_list = acxt->removed; + acxt->removed = sd; +} + +/** + * sysfs_addrm_finish - finish up sysfs_dirent add/remove + * @acxt: addrm context to finish up + * + * Finish up sysfs_dirent add/remove. Resources acquired by + * sysfs_addrm_start() are released and removed sysfs_dirents are + * cleaned up. + * + * LOCKING: + * sysfs_mutex is released. + */ +void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) + __releases(sysfs_mutex) +{ + /* release resources acquired by sysfs_addrm_start() */ + mutex_unlock(&sysfs_mutex); + + /* kill removed sysfs_dirents */ + while (acxt->removed) { + struct sysfs_dirent *sd = acxt->removed; + + acxt->removed = sd->u.removed_list; + + sysfs_deactivate(sd); + sysfs_unmap_bin_file(sd); + kernfs_put(sd); + } +} + +/** + * kernfs_find_ns - find sysfs_dirent with the given name + * @parent: sysfs_dirent to search under + * @name: name to look for + * @ns: the namespace tag to use + * + * Look for sysfs_dirent with name @name under @parent. Returns pointer to + * the found sysfs_dirent on success, %NULL on failure. 
+ */ +static struct sysfs_dirent *kernfs_find_ns(struct sysfs_dirent *parent, + const unsigned char *name, + const void *ns) +{ + struct rb_node *node = parent->s_dir.children.rb_node; + bool has_ns = kernfs_ns_enabled(parent); + unsigned int hash; + + lockdep_assert_held(&sysfs_mutex); + + if (has_ns != (bool)ns) { + WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", + has_ns ? "required" : "invalid", + parent->s_name, name); + return NULL; + } + + hash = sysfs_name_hash(name, ns); + while (node) { + struct sysfs_dirent *sd; + int result; + + sd = to_sysfs_dirent(node); + result = sysfs_name_compare(hash, name, ns, sd); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return sd; + } + return NULL; +} + +/** + * kernfs_find_and_get_ns - find and get sysfs_dirent with the given name + * @parent: sysfs_dirent to search under + * @name: name to look for + * @ns: the namespace tag to use + * + * Look for sysfs_dirent with name @name under @parent and get a reference + * if found. This function may sleep and returns pointer to the found + * sysfs_dirent on success, %NULL on failure. + */ +struct sysfs_dirent *kernfs_find_and_get_ns(struct sysfs_dirent *parent, + const char *name, const void *ns) +{ + struct sysfs_dirent *sd; + + mutex_lock(&sysfs_mutex); + sd = kernfs_find_ns(parent, name, ns); + kernfs_get(sd); + mutex_unlock(&sysfs_mutex); + + return sd; +} +EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); + +/** + * kernfs_create_root - create a new kernfs hierarchy + * @priv: opaque data associated with the new directory + * + * Returns the root of the new hierarchy on success, ERR_PTR() value on + * failure. 
+ */ +struct kernfs_root *kernfs_create_root(void *priv) +{ + struct kernfs_root *root; + struct sysfs_dirent *sd; + + root = kzalloc(sizeof(*root), GFP_KERNEL); + if (!root) + return ERR_PTR(-ENOMEM); + + ida_init(&root->ino_ida); + + sd = sysfs_new_dirent(root, "", S_IFDIR | S_IRUGO | S_IXUGO, SYSFS_DIR); + if (!sd) { + ida_destroy(&root->ino_ida); + kfree(root); + return ERR_PTR(-ENOMEM); + } + + sd->s_flags &= ~SYSFS_FLAG_REMOVED; + sd->priv = priv; + sd->s_dir.root = root; + + root->sd = sd; + + return root; +} + +/** + * kernfs_destroy_root - destroy a kernfs hierarchy + * @root: root of the hierarchy to destroy + * + * Destroy the hierarchy anchored at @root by removing all existing + * directories and destroying @root. + */ +void kernfs_destroy_root(struct kernfs_root *root) +{ + kernfs_remove(root->sd); /* will also free @root */ +} + +/** + * kernfs_create_dir_ns - create a directory + * @parent: parent in which to create a new directory + * @name: name of the new directory + * @priv: opaque data associated with the new directory + * @ns: optional namespace tag of the directory + * + * Returns the created node on success, ERR_PTR() value on failure. 
+ */ +struct sysfs_dirent *kernfs_create_dir_ns(struct sysfs_dirent *parent, + const char *name, void *priv, + const void *ns) +{ + umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + int rc; + + /* allocate */ + sd = sysfs_new_dirent(kernfs_root(parent), name, mode, SYSFS_DIR); + if (!sd) + return ERR_PTR(-ENOMEM); + + sd->s_dir.root = parent->s_dir.root; + sd->s_ns = ns; + sd->priv = priv; + + /* link in */ + sysfs_addrm_start(&acxt); + rc = sysfs_add_one(&acxt, sd, parent); + sysfs_addrm_finish(&acxt); + + if (!rc) + return sd; + + kernfs_put(sd); + return ERR_PTR(rc); +} + +static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct dentry *ret = NULL; + struct dentry *parent = dentry->d_parent; + struct sysfs_dirent *parent_sd = parent->d_fsdata; + struct sysfs_dirent *sd; + struct inode *inode; + const void *ns = NULL; + + mutex_lock(&sysfs_mutex); + + if (kernfs_ns_enabled(parent_sd)) + ns = sysfs_info(dir->i_sb)->ns; + + sd = kernfs_find_ns(parent_sd, dentry->d_name.name, ns); + + /* no such entry */ + if (!sd) { + ret = ERR_PTR(-ENOENT); + goto out_unlock; + } + kernfs_get(sd); + dentry->d_fsdata = sd; + + /* attach dentry and inode */ + inode = sysfs_get_inode(dir->i_sb, sd); + if (!inode) { + ret = ERR_PTR(-ENOMEM); + goto out_unlock; + } + + /* instantiate and hash dentry */ + ret = d_materialise_unique(dentry, inode); + out_unlock: + mutex_unlock(&sysfs_mutex); + return ret; +} + +const struct inode_operations sysfs_dir_inode_operations = { + .lookup = sysfs_lookup, + .permission = sysfs_permission, + .setattr = sysfs_setattr, + .getattr = sysfs_getattr, + .setxattr = sysfs_setxattr, + .removexattr = sysfs_removexattr, + .getxattr = sysfs_getxattr, + .listxattr = sysfs_listxattr, +}; + +static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos) +{ + struct sysfs_dirent *last; + + while (true) { + struct rb_node *rbn; + + 
last = pos; + + if (sysfs_type(pos) != SYSFS_DIR) + break; + + rbn = rb_first(&pos->s_dir.children); + if (!rbn) + break; + + pos = to_sysfs_dirent(rbn); + } + + return last; +} + +/** + * sysfs_next_descendant_post - find the next descendant for post-order walk + * @pos: the current position (%NULL to initiate traversal) + * @root: sysfs_dirent whose descendants to walk + * + * Find the next descendant to visit for post-order traversal of @root's + * descendants. @root is included in the iteration and the last node to be + * visited. + */ +static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos, + struct sysfs_dirent *root) +{ + struct rb_node *rbn; + + lockdep_assert_held(&sysfs_mutex); + + /* if first iteration, visit leftmost descendant which may be root */ + if (!pos) + return sysfs_leftmost_descendant(root); + + /* if we visited @root, we're done */ + if (pos == root) + return NULL; + + /* if there's an unvisited sibling, visit its leftmost descendant */ + rbn = rb_next(&pos->s_rb); + if (rbn) + return sysfs_leftmost_descendant(to_sysfs_dirent(rbn)); + + /* no sibling left, visit parent */ + return pos->s_parent; +} + +static void __kernfs_remove(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd) +{ + struct sysfs_dirent *pos, *next; + + if (!sd) + return; + + pr_debug("sysfs %s: removing\n", sd->s_name); + + next = NULL; + do { + pos = next; + next = sysfs_next_descendant_post(pos, sd); + if (pos) + sysfs_remove_one(acxt, pos); + } while (next); +} + +/** + * kernfs_remove - remove a sysfs_dirent recursively + * @sd: the sysfs_dirent to remove + * + * Remove @sd along with all its subdirectories and files. 
+ */ +void kernfs_remove(struct sysfs_dirent *sd) +{ + struct sysfs_addrm_cxt acxt; + + sysfs_addrm_start(&acxt); + __kernfs_remove(&acxt, sd); + sysfs_addrm_finish(&acxt); +} + +/** + * kernfs_remove_by_name_ns - find a sysfs_dirent by name and remove it + * @dir_sd: parent of the target + * @name: name of the sysfs_dirent to remove + * @ns: namespace tag of the sysfs_dirent to remove + * + * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove + * it. Returns 0 on success, -ENOENT if such entry doesn't exist. + */ +int kernfs_remove_by_name_ns(struct sysfs_dirent *dir_sd, const char *name, + const void *ns) +{ + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + + if (!dir_sd) { + WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", + name); + return -ENOENT; + } + + sysfs_addrm_start(&acxt); + + sd = kernfs_find_ns(dir_sd, name, ns); + if (sd) + __kernfs_remove(&acxt, sd); + + sysfs_addrm_finish(&acxt); + + if (sd) + return 0; + else + return -ENOENT; +} + +/** + * kernfs_rename_ns - move and rename a kernfs_node + * @sd: target node + * @new_parent: new parent to put @sd under + * @new_name: new name + * @new_ns: new namespace tag + */ +int kernfs_rename_ns(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent, + const char *new_name, const void *new_ns) +{ + int error; + + mutex_lock(&sysfs_mutex); + + error = 0; + if ((sd->s_parent == new_parent) && (sd->s_ns == new_ns) && + (strcmp(sd->s_name, new_name) == 0)) + goto out; /* nothing to rename */ + + error = -EEXIST; + if (kernfs_find_ns(new_parent, new_name, new_ns)) + goto out; + + /* rename sysfs_dirent */ + if (strcmp(sd->s_name, new_name) != 0) { + error = -ENOMEM; + new_name = kstrdup(new_name, GFP_KERNEL); + if (!new_name) + goto out; + + kfree(sd->s_name); + sd->s_name = new_name; + } + + /* + * Move to the appropriate place in the appropriate directories rbtree. 
+ */ + sysfs_unlink_sibling(sd); + kernfs_get(new_parent); + kernfs_put(sd->s_parent); + sd->s_ns = new_ns; + sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); + sd->s_parent = new_parent; + sysfs_link_sibling(sd); + + error = 0; + out: + mutex_unlock(&sysfs_mutex); + return error; +} + +/* Relationship between s_mode and the DT_xxx types */ +static inline unsigned char dt_type(struct sysfs_dirent *sd) +{ + return (sd->s_mode >> 12) & 15; +} + +static int sysfs_dir_release(struct inode *inode, struct file *filp) +{ + kernfs_put(filp->private_data); + return 0; +} + +static struct sysfs_dirent *sysfs_dir_pos(const void *ns, + struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos) +{ + if (pos) { + int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && + pos->s_parent == parent_sd && + hash == pos->s_hash; + kernfs_put(pos); + if (!valid) + pos = NULL; + } + if (!pos && (hash > 1) && (hash < INT_MAX)) { + struct rb_node *node = parent_sd->s_dir.children.rb_node; + while (node) { + pos = to_sysfs_dirent(node); + + if (hash < pos->s_hash) + node = node->rb_left; + else if (hash > pos->s_hash) + node = node->rb_right; + else + break; + } + } + /* Skip over entries in the wrong namespace */ + while (pos && pos->s_ns != ns) { + struct rb_node *node = rb_next(&pos->s_rb); + if (!node) + pos = NULL; + else + pos = to_sysfs_dirent(node); + } + return pos; +} + +static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, + struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) +{ + pos = sysfs_dir_pos(ns, parent_sd, ino, pos); + if (pos) + do { + struct rb_node *node = rb_next(&pos->s_rb); + if (!node) + pos = NULL; + else + pos = to_sysfs_dirent(node); + } while (pos && pos->s_ns != ns); + return pos; +} + +static int sysfs_readdir(struct file *file, struct dir_context *ctx) +{ + struct dentry *dentry = file->f_path.dentry; + struct sysfs_dirent *parent_sd = dentry->d_fsdata; + struct sysfs_dirent *pos = file->private_data; + const void *ns = 
NULL; + + if (!dir_emit_dots(file, ctx)) + return 0; + mutex_lock(&sysfs_mutex); + + if (kernfs_ns_enabled(parent_sd)) + ns = sysfs_info(dentry->d_sb)->ns; + + for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos); + pos; + pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) { + const char *name = pos->s_name; + unsigned int type = dt_type(pos); + int len = strlen(name); + ino_t ino = pos->s_ino; + + ctx->pos = pos->s_hash; + file->private_data = pos; + kernfs_get(pos); + + mutex_unlock(&sysfs_mutex); + if (!dir_emit(ctx, name, len, ino, type)) + return 0; + mutex_lock(&sysfs_mutex); + } + mutex_unlock(&sysfs_mutex); + file->private_data = NULL; + ctx->pos = INT_MAX; + return 0; +} + +static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file_inode(file); + loff_t ret; + + mutex_lock(&inode->i_mutex); + ret = generic_file_llseek(file, offset, whence); + mutex_unlock(&inode->i_mutex); + + return ret; +} + +const struct file_operations sysfs_dir_operations = { + .read = generic_read_dir, + .iterate = sysfs_readdir, + .release = sysfs_dir_release, + .llseek = sysfs_dir_llseek, +}; diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c new file mode 100644 index 0000000..4a5863b --- /dev/null +++ b/fs/kernfs/file.c @@ -0,0 +1,813 @@ +/* + * fs/kernfs/file.c - kernfs file implementation + * + * Copyright (c) 2001-3 Patrick Mochel + * Copyright (c) 2007 SUSE Linux Products GmbH + * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> + * + * This file is released under the GPLv2. + */ + +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/poll.h> +#include <linux/pagemap.h> +#include <linux/sched.h> + +#include "kernfs-internal.h" + +/* + * There's one sysfs_open_file for each open file and one sysfs_open_dirent + * for each sysfs_dirent with one or more open files. + * + * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is + * protected by sysfs_open_dirent_lock. 
+ * + * filp->private_data points to seq_file whose ->private points to + * sysfs_open_file. sysfs_open_files are chained at + * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex. + */ +static DEFINE_SPINLOCK(sysfs_open_dirent_lock); +static DEFINE_MUTEX(sysfs_open_file_mutex); + +struct sysfs_open_dirent { + atomic_t refcnt; + atomic_t event; + wait_queue_head_t poll; + struct list_head files; /* goes through sysfs_open_file.list */ +}; + +static struct sysfs_open_file *sysfs_of(struct file *file) +{ + return ((struct seq_file *)file->private_data)->private; +} + +/* + * Determine the kernfs_ops for the given sysfs_dirent. This function must + * be called while holding an active reference. + */ +static const struct kernfs_ops *kernfs_ops(struct sysfs_dirent *sd) +{ + if (sd->s_flags & SYSFS_FLAG_LOCKDEP) + lockdep_assert_held(sd); + return sd->s_attr.ops; +} + +static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) +{ + struct sysfs_open_file *of = sf->private; + const struct kernfs_ops *ops; + + /* + * @of->mutex nests outside active ref and is just to ensure that + * the ops aren't called concurrently for the same open file. + */ + mutex_lock(&of->mutex); + if (!sysfs_get_active(of->sd)) + return ERR_PTR(-ENODEV); + + ops = kernfs_ops(of->sd); + if (ops->seq_start) { + return ops->seq_start(sf, ppos); + } else { + /* + * The same behavior and code as single_open(). Returns + * !NULL if pos is at the beginning; otherwise, NULL. + */ + return NULL + !*ppos; + } +} + +static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) +{ + struct sysfs_open_file *of = sf->private; + const struct kernfs_ops *ops = kernfs_ops(of->sd); + + if (ops->seq_next) { + return ops->seq_next(sf, v, ppos); + } else { + /* + * The same behavior and code as single_open(), always + * terminate after the initial read. 
+ */ + ++*ppos; + return NULL; + } +} + +static void kernfs_seq_stop(struct seq_file *sf, void *v) +{ + struct sysfs_open_file *of = sf->private; + const struct kernfs_ops *ops = kernfs_ops(of->sd); + + if (ops->seq_stop) + ops->seq_stop(sf, v); + + sysfs_put_active(of->sd); + mutex_unlock(&of->mutex); +} + +static int kernfs_seq_show(struct seq_file *sf, void *v) +{ + struct sysfs_open_file *of = sf->private; + + of->event = atomic_read(&of->sd->s_attr.open->event); + + return of->sd->s_attr.ops->seq_show(sf, v); +} + +static const struct seq_operations kernfs_seq_ops = { + .start = kernfs_seq_start, + .next = kernfs_seq_next, + .stop = kernfs_seq_stop, + .show = kernfs_seq_show, +}; + +/* + * As reading a bin file can have side-effects, the exact offset and bytes + * specified in read(2) call should be passed to the read callback making + * it difficult to use seq_file. Implement simplistic custom buffering for + * bin files. + */ +static ssize_t kernfs_file_direct_read(struct sysfs_open_file *of, + char __user *user_buf, size_t count, + loff_t *ppos) +{ + ssize_t len = min_t(size_t, count, PAGE_SIZE); + const struct kernfs_ops *ops; + char *buf; + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* + * @of->mutex nests outside active ref and is just to ensure that + * the ops aren't called concurrently for the same open file. 
+ */ + mutex_lock(&of->mutex); + if (!sysfs_get_active(of->sd)) { + len = -ENODEV; + mutex_unlock(&of->mutex); + goto out_free; + } + + ops = kernfs_ops(of->sd); + if (ops->read) + len = ops->read(of, buf, len, *ppos); + else + len = -EINVAL; + + sysfs_put_active(of->sd); + mutex_unlock(&of->mutex); + + if (len < 0) + goto out_free; + + if (copy_to_user(user_buf, buf, len)) { + len = -EFAULT; + goto out_free; + } + + *ppos += len; + + out_free: + kfree(buf); + return len; +} + +/** + * kernfs_file_read - kernfs vfs read callback + * @file: file pointer + * @user_buf: user buffer to read into + * @count: number of bytes + * @ppos: starting offset + */ +static ssize_t kernfs_file_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct sysfs_open_file *of = sysfs_of(file); + + if (of->sd->s_flags & SYSFS_FLAG_HAS_SEQ_SHOW) + return seq_read(file, user_buf, count, ppos); + else + return kernfs_file_direct_read(of, user_buf, count, ppos); +} + +/** + * kernfs_file_write - kernfs vfs write callback + * @file: file pointer + * @user_buf: data to write + * @count: number of bytes + * @ppos: starting offset + * + * Copy data in from userland and pass it to the matching kernfs write + * operation. + * + * There is no easy way for us to know if userspace is only doing a partial + * write, so we don't support them. We expect the entire buffer to come on + * the first write. Hint: if you're writing a value, first read the file, + * modify only the value you're changing, then write entire buffer + * back.
+ */ +static ssize_t kernfs_file_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct sysfs_open_file *of = sysfs_of(file); + ssize_t len = min_t(size_t, count, PAGE_SIZE); + const struct kernfs_ops *ops; + char *buf; + + buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, user_buf, len)) { + len = -EFAULT; + goto out_free; + } + buf[len] = '\0'; /* guarantee string termination */ + + /* + * @of->mutex nests outside active ref and is just to ensure that + * the ops aren't called concurrently for the same open file. + */ + mutex_lock(&of->mutex); + if (!sysfs_get_active(of->sd)) { + mutex_unlock(&of->mutex); + len = -ENODEV; + goto out_free; + } + + ops = kernfs_ops(of->sd); + if (ops->write) + len = ops->write(of, buf, len, *ppos); + else + len = -EINVAL; + + sysfs_put_active(of->sd); + mutex_unlock(&of->mutex); + + if (len > 0) + *ppos += len; +out_free: + kfree(buf); + return len; +} + +static void kernfs_vma_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + + if (!of->vm_ops) + return; + + if (!sysfs_get_active(of->sd)) + return; + + if (of->vm_ops->open) + of->vm_ops->open(vma); + + sysfs_put_active(of->sd); +} + +static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active(of->sd)) + return VM_FAULT_SIGBUS; + + ret = VM_FAULT_SIGBUS; + if (of->vm_ops->fault) + ret = of->vm_ops->fault(vma, vmf); + + sysfs_put_active(of->sd); + return ret; +} + +static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active(of->sd)) + return 
VM_FAULT_SIGBUS; + + ret = 0; + if (of->vm_ops->page_mkwrite) + ret = of->vm_ops->page_mkwrite(vma, vmf); + else + file_update_time(file); + + sysfs_put_active(of->sd); + return ret; +} + +static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return -EINVAL; + + if (!sysfs_get_active(of->sd)) + return -EINVAL; + + ret = -EINVAL; + if (of->vm_ops->access) + ret = of->vm_ops->access(vma, addr, buf, len, write); + + sysfs_put_active(of->sd); + return ret; +} + +#ifdef CONFIG_NUMA +static int kernfs_vma_set_policy(struct vm_area_struct *vma, + struct mempolicy *new) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return 0; + + if (!sysfs_get_active(of->sd)) + return -EINVAL; + + ret = 0; + if (of->vm_ops->set_policy) + ret = of->vm_ops->set_policy(vma, new); + + sysfs_put_active(of->sd); + return ret; +} + +static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma, + unsigned long addr) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + struct mempolicy *pol; + + if (!of->vm_ops) + return vma->vm_policy; + + if (!sysfs_get_active(of->sd)) + return vma->vm_policy; + + pol = vma->vm_policy; + if (of->vm_ops->get_policy) + pol = of->vm_ops->get_policy(vma, addr); + + sysfs_put_active(of->sd); + return pol; +} + +static int kernfs_vma_migrate(struct vm_area_struct *vma, + const nodemask_t *from, const nodemask_t *to, + unsigned long flags) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return 0; + + if (!sysfs_get_active(of->sd)) + return 0; + + ret = 0; + if (of->vm_ops->migrate) + ret = of->vm_ops->migrate(vma, from, to, flags); + + sysfs_put_active(of->sd); + return ret; +} +#endif + +static const 
struct vm_operations_struct kernfs_vm_ops = { + .open = kernfs_vma_open, + .fault = kernfs_vma_fault, + .page_mkwrite = kernfs_vma_page_mkwrite, + .access = kernfs_vma_access, +#ifdef CONFIG_NUMA + .set_policy = kernfs_vma_set_policy, + .get_policy = kernfs_vma_get_policy, + .migrate = kernfs_vma_migrate, +#endif +}; + +static int kernfs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct sysfs_open_file *of = sysfs_of(file); + const struct kernfs_ops *ops; + int rc; + + mutex_lock(&of->mutex); + + rc = -ENODEV; + if (!sysfs_get_active(of->sd)) + goto out_unlock; + + ops = kernfs_ops(of->sd); + if (ops->mmap) + rc = ops->mmap(of, vma); + if (rc) + goto out_put; + + /* + * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() + * to satisfy versions of X which crash if the mmap fails: that + * substitutes a new vm_file, and we don't then want bin_vm_ops. + */ + if (vma->vm_file != file) + goto out_put; + + rc = -EINVAL; + if (of->mmapped && of->vm_ops != vma->vm_ops) + goto out_put; + + /* + * It is not possible to successfully wrap close. + * So error if someone is trying to use close. + */ + rc = -EINVAL; + if (vma->vm_ops && vma->vm_ops->close) + goto out_put; + + rc = 0; + of->mmapped = 1; + of->vm_ops = vma->vm_ops; + vma->vm_ops = &kernfs_vm_ops; +out_put: + sysfs_put_active(of->sd); +out_unlock: + mutex_unlock(&of->mutex); + + return rc; +} + +/** + * sysfs_get_open_dirent - get or create sysfs_open_dirent + * @sd: target sysfs_dirent + * @of: sysfs_open_file for this instance of open + * + * If @sd->s_attr.open exists, increment its reference count; + * otherwise, create one. @of is chained to the files list. + * + * LOCKING: + * Kernel thread context (may sleep). + * + * RETURNS: + * 0 on success, -errno on failure. 
+ */ +static int sysfs_get_open_dirent(struct sysfs_dirent *sd, + struct sysfs_open_file *of) +{ + struct sysfs_open_dirent *od, *new_od = NULL; + + retry: + mutex_lock(&sysfs_open_file_mutex); + spin_lock_irq(&sysfs_open_dirent_lock); + + if (!sd->s_attr.open && new_od) { + sd->s_attr.open = new_od; + new_od = NULL; + } + + od = sd->s_attr.open; + if (od) { + atomic_inc(&od->refcnt); + list_add_tail(&of->list, &od->files); + } + + spin_unlock_irq(&sysfs_open_dirent_lock); + mutex_unlock(&sysfs_open_file_mutex); + + if (od) { + kfree(new_od); + return 0; + } + + /* not there, initialize a new one and retry */ + new_od = kmalloc(sizeof(*new_od), GFP_KERNEL); + if (!new_od) + return -ENOMEM; + + atomic_set(&new_od->refcnt, 0); + atomic_set(&new_od->event, 1); + init_waitqueue_head(&new_od->poll); + INIT_LIST_HEAD(&new_od->files); + goto retry; +} + +/** + * sysfs_put_open_dirent - put sysfs_open_dirent + * @sd: target sysfs_dirent + * @of: associated sysfs_open_file + * + * Put @sd->s_attr.open and unlink @of from the files list. If + * reference count reaches zero, disassociate and free it. + * + * LOCKING: + * None. 
+ */ +static void sysfs_put_open_dirent(struct sysfs_dirent *sd, + struct sysfs_open_file *of) +{ + struct sysfs_open_dirent *od = sd->s_attr.open; + unsigned long flags; + + mutex_lock(&sysfs_open_file_mutex); + spin_lock_irqsave(&sysfs_open_dirent_lock, flags); + + if (of) + list_del(&of->list); + + if (atomic_dec_and_test(&od->refcnt)) + sd->s_attr.open = NULL; + else + od = NULL; + + spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); + mutex_unlock(&sysfs_open_file_mutex); + + kfree(od); +} + +static int kernfs_file_open(struct inode *inode, struct file *file) +{ + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + const struct kernfs_ops *ops; + struct sysfs_open_file *of; + bool has_read, has_write, has_mmap; + int error = -EACCES; + + if (!sysfs_get_active(attr_sd)) + return -ENODEV; + + ops = kernfs_ops(attr_sd); + + has_read = ops->seq_show || ops->read || ops->mmap; + has_write = ops->write || ops->mmap; + has_mmap = ops->mmap; + + /* check perms and supported operations */ + if ((file->f_mode & FMODE_WRITE) && + (!(inode->i_mode & S_IWUGO) || !has_write)) + goto err_out; + + if ((file->f_mode & FMODE_READ) && + (!(inode->i_mode & S_IRUGO) || !has_read)) + goto err_out; + + /* allocate a sysfs_open_file for the file */ + error = -ENOMEM; + of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL); + if (!of) + goto err_out; + + /* + * The following is done to give a different lockdep key to + * @of->mutex for files which implement mmap. This is a rather + * crude way to avoid false positive lockdep warning around + * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and + * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under + * which mm->mmap_sem nests, while holding @of->mutex. As each + * open file has a separate mutex, it's okay as long as those don't + * happen on the same file. At this point, we can't easily give + * each file a separate locking class. 
Let's differentiate on + * whether the file has mmap or not for now. + */ + if (has_mmap) + mutex_init(&of->mutex); + else + mutex_init(&of->mutex); + + of->sd = attr_sd; + of->file = file; + + /* + * Always instantiate seq_file even if read access doesn't use + * seq_file or is not requested. This unifies private data access + * and readable regular files are the vast majority anyway. + */ + if (ops->seq_show) + error = seq_open(file, &kernfs_seq_ops); + else + error = seq_open(file, NULL); + if (error) + goto err_free; + + ((struct seq_file *)file->private_data)->private = of; + + /* seq_file clears PWRITE unconditionally, restore it if WRITE */ + if (file->f_mode & FMODE_WRITE) + file->f_mode |= FMODE_PWRITE; + + /* make sure we have open dirent struct */ + error = sysfs_get_open_dirent(attr_sd, of); + if (error) + goto err_close; + + /* open succeeded, put active references */ + sysfs_put_active(attr_sd); + return 0; + +err_close: + seq_release(inode, file); +err_free: + kfree(of); +err_out: + sysfs_put_active(attr_sd); + return error; +} + +static int kernfs_file_release(struct inode *inode, struct file *filp) +{ + struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata; + struct sysfs_open_file *of = sysfs_of(filp); + + sysfs_put_open_dirent(sd, of); + seq_release(inode, filp); + kfree(of); + + return 0; +} + +void sysfs_unmap_bin_file(struct sysfs_dirent *sd) +{ + struct sysfs_open_dirent *od; + struct sysfs_open_file *of; + + if (!(sd->s_flags & SYSFS_FLAG_HAS_MMAP)) + return; + + spin_lock_irq(&sysfs_open_dirent_lock); + od = sd->s_attr.open; + if (od) + atomic_inc(&od->refcnt); + spin_unlock_irq(&sysfs_open_dirent_lock); + if (!od) + return; + + mutex_lock(&sysfs_open_file_mutex); + list_for_each_entry(of, &od->files, list) { + struct inode *inode = file_inode(of->file); + unmap_mapping_range(inode->i_mapping, 0, 0, 1); + } + mutex_unlock(&sysfs_open_file_mutex); + + sysfs_put_open_dirent(sd, NULL); +} + +/* Sysfs attribute files are pollable. 
The idea is that you read + * the content and then you use 'poll' or 'select' to wait for + * the content to change. When the content changes (assuming the + * manager for the kobject supports notification), poll will + * return POLLERR|POLLPRI, and select will return the fd whether + * it is waiting for read, write, or exceptions. + * Once poll/select indicates that the value has changed, you + * need to close and re-open the file, or seek to 0 and read again. + * Reminder: this only works for attributes which actively support + * it, and it is not possible to test an attribute from userspace + * to see if it supports poll (Neither 'poll' nor 'select' return + * an appropriate error code). When in doubt, set a suitable timeout value. + */ +static unsigned int kernfs_file_poll(struct file *filp, poll_table *wait) +{ + struct sysfs_open_file *of = sysfs_of(filp); + struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; + struct sysfs_open_dirent *od = attr_sd->s_attr.open; + + /* need parent for the kobj, grab both */ + if (!sysfs_get_active(attr_sd)) + goto trigger; + + poll_wait(filp, &od->poll, wait); + + sysfs_put_active(attr_sd); + + if (of->event != atomic_read(&od->event)) + goto trigger; + + return DEFAULT_POLLMASK; + + trigger: + return DEFAULT_POLLMASK|POLLERR|POLLPRI; +} + +/** + * kernfs_notify - notify a kernfs file + * @sd: file to notify + * + * Notify @sd such that poll(2) on @sd wakes up. 
+ */ +void kernfs_notify(struct sysfs_dirent *sd) +{ + struct sysfs_open_dirent *od; + unsigned long flags; + + spin_lock_irqsave(&sysfs_open_dirent_lock, flags); + + if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) { + od = sd->s_attr.open; + if (od) { + atomic_inc(&od->event); + wake_up_interruptible(&od->poll); + } + } + + spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); +} +EXPORT_SYMBOL_GPL(kernfs_notify); + +const struct file_operations kernfs_file_operations = { + .read = kernfs_file_read, + .write = kernfs_file_write, + .llseek = generic_file_llseek, + .mmap = kernfs_file_mmap, + .open = kernfs_file_open, + .release = kernfs_file_release, + .poll = kernfs_file_poll, +}; + +/** + * kernfs_create_file_ns_key - create a file + * @parent: directory to create the file in + * @name: name of the file + * @mode: mode of the file + * @size: size of the file + * @ops: kernfs operations for the file + * @priv: private data for the file + * @ns: optional namespace tag of the file + * @key: lockdep key for the file's active_ref, %NULL to disable lockdep + * + * Returns the created node on success, ERR_PTR() value on error. + */ +struct sysfs_dirent *kernfs_create_file_ns_key(struct sysfs_dirent *parent, + const char *name, + umode_t mode, loff_t size, + const struct kernfs_ops *ops, + void *priv, const void *ns, + struct lock_class_key *key) +{ + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + int rc; + + sd = sysfs_new_dirent(kernfs_root(parent), name, + (mode & S_IALLUGO) | S_IFREG, SYSFS_KOBJ_ATTR); + if (!sd) + return ERR_PTR(-ENOMEM); + + sd->s_attr.ops = ops; + sd->s_attr.size = size; + sd->s_ns = ns; + sd->priv = priv; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (key) { + lockdep_init_map(&sd->dep_map, "s_active", key, 0); + sd->s_flags |= SYSFS_FLAG_LOCKDEP; + } +#endif + + /* + * sd->s_attr.ops is accessible only while holding active ref. We + * need to know whether some ops are implemented outside active + * ref. Cache their existence in flags. 
+ */ + if (ops->seq_show) + sd->s_flags |= SYSFS_FLAG_HAS_SEQ_SHOW; + if (ops->mmap) + sd->s_flags |= SYSFS_FLAG_HAS_MMAP; + + sysfs_addrm_start(&acxt); + rc = sysfs_add_one(&acxt, sd, parent); + sysfs_addrm_finish(&acxt); + + if (rc) { + kernfs_put(sd); + return ERR_PTR(rc); + } + return sd; +} diff --git a/fs/sysfs/inode.c b/fs/kernfs/inode.c index 1750f79..18ad431 100644 --- a/fs/sysfs/inode.c +++ b/fs/kernfs/inode.c @@ -1,28 +1,22 @@ /* - * fs/sysfs/inode.c - basic sysfs inode and dentry operations + * fs/kernfs/inode.c - kernfs inode implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH - * Copyright (c) 2007 Tejun Heo <teheo@suse.de> + * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> * * This file is released under the GPLv2. - * - * Please see Documentation/filesystems/sysfs.txt for more information. */ -#undef DEBUG - #include <linux/pagemap.h> -#include <linux/namei.h> #include <linux/backing-dev.h> #include <linux/capability.h> #include <linux/errno.h> -#include <linux/sched.h> #include <linux/slab.h> -#include <linux/sysfs.h> #include <linux/xattr.h> #include <linux/security.h> -#include "sysfs.h" + +#include "kernfs-internal.h" static const struct address_space_operations sysfs_aops = { .readpage = simple_readpage, @@ -41,22 +35,28 @@ static const struct inode_operations sysfs_inode_operations = { .setattr = sysfs_setattr, .getattr = sysfs_getattr, .setxattr = sysfs_setxattr, + .removexattr = sysfs_removexattr, + .getxattr = sysfs_getxattr, + .listxattr = sysfs_listxattr, }; -int __init sysfs_inode_init(void) +void __init sysfs_inode_init(void) { - return bdi_init(&sysfs_backing_dev_info); + if (bdi_init(&sysfs_backing_dev_info)) + panic("failed to init sysfs_backing_dev_info"); } -static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) +static struct sysfs_inode_attrs *sysfs_inode_attrs(struct sysfs_dirent *sd) { - struct sysfs_inode_attrs *attrs; struct iattr *iattrs; - attrs 
= kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL); - if (!attrs) + if (sd->s_iattr) + return sd->s_iattr; + + sd->s_iattr = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL); + if (!sd->s_iattr) return NULL; - iattrs = &attrs->ia_iattr; + iattrs = &sd->s_iattr->ia_iattr; /* assign default attributes */ iattrs->ia_mode = sd->s_mode; @@ -64,26 +64,22 @@ static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) iattrs->ia_gid = GLOBAL_ROOT_GID; iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME; - return attrs; + simple_xattrs_init(&sd->s_iattr->xattrs); + + return sd->s_iattr; } -int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr) +static int __kernfs_setattr(struct sysfs_dirent *sd, const struct iattr *iattr) { - struct sysfs_inode_attrs *sd_attrs; + struct sysfs_inode_attrs *attrs; struct iattr *iattrs; unsigned int ia_valid = iattr->ia_valid; - sd_attrs = sd->s_iattr; + attrs = sysfs_inode_attrs(sd); + if (!attrs) + return -ENOMEM; - if (!sd_attrs) { - /* setting attributes for the first time, allocate now */ - sd_attrs = sysfs_init_inode_attrs(sd); - if (!sd_attrs) - return -ENOMEM; - sd->s_iattr = sd_attrs; - } - /* attributes were changed at least once in past */ - iattrs = &sd_attrs->ia_iattr; + iattrs = &attrs->ia_iattr; if (ia_valid & ATTR_UID) iattrs->ia_uid = iattr->ia_uid; @@ -102,6 +98,23 @@ int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr) return 0; } +/** + * kernfs_setattr - set iattr on a node + * @sd: target node + * @iattr: iattr to set + * + * Returns 0 on success, -errno on failure. 
+ */ +int kernfs_setattr(struct sysfs_dirent *sd, const struct iattr *iattr) +{ + int ret; + + mutex_lock(&sysfs_mutex); + ret = __kernfs_setattr(sd, iattr); + mutex_unlock(&sysfs_mutex); + return ret; +} + int sysfs_setattr(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; @@ -116,7 +129,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr) if (error) goto out; - error = sysfs_sd_setattr(sd, iattr); + error = __kernfs_setattr(sd, iattr); if (error) goto out; @@ -131,22 +144,19 @@ out: static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *secdata_len) { - struct sysfs_inode_attrs *iattrs; + struct sysfs_inode_attrs *attrs; void *old_secdata; size_t old_secdata_len; - if (!sd->s_iattr) { - sd->s_iattr = sysfs_init_inode_attrs(sd); - if (!sd->s_iattr) - return -ENOMEM; - } + attrs = sysfs_inode_attrs(sd); + if (!attrs) + return -ENOMEM; - iattrs = sd->s_iattr; - old_secdata = iattrs->ia_secdata; - old_secdata_len = iattrs->ia_secdata_len; + old_secdata = attrs->ia_secdata; + old_secdata_len = attrs->ia_secdata_len; - iattrs->ia_secdata = *secdata; - iattrs->ia_secdata_len = *secdata_len; + attrs->ia_secdata = *secdata; + attrs->ia_secdata_len = *secdata_len; *secdata = old_secdata; *secdata_len = old_secdata_len; @@ -157,23 +167,25 @@ int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_inode_attrs *attrs; void *secdata; int error; u32 secdata_len = 0; - if (!sd) - return -EINVAL; + attrs = sysfs_inode_attrs(sd); + if (!attrs) + return -ENOMEM; if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; error = security_inode_setsecurity(dentry->d_inode, suffix, value, size, flags); if (error) - goto out; + return error; error = security_inode_getsecctx(dentry->d_inode, &secdata, &secdata_len); if (error) - goto out; + 
return error; mutex_lock(&sysfs_mutex); error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len); @@ -181,10 +193,50 @@ int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (secdata) security_release_secctx(secdata, secdata_len); - } else - return -EINVAL; -out: - return error; + return error; + } else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { + return simple_xattr_set(&attrs->xattrs, name, value, size, + flags); + } + + return -EINVAL; +} + +int sysfs_removexattr(struct dentry *dentry, const char *name) +{ + struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_inode_attrs *attrs; + + attrs = sysfs_inode_attrs(sd); + if (!attrs) + return -ENOMEM; + + return simple_xattr_remove(&attrs->xattrs, name); +} + +ssize_t sysfs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size) +{ + struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_inode_attrs *attrs; + + attrs = sysfs_inode_attrs(sd); + if (!attrs) + return -ENOMEM; + + return simple_xattr_get(&attrs->xattrs, name, buf, size); +} + +ssize_t sysfs_listxattr(struct dentry *dentry, char *buf, size_t size) +{ + struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_inode_attrs *attrs; + + attrs = sysfs_inode_attrs(sd); + if (!attrs) + return -ENOMEM; + + return simple_xattr_list(&attrs->xattrs, buf, size); } static inline void set_default_inode_attr(struct inode *inode, umode_t mode) @@ -204,17 +256,16 @@ static inline void set_inode_attr(struct inode *inode, struct iattr *iattr) static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode) { - struct sysfs_inode_attrs *iattrs = sd->s_iattr; + struct sysfs_inode_attrs *attrs = sd->s_iattr; inode->i_mode = sd->s_mode; - if (iattrs) { + if (attrs) { /* sysfs_dirent has non-default attributes * get them from persistent copy in sysfs_dirent */ - set_inode_attr(inode, &iattrs->ia_iattr); - security_inode_notifysecctx(inode, - iattrs->ia_secdata, - 
iattrs->ia_secdata_len); + set_inode_attr(inode, &attrs->ia_iattr); + security_inode_notifysecctx(inode, attrs->ia_secdata, + attrs->ia_secdata_len); } if (sysfs_type(sd) == SYSFS_DIR) @@ -237,9 +288,8 @@ int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) { - struct bin_attribute *bin_attr; - - inode->i_private = sysfs_get(sd); + kernfs_get(sd); + inode->i_private = sd; inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; inode->i_op = &sysfs_inode_operations; @@ -254,13 +304,8 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) inode->i_fop = &sysfs_dir_operations; break; case SYSFS_KOBJ_ATTR: - inode->i_size = PAGE_SIZE; - inode->i_fop = &sysfs_file_operations; - break; - case SYSFS_KOBJ_BIN_ATTR: - bin_attr = sd->s_attr.bin_attr; - inode->i_size = bin_attr->size; - inode->i_fop = &sysfs_bin_operations; + inode->i_size = sd->s_attr.size; + inode->i_fop = &kernfs_file_operations; break; case SYSFS_KOBJ_LINK: inode->i_op = &sysfs_symlink_inode_operations; @@ -311,7 +356,7 @@ void sysfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); - sysfs_put(sd); + kernfs_put(sd); } int sysfs_permission(struct inode *inode, int mask) diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h new file mode 100644 index 0000000..910e485 --- /dev/null +++ b/fs/kernfs/kernfs-internal.h @@ -0,0 +1,122 @@ +/* + * fs/kernfs/kernfs-internal.h - kernfs internal header file + * + * Copyright (c) 2001-3 Patrick Mochel + * Copyright (c) 2007 SUSE Linux Products GmbH + * Copyright (c) 2007, 2013 Tejun Heo <teheo@suse.de> + * + * This file is released under the GPLv2. 
+ */ + +#ifndef __KERNFS_INTERNAL_H +#define __KERNFS_INTERNAL_H + +#include <linux/lockdep.h> +#include <linux/fs.h> +#include <linux/mutex.h> +#include <linux/xattr.h> + +#include <linux/kernfs.h> + +struct sysfs_inode_attrs { + struct iattr ia_iattr; + void *ia_secdata; + u32 ia_secdata_len; + + struct simple_xattrs xattrs; +}; + +#define SD_DEACTIVATED_BIAS INT_MIN + +/* SYSFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ + +/** + * kernfs_root - find out the kernfs_root a sysfs_dirent belongs to + * @sd: sysfs_dirent of interest + * + * Return the kernfs_root @sd belongs to. + */ +static inline struct kernfs_root *kernfs_root(struct sysfs_dirent *sd) +{ + /* if parent exists, it's always a dir; otherwise, @sd is a dir */ + if (sd->s_parent) + sd = sd->s_parent; + return sd->s_dir.root; +} + +/* + * Context structure to be used while adding/removing nodes. + */ +struct sysfs_addrm_cxt { + struct sysfs_dirent *removed; +}; + +/* + * mount.c + */ +struct sysfs_super_info { + /* + * The root associated with this super_block. Each super_block is + * identified by the root and ns it's associated with. + */ + struct kernfs_root *root; + + /* + * Each sb is associated with one namespace tag, currently the network + * namespace of the task which mounted this sysfs instance. If multiple + * tags become necessary, make the following an array and compare + * sysfs_dirent tag against every entry. 
+ */ + const void *ns; +}; +#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) + +extern struct kmem_cache *sysfs_dir_cachep; + +/* + * inode.c + */ +struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); +void sysfs_evict_inode(struct inode *inode); +int sysfs_permission(struct inode *inode, int mask); +int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); +int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); +int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags); +int sysfs_removexattr(struct dentry *dentry, const char *name); +ssize_t sysfs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size); +ssize_t sysfs_listxattr(struct dentry *dentry, char *buf, size_t size); +void sysfs_inode_init(void); + +/* + * dir.c + */ +extern struct mutex sysfs_mutex; +extern const struct dentry_operations sysfs_dentry_ops; +extern const struct file_operations sysfs_dir_operations; +extern const struct inode_operations sysfs_dir_inode_operations; + +struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); +void sysfs_put_active(struct sysfs_dirent *sd); +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt); +int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd); +void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); +struct sysfs_dirent *sysfs_new_dirent(struct kernfs_root *root, + const char *name, umode_t mode, int type); + +/* + * file.c + */ +extern const struct file_operations kernfs_file_operations; + +void sysfs_unmap_bin_file(struct sysfs_dirent *sd); + +/* + * symlink.c + */ +extern const struct inode_operations sysfs_symlink_inode_operations; + +#endif /* __KERNFS_INTERNAL_H */ diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c new file mode 100644 index 0000000..84c83e2 --- /dev/null +++ b/fs/kernfs/mount.c @@ -0,0 +1,165 @@ +/* + * fs/kernfs/mount.c - 
kernfs mount implementation + * + * Copyright (c) 2001-3 Patrick Mochel + * Copyright (c) 2007 SUSE Linux Products GmbH + * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> + * + * This file is released under the GPLv2. + */ + +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/init.h> +#include <linux/magic.h> +#include <linux/slab.h> +#include <linux/pagemap.h> + +#include "kernfs-internal.h" + +struct kmem_cache *sysfs_dir_cachep; + +static const struct super_operations sysfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .evict_inode = sysfs_evict_inode, +}; + +static int sysfs_fill_super(struct super_block *sb) +{ + struct sysfs_super_info *info = sysfs_info(sb); + struct inode *inode; + struct dentry *root; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = SYSFS_MAGIC; + sb->s_op = &sysfs_ops; + sb->s_time_gran = 1; + + /* get root inode, initialize and unlock it */ + mutex_lock(&sysfs_mutex); + inode = sysfs_get_inode(sb, info->root->sd); + mutex_unlock(&sysfs_mutex); + if (!inode) { + pr_debug("sysfs: could not get root inode\n"); + return -ENOMEM; + } + + /* instantiate and link root dentry */ + root = d_make_root(inode); + if (!root) { + pr_debug("%s: could not get root dentry!\n", __func__); + return -ENOMEM; + } + kernfs_get(info->root->sd); + root->d_fsdata = info->root->sd; + sb->s_root = root; + sb->s_d_op = &sysfs_dentry_ops; + return 0; +} + +static int sysfs_test_super(struct super_block *sb, void *data) +{ + struct sysfs_super_info *sb_info = sysfs_info(sb); + struct sysfs_super_info *info = data; + + return sb_info->root == info->root && sb_info->ns == info->ns; +} + +static int sysfs_set_super(struct super_block *sb, void *data) +{ + int error; + error = set_anon_super(sb, data); + if (!error) + sb->s_fs_info = data; + return error; +} + +/** + * kernfs_super_ns - determine the namespace tag of a kernfs super_block + * @sb: super_block of interest + * + 
* Return the namespace tag associated with kernfs super_block @sb. + */ +const void *kernfs_super_ns(struct super_block *sb) +{ + struct sysfs_super_info *info = sysfs_info(sb); + + return info->ns; +} + +/** + * kernfs_mount_ns - kernfs mount helper + * @fs_type: file_system_type of the fs being mounted + * @flags: mount flags specified for the mount + * @root: kernfs_root of the hierarchy being mounted + * @ns: optional namespace tag of the mount + * + * This is to be called from each kernfs user's file_system_type->mount() + * implementation, which should pass through the specified @fs_type and + * @flags, and specify the hierarchy and namespace tag to mount via @root + * and @ns, respectively. + * + * The return value can be passed to the vfs layer verbatim. + */ +struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, + struct kernfs_root *root, const void *ns) +{ + struct super_block *sb; + struct sysfs_super_info *info; + int error; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + info->root = root; + info->ns = ns; + + sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info); + if (IS_ERR(sb) || sb->s_fs_info != info) + kfree(info); + if (IS_ERR(sb)) + return ERR_CAST(sb); + if (!sb->s_root) { + error = sysfs_fill_super(sb); + if (error) { + deactivate_locked_super(sb); + return ERR_PTR(error); + } + sb->s_flags |= MS_ACTIVE; + } + + return dget(sb->s_root); +} + +/** + * kernfs_kill_sb - kill_sb for kernfs + * @sb: super_block being killed + * + * This can be used directly for file_system_type->kill_sb(). If a kernfs + * user needs extra cleanup, it can implement its own kill_sb() and call + * this function at the end. 
+ */ +void kernfs_kill_sb(struct super_block *sb) +{ + struct sysfs_super_info *info = sysfs_info(sb); + struct sysfs_dirent *root_sd = sb->s_root->d_fsdata; + + /* + * Remove the superblock from fs_supers/s_instances + * so we can't find it, before freeing sysfs_super_info. + */ + kill_anon_super(sb); + kfree(info); + kernfs_put(root_sd); +} + +void __init kernfs_init(void) +{ + sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache", + sizeof(struct sysfs_dirent), + 0, SLAB_PANIC, NULL); + sysfs_inode_init(); +} diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c new file mode 100644 index 0000000..adf2875 --- /dev/null +++ b/fs/kernfs/symlink.c @@ -0,0 +1,152 @@ +/* + * fs/kernfs/symlink.c - kernfs symlink implementation + * + * Copyright (c) 2001-3 Patrick Mochel + * Copyright (c) 2007 SUSE Linux Products GmbH + * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> + * + * This file is released under the GPLv2. + */ + +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/namei.h> + +#include "kernfs-internal.h" + +/** + * kernfs_create_link - create a symlink + * @parent: directory to create the symlink in + * @name: name of the symlink + * @target: target node for the symlink to point to + * + * Returns the created node on success, ERR_PTR() value on error. 
+ */ +struct sysfs_dirent *kernfs_create_link(struct sysfs_dirent *parent, + const char *name, + struct sysfs_dirent *target) +{ + struct sysfs_dirent *sd; + struct sysfs_addrm_cxt acxt; + int error; + + sd = sysfs_new_dirent(kernfs_root(parent), name, S_IFLNK|S_IRWXUGO, + SYSFS_KOBJ_LINK); + if (!sd) + return ERR_PTR(-ENOMEM); + + if (kernfs_ns_enabled(parent)) + sd->s_ns = target->s_ns; + sd->s_symlink.target_sd = target; + kernfs_get(target); /* ref owned by symlink */ + + sysfs_addrm_start(&acxt); + error = sysfs_add_one(&acxt, sd, parent); + sysfs_addrm_finish(&acxt); + + if (!error) + return sd; + + kernfs_put(sd); + return ERR_PTR(error); +} + +static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, + struct sysfs_dirent *target_sd, char *path) +{ + struct sysfs_dirent *base, *sd; + char *s = path; + int len = 0; + + /* go up to the root, stop at the base */ + base = parent_sd; + while (base->s_parent) { + sd = target_sd->s_parent; + while (sd->s_parent && base != sd) + sd = sd->s_parent; + + if (base == sd) + break; + + strcpy(s, "../"); + s += 3; + base = base->s_parent; + } + + /* determine end of target string for reverse fillup */ + sd = target_sd; + while (sd->s_parent && sd != base) { + len += strlen(sd->s_name) + 1; + sd = sd->s_parent; + } + + /* check limits */ + if (len < 2) + return -EINVAL; + len--; + if ((s - path) + len > PATH_MAX) + return -ENAMETOOLONG; + + /* reverse fillup of target string from target to base */ + sd = target_sd; + while (sd->s_parent && sd != base) { + int slen = strlen(sd->s_name); + + len -= slen; + strncpy(s + len, sd->s_name, slen); + if (len) + s[--len] = '/'; + + sd = sd->s_parent; + } + + return 0; +} + +static int sysfs_getlink(struct dentry *dentry, char *path) +{ + struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_dirent *parent_sd = sd->s_parent; + struct sysfs_dirent *target_sd = sd->s_symlink.target_sd; + int error; + + mutex_lock(&sysfs_mutex); + error = sysfs_get_target_path(parent_sd, 
target_sd, path); + mutex_unlock(&sysfs_mutex); + + return error; +} + +static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + int error = -ENOMEM; + unsigned long page = get_zeroed_page(GFP_KERNEL); + if (page) { + error = sysfs_getlink(dentry, (char *) page); + if (error < 0) + free_page((unsigned long)page); + } + nd_set_link(nd, error ? ERR_PTR(error) : (char *)page); + return NULL; +} + +static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *page = nd_get_link(nd); + if (!IS_ERR(page)) + free_page((unsigned long)page); +} + +const struct inode_operations sysfs_symlink_inode_operations = { + .setxattr = sysfs_setxattr, + .removexattr = sysfs_removexattr, + .getxattr = sysfs_getxattr, + .listxattr = sysfs_listxattr, + .readlink = generic_readlink, + .follow_link = sysfs_follow_link, + .put_link = sysfs_put_link, + .setattr = sysfs_setattr, + .getattr = sysfs_getattr, + .permission = sysfs_permission, +}; diff --git a/fs/namespace.c b/fs/namespace.c index ac2ce8a..a511ea0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2790,6 +2790,8 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mountpoint_hashtable[u]); + kernfs_init(); + err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile index 8876ac1..6eff6e1 100644 --- a/fs/sysfs/Makefile +++ b/fs/sysfs/Makefile @@ -2,4 +2,4 @@ # Makefile for the sysfs virtual filesystem # -obj-y := inode.o file.o dir.o symlink.o mount.o group.o +obj-y := file.o dir.o symlink.o mount.o group.o diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 5e73d66..2fea501 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -13,446 +13,12 @@ #undef DEBUG #include <linux/fs.h> -#include <linux/mount.h> -#include <linux/module.h> #include <linux/kobject.h> -#include <linux/namei.h> -#include <linux/idr.h> -#include <linux/completion.h> -#include <linux/mutex.h> #include 
<linux/slab.h> -#include <linux/security.h> -#include <linux/hash.h> #include "sysfs.h" -DEFINE_MUTEX(sysfs_mutex); DEFINE_SPINLOCK(sysfs_symlink_target_lock); -#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb) - -static DEFINE_SPINLOCK(sysfs_ino_lock); -static DEFINE_IDA(sysfs_ino_ida); - -/** - * sysfs_name_hash - * @name: Null terminated string to hash - * @ns: Namespace tag to hash - * - * Returns 31 bit hash of ns + name (so it fits in an off_t ) - */ -static unsigned int sysfs_name_hash(const char *name, const void *ns) -{ - unsigned long hash = init_name_hash(); - unsigned int len = strlen(name); - while (len--) - hash = partial_name_hash(*name++, hash); - hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); - hash &= 0x7fffffffU; - /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ - if (hash < 1) - hash += 2; - if (hash >= INT_MAX) - hash = INT_MAX - 1; - return hash; -} - -static int sysfs_name_compare(unsigned int hash, const char *name, - const void *ns, const struct sysfs_dirent *sd) -{ - if (hash != sd->s_hash) - return hash - sd->s_hash; - if (ns != sd->s_ns) - return ns - sd->s_ns; - return strcmp(name, sd->s_name); -} - -static int sysfs_sd_compare(const struct sysfs_dirent *left, - const struct sysfs_dirent *right) -{ - return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns, - right); -} - -/** - * sysfs_link_sibling - link sysfs_dirent into sibling rbtree - * @sd: sysfs_dirent of interest - * - * Link @sd into its sibling rbtree which starts from - * sd->s_parent->s_dir.children. - * - * Locking: - * mutex_lock(sysfs_mutex) - * - * RETURNS: - * 0 on susccess -EEXIST on failure. 
- */ -static int sysfs_link_sibling(struct sysfs_dirent *sd) -{ - struct rb_node **node = &sd->s_parent->s_dir.children.rb_node; - struct rb_node *parent = NULL; - - if (sysfs_type(sd) == SYSFS_DIR) - sd->s_parent->s_dir.subdirs++; - - while (*node) { - struct sysfs_dirent *pos; - int result; - - pos = to_sysfs_dirent(*node); - parent = *node; - result = sysfs_sd_compare(sd, pos); - if (result < 0) - node = &pos->s_rb.rb_left; - else if (result > 0) - node = &pos->s_rb.rb_right; - else - return -EEXIST; - } - /* add new node and rebalance the tree */ - rb_link_node(&sd->s_rb, parent, node); - rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children); - return 0; -} - -/** - * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree - * @sd: sysfs_dirent of interest - * - * Unlink @sd from its sibling rbtree which starts from - * sd->s_parent->s_dir.children. - * - * Locking: - * mutex_lock(sysfs_mutex) - */ -static void sysfs_unlink_sibling(struct sysfs_dirent *sd) -{ - if (sysfs_type(sd) == SYSFS_DIR) - sd->s_parent->s_dir.subdirs--; - - rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); -} - -/** - * sysfs_get_active - get an active reference to sysfs_dirent - * @sd: sysfs_dirent to get an active reference to - * - * Get an active reference of @sd. This function is noop if @sd - * is NULL. - * - * RETURNS: - * Pointer to @sd on success, NULL on failure. - */ -struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) -{ - if (unlikely(!sd)) - return NULL; - - if (!atomic_inc_unless_negative(&sd->s_active)) - return NULL; - - if (likely(!sysfs_ignore_lockdep(sd))) - rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_); - return sd; -} - -/** - * sysfs_put_active - put an active reference to sysfs_dirent - * @sd: sysfs_dirent to put an active reference to - * - * Put an active reference to @sd. This function is noop if @sd - * is NULL. 
- */ -void sysfs_put_active(struct sysfs_dirent *sd) -{ - int v; - - if (unlikely(!sd)) - return; - - if (likely(!sysfs_ignore_lockdep(sd))) - rwsem_release(&sd->dep_map, 1, _RET_IP_); - v = atomic_dec_return(&sd->s_active); - if (likely(v != SD_DEACTIVATED_BIAS)) - return; - - /* atomic_dec_return() is a mb(), we'll always see the updated - * sd->u.completion. - */ - complete(sd->u.completion); -} - -/** - * sysfs_deactivate - deactivate sysfs_dirent - * @sd: sysfs_dirent to deactivate - * - * Deny new active references and drain existing ones. - */ -static void sysfs_deactivate(struct sysfs_dirent *sd) -{ - DECLARE_COMPLETION_ONSTACK(wait); - int v; - - BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED)); - - if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF)) - return; - - sd->u.completion = (void *)&wait; - - rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_); - /* atomic_add_return() is a mb(), put_active() will always see - * the updated sd->u.completion. - */ - v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); - - if (v != SD_DEACTIVATED_BIAS) { - lock_contended(&sd->dep_map, _RET_IP_); - wait_for_completion(&wait); - } - - lock_acquired(&sd->dep_map, _RET_IP_); - rwsem_release(&sd->dep_map, 1, _RET_IP_); -} - -static int sysfs_alloc_ino(unsigned int *pino) -{ - int ino, rc; - - retry: - spin_lock(&sysfs_ino_lock); - rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino); - spin_unlock(&sysfs_ino_lock); - - if (rc == -EAGAIN) { - if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) - goto retry; - rc = -ENOMEM; - } - - *pino = ino; - return rc; -} - -static void sysfs_free_ino(unsigned int ino) -{ - spin_lock(&sysfs_ino_lock); - ida_remove(&sysfs_ino_ida, ino); - spin_unlock(&sysfs_ino_lock); -} - -void release_sysfs_dirent(struct sysfs_dirent *sd) -{ - struct sysfs_dirent *parent_sd; - - repeat: - /* Moving/renaming is always done while holding reference. - * sd->s_parent won't change beneath us. 
- */ - parent_sd = sd->s_parent; - - WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED), - "sysfs: free using entry: %s/%s\n", - parent_sd ? parent_sd->s_name : "", sd->s_name); - - if (sysfs_type(sd) == SYSFS_KOBJ_LINK) - sysfs_put(sd->s_symlink.target_sd); - if (sysfs_type(sd) & SYSFS_COPY_NAME) - kfree(sd->s_name); - if (sd->s_iattr && sd->s_iattr->ia_secdata) - security_release_secctx(sd->s_iattr->ia_secdata, - sd->s_iattr->ia_secdata_len); - kfree(sd->s_iattr); - sysfs_free_ino(sd->s_ino); - kmem_cache_free(sysfs_dir_cachep, sd); - - sd = parent_sd; - if (sd && atomic_dec_and_test(&sd->s_count)) - goto repeat; -} - -static int sysfs_dentry_delete(const struct dentry *dentry) -{ - struct sysfs_dirent *sd = dentry->d_fsdata; - return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED)); -} - -static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) -{ - struct sysfs_dirent *sd; - int type; - - if (flags & LOOKUP_RCU) - return -ECHILD; - - sd = dentry->d_fsdata; - mutex_lock(&sysfs_mutex); - - /* The sysfs dirent has been deleted */ - if (sd->s_flags & SYSFS_FLAG_REMOVED) - goto out_bad; - - /* The sysfs dirent has been moved? */ - if (dentry->d_parent->d_fsdata != sd->s_parent) - goto out_bad; - - /* The sysfs dirent has been renamed */ - if (strcmp(dentry->d_name.name, sd->s_name) != 0) - goto out_bad; - - /* The sysfs dirent has been moved to a different namespace */ - type = KOBJ_NS_TYPE_NONE; - if (sd->s_parent) { - type = sysfs_ns_type(sd->s_parent); - if (type != KOBJ_NS_TYPE_NONE && - sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns) - goto out_bad; - } - - mutex_unlock(&sysfs_mutex); -out_valid: - return 1; -out_bad: - /* Remove the dentry from the dcache hashes. - * If this is a deleted dentry we use d_drop instead of d_delete - * so sysfs doesn't need to cope with negative dentries. - * - * If this is a dentry that has simply been renamed we - * use d_drop to remove it from the dcache lookup on its - * old parent. 
If this dentry persists later when a lookup - * is performed at its new name the dentry will be readded - * to the dcache hashes. - */ - mutex_unlock(&sysfs_mutex); - - /* If we have submounts we must allow the vfs caches - * to lie about the state of the filesystem to prevent - * leaks and other nasty things. - */ - if (check_submounts_and_drop(dentry) != 0) - goto out_valid; - - return 0; -} - -static void sysfs_dentry_release(struct dentry *dentry) -{ - sysfs_put(dentry->d_fsdata); -} - -const struct dentry_operations sysfs_dentry_ops = { - .d_revalidate = sysfs_dentry_revalidate, - .d_delete = sysfs_dentry_delete, - .d_release = sysfs_dentry_release, -}; - -struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) -{ - char *dup_name = NULL; - struct sysfs_dirent *sd; - - if (type & SYSFS_COPY_NAME) { - name = dup_name = kstrdup(name, GFP_KERNEL); - if (!name) - return NULL; - } - - sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); - if (!sd) - goto err_out1; - - if (sysfs_alloc_ino(&sd->s_ino)) - goto err_out2; - - atomic_set(&sd->s_count, 1); - atomic_set(&sd->s_active, 0); - - sd->s_name = name; - sd->s_mode = mode; - sd->s_flags = type | SYSFS_FLAG_REMOVED; - - return sd; - - err_out2: - kmem_cache_free(sysfs_dir_cachep, sd); - err_out1: - kfree(dup_name); - return NULL; -} - -/** - * sysfs_addrm_start - prepare for sysfs_dirent add/remove - * @acxt: pointer to sysfs_addrm_cxt to be used - * - * This function is called when the caller is about to add or remove - * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used - * to keep and pass context to other addrm functions. - * - * LOCKING: - * Kernel thread context (may sleep). sysfs_mutex is locked on - * return. 
- */ -void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) - __acquires(sysfs_mutex) -{ - memset(acxt, 0, sizeof(*acxt)); - - mutex_lock(&sysfs_mutex); -} - -/** - * __sysfs_add_one - add sysfs_dirent to parent without warning - * @acxt: addrm context to use - * @sd: sysfs_dirent to be added - * @parent_sd: the parent sysfs_dirent to add @sd to - * - * Get @parent_sd and set @sd->s_parent to it and increment nlink of - * the parent inode if @sd is a directory and link into the children - * list of the parent. - * - * This function should be called between calls to - * sysfs_addrm_start() and sysfs_addrm_finish() and should be - * passed the same @acxt as passed to sysfs_addrm_start(). - * - * LOCKING: - * Determined by sysfs_addrm_start(). - * - * RETURNS: - * 0 on success, -EEXIST if entry with the given name already - * exists. - */ -int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, - struct sysfs_dirent *parent_sd) -{ - struct sysfs_inode_attrs *ps_iattr; - int ret; - - if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) { - WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", - sysfs_ns_type(parent_sd) ? 
"required" : "invalid", - parent_sd->s_name, sd->s_name); - return -EINVAL; - } - - sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); - sd->s_parent = sysfs_get(parent_sd); - - ret = sysfs_link_sibling(sd); - if (ret) - return ret; - - /* Update timestamps on the parent */ - ps_iattr = parent_sd->s_iattr; - if (ps_iattr) { - struct iattr *ps_iattrs = &ps_iattr->ia_iattr; - ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; - } - - /* Mark the entry added into directory tree */ - sd->s_flags &= ~SYSFS_FLAG_REMOVED; - - return 0; -} - /** * sysfs_pathname - return full path to sysfs dirent * @sd: sysfs_dirent whose path we want @@ -489,445 +55,33 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) } /** - * sysfs_add_one - add sysfs_dirent to parent - * @acxt: addrm context to use - * @sd: sysfs_dirent to be added - * @parent_sd: the parent sysfs_dirent to add @sd to - * - * Get @parent_sd and set @sd->s_parent to it and increment nlink of - * the parent inode if @sd is a directory and link into the children - * list of the parent. - * - * This function should be called between calls to - * sysfs_addrm_start() and sysfs_addrm_finish() and should be - * passed the same @acxt as passed to sysfs_addrm_start(). - * - * LOCKING: - * Determined by sysfs_addrm_start(). - * - * RETURNS: - * 0 on success, -EEXIST if entry with the given name already - * exists. - */ -int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, - struct sysfs_dirent *parent_sd) -{ - int ret; - - ret = __sysfs_add_one(acxt, sd, parent_sd); - - if (ret == -EEXIST) - sysfs_warn_dup(parent_sd, sd->s_name); - return ret; -} - -/** - * sysfs_remove_one - remove sysfs_dirent from parent - * @acxt: addrm context to use - * @sd: sysfs_dirent to be removed - * - * Mark @sd removed and drop nlink of parent inode if @sd is a - * directory. @sd is unlinked from the children list. 
- * - * This function should be called between calls to - * sysfs_addrm_start() and sysfs_addrm_finish() and should be - * passed the same @acxt as passed to sysfs_addrm_start(). - * - * LOCKING: - * Determined by sysfs_addrm_start(). - */ -static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, - struct sysfs_dirent *sd) -{ - struct sysfs_inode_attrs *ps_iattr; - - /* - * Removal can be called multiple times on the same node. Only the - * first invocation is effective and puts the base ref. - */ - if (sd->s_flags & SYSFS_FLAG_REMOVED) - return; - - sysfs_unlink_sibling(sd); - - /* Update timestamps on the parent */ - ps_iattr = sd->s_parent->s_iattr; - if (ps_iattr) { - struct iattr *ps_iattrs = &ps_iattr->ia_iattr; - ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; - } - - sd->s_flags |= SYSFS_FLAG_REMOVED; - sd->u.removed_list = acxt->removed; - acxt->removed = sd; -} - -/** - * sysfs_addrm_finish - finish up sysfs_dirent add/remove - * @acxt: addrm context to finish up - * - * Finish up sysfs_dirent add/remove. Resources acquired by - * sysfs_addrm_start() are released and removed sysfs_dirents are - * cleaned up. - * - * LOCKING: - * sysfs_mutex is released. - */ -void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) - __releases(sysfs_mutex) -{ - /* release resources acquired by sysfs_addrm_start() */ - mutex_unlock(&sysfs_mutex); - - /* kill removed sysfs_dirents */ - while (acxt->removed) { - struct sysfs_dirent *sd = acxt->removed; - - acxt->removed = sd->u.removed_list; - - sysfs_deactivate(sd); - sysfs_unmap_bin_file(sd); - sysfs_put(sd); - } -} - -/** - * sysfs_find_dirent - find sysfs_dirent with the given name - * @parent_sd: sysfs_dirent to search under - * @name: name to look for - * @ns: the namespace tag to use - * - * Look for sysfs_dirent with name @name under @parent_sd. - * - * LOCKING: - * mutex_lock(sysfs_mutex) - * - * RETURNS: - * Pointer to sysfs_dirent if found, NULL if not. 
- */ -struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, - const unsigned char *name, - const void *ns) -{ - struct rb_node *node = parent_sd->s_dir.children.rb_node; - unsigned int hash; - - if (!!sysfs_ns_type(parent_sd) != !!ns) { - WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", - sysfs_ns_type(parent_sd) ? "required" : "invalid", - parent_sd->s_name, name); - return NULL; - } - - hash = sysfs_name_hash(name, ns); - while (node) { - struct sysfs_dirent *sd; - int result; - - sd = to_sysfs_dirent(node); - result = sysfs_name_compare(hash, name, ns, sd); - if (result < 0) - node = node->rb_left; - else if (result > 0) - node = node->rb_right; - else - return sd; - } - return NULL; -} - -/** - * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name - * @parent_sd: sysfs_dirent to search under - * @name: name to look for - * @ns: the namespace tag to use - * - * Look for sysfs_dirent with name @name under @parent_sd and get - * it if found. - * - * LOCKING: - * Kernel thread context (may sleep). Grabs sysfs_mutex. - * - * RETURNS: - * Pointer to sysfs_dirent if found, NULL if not. 
- */ -struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, - const unsigned char *name, - const void *ns) -{ - struct sysfs_dirent *sd; - - mutex_lock(&sysfs_mutex); - sd = sysfs_find_dirent(parent_sd, name, ns); - sysfs_get(sd); - mutex_unlock(&sysfs_mutex); - - return sd; -} -EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns); - -static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, - enum kobj_ns_type type, - const char *name, const void *ns, - struct sysfs_dirent **p_sd) -{ - umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; - struct sysfs_addrm_cxt acxt; - struct sysfs_dirent *sd; - int rc; - - /* allocate */ - sd = sysfs_new_dirent(name, mode, SYSFS_DIR); - if (!sd) - return -ENOMEM; - - sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT); - sd->s_ns = ns; - sd->s_dir.kobj = kobj; - - /* link in */ - sysfs_addrm_start(&acxt); - rc = sysfs_add_one(&acxt, sd, parent_sd); - sysfs_addrm_finish(&acxt); - - if (rc == 0) - *p_sd = sd; - else - sysfs_put(sd); - - return rc; -} - -int sysfs_create_subdir(struct kobject *kobj, const char *name, - struct sysfs_dirent **p_sd) -{ - return create_dir(kobj, kobj->sd, - KOBJ_NS_TYPE_NONE, name, NULL, p_sd); -} - -/** - * sysfs_read_ns_type: return associated ns_type - * @kobj: the kobject being queried - * - * Each kobject can be tagged with exactly one namespace type - * (i.e. network or user). 
Return the ns_type associated with - * this object if any - */ -static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) -{ - const struct kobj_ns_type_operations *ops; - enum kobj_ns_type type; - - ops = kobj_child_ns_ops(kobj); - if (!ops) - return KOBJ_NS_TYPE_NONE; - - type = ops->type; - BUG_ON(type <= KOBJ_NS_TYPE_NONE); - BUG_ON(type >= KOBJ_NS_TYPES); - BUG_ON(!kobj_ns_type_registered(type)); - - return type; -} - -/** * sysfs_create_dir_ns - create a directory for an object with a namespace tag * @kobj: object we're creating directory for * @ns: the namespace tag to use */ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { - enum kobj_ns_type type; struct sysfs_dirent *parent_sd, *sd; - int error = 0; BUG_ON(!kobj); if (kobj->parent) parent_sd = kobj->parent->sd; else - parent_sd = &sysfs_root; + parent_sd = sysfs_root_sd; if (!parent_sd) return -ENOENT; - type = sysfs_read_ns_type(kobj); - - error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd); - if (!error) - kobj->sd = sd; - return error; -} - -static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) -{ - struct dentry *ret = NULL; - struct dentry *parent = dentry->d_parent; - struct sysfs_dirent *parent_sd = parent->d_fsdata; - struct sysfs_dirent *sd; - struct inode *inode; - enum kobj_ns_type type; - const void *ns; - - mutex_lock(&sysfs_mutex); - - type = sysfs_ns_type(parent_sd); - ns = sysfs_info(dir->i_sb)->ns[type]; - - sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns); - - /* no such entry */ - if (!sd) { - ret = ERR_PTR(-ENOENT); - goto out_unlock; - } - dentry->d_fsdata = sysfs_get(sd); - - /* attach dentry and inode */ - inode = sysfs_get_inode(dir->i_sb, sd); - if (!inode) { - ret = ERR_PTR(-ENOMEM); - goto out_unlock; + sd = kernfs_create_dir_ns(parent_sd, kobject_name(kobj), kobj, ns); + if (IS_ERR(sd)) { + if (PTR_ERR(sd) == -EEXIST) + sysfs_warn_dup(parent_sd, kobject_name(kobj)); + return 
PTR_ERR(sd); } - /* instantiate and hash dentry */ - ret = d_materialise_unique(dentry, inode); - out_unlock: - mutex_unlock(&sysfs_mutex); - return ret; -} - -const struct inode_operations sysfs_dir_inode_operations = { - .lookup = sysfs_lookup, - .permission = sysfs_permission, - .setattr = sysfs_setattr, - .getattr = sysfs_getattr, - .setxattr = sysfs_setxattr, -}; - -static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos) -{ - struct sysfs_dirent *last; - - while (true) { - struct rb_node *rbn; - - last = pos; - - if (sysfs_type(pos) != SYSFS_DIR) - break; - - rbn = rb_first(&pos->s_dir.children); - if (!rbn) - break; - - pos = to_sysfs_dirent(rbn); - } - - return last; -} - -/** - * sysfs_next_descendant_post - find the next descendant for post-order walk - * @pos: the current position (%NULL to initiate traversal) - * @root: sysfs_dirent whose descendants to walk - * - * Find the next descendant to visit for post-order traversal of @root's - * descendants. @root is included in the iteration and the last node to be - * visited. 
- */ -static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos, - struct sysfs_dirent *root) -{ - struct rb_node *rbn; - - lockdep_assert_held(&sysfs_mutex); - - /* if first iteration, visit leftmost descendant which may be root */ - if (!pos) - return sysfs_leftmost_descendant(root); - - /* if we visited @root, we're done */ - if (pos == root) - return NULL; - - /* if there's an unvisited sibling, visit its leftmost descendant */ - rbn = rb_next(&pos->s_rb); - if (rbn) - return sysfs_leftmost_descendant(to_sysfs_dirent(rbn)); - - /* no sibling left, visit parent */ - return pos->s_parent; -} - -static void __sysfs_remove(struct sysfs_addrm_cxt *acxt, - struct sysfs_dirent *sd) -{ - struct sysfs_dirent *pos, *next; - - if (!sd) - return; - - pr_debug("sysfs %s: removing\n", sd->s_name); - - next = NULL; - do { - pos = next; - next = sysfs_next_descendant_post(pos, sd); - if (pos) - sysfs_remove_one(acxt, pos); - } while (next); -} - -/** - * sysfs_remove - remove a sysfs_dirent recursively - * @sd: the sysfs_dirent to remove - * - * Remove @sd along with all its subdirectories and files. - */ -void sysfs_remove(struct sysfs_dirent *sd) -{ - struct sysfs_addrm_cxt acxt; - - sysfs_addrm_start(&acxt); - __sysfs_remove(&acxt, sd); - sysfs_addrm_finish(&acxt); -} - -/** - * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it - * @dir_sd: parent of the target - * @name: name of the sysfs_dirent to remove - * @ns: namespace tag of the sysfs_dirent to remove - * - * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove - * it. Returns 0 on success, -ENOENT if such entry doesn't exist. 
- */ -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, - const void *ns) -{ - struct sysfs_addrm_cxt acxt; - struct sysfs_dirent *sd; - - if (!dir_sd) { - WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", - name); - return -ENOENT; - } - - sysfs_addrm_start(&acxt); - - sd = sysfs_find_dirent(dir_sd, name, ns); - if (sd) - __sysfs_remove(&acxt, sd); - - sysfs_addrm_finish(&acxt); - - if (sd) - return 0; - else - return -ENOENT; + kobj->sd = sd; + return 0; } /** @@ -960,60 +114,16 @@ void sysfs_remove_dir(struct kobject *kobj) if (sd) { WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); - sysfs_remove(sd); + kernfs_remove(sd); } } -int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, - const char *new_name, const void *new_ns) -{ - int error; - - mutex_lock(&sysfs_mutex); - - error = 0; - if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) && - (strcmp(sd->s_name, new_name) == 0)) - goto out; /* nothing to rename */ - - error = -EEXIST; - if (sysfs_find_dirent(new_parent_sd, new_name, new_ns)) - goto out; - - /* rename sysfs_dirent */ - if (strcmp(sd->s_name, new_name) != 0) { - error = -ENOMEM; - new_name = kstrdup(new_name, GFP_KERNEL); - if (!new_name) - goto out; - - kfree(sd->s_name); - sd->s_name = new_name; - } - - /* - * Move to the appropriate place in the appropriate directories rbtree. 
- */ - sysfs_unlink_sibling(sd); - sysfs_get(new_parent_sd); - sysfs_put(sd->s_parent); - sd->s_ns = new_ns; - sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); - sd->s_parent = new_parent_sd; - sysfs_link_sibling(sd); - - error = 0; - out: - mutex_unlock(&sysfs_mutex); - return error; -} - int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, const void *new_ns) { struct sysfs_dirent *parent_sd = kobj->sd->s_parent; - return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns); + return kernfs_rename_ns(kobj->sd, parent_sd, new_name, new_ns); } int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, @@ -1024,123 +134,7 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, BUG_ON(!sd->s_parent); new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? - new_parent_kobj->sd : &sysfs_root; + new_parent_kobj->sd : sysfs_root_sd; - return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns); + return kernfs_rename_ns(sd, new_parent_sd, sd->s_name, new_ns); } - -/* Relationship between s_mode and the DT_xxx types */ -static inline unsigned char dt_type(struct sysfs_dirent *sd) -{ - return (sd->s_mode >> 12) & 15; -} - -static int sysfs_dir_release(struct inode *inode, struct file *filp) -{ - sysfs_put(filp->private_data); - return 0; -} - -static struct sysfs_dirent *sysfs_dir_pos(const void *ns, - struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos) -{ - if (pos) { - int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && - pos->s_parent == parent_sd && - hash == pos->s_hash; - sysfs_put(pos); - if (!valid) - pos = NULL; - } - if (!pos && (hash > 1) && (hash < INT_MAX)) { - struct rb_node *node = parent_sd->s_dir.children.rb_node; - while (node) { - pos = to_sysfs_dirent(node); - - if (hash < pos->s_hash) - node = node->rb_left; - else if (hash > pos->s_hash) - node = node->rb_right; - else - break; - } - } - /* Skip over entries in the wrong namespace */ - while (pos && pos->s_ns != ns) { - 
struct rb_node *node = rb_next(&pos->s_rb); - if (!node) - pos = NULL; - else - pos = to_sysfs_dirent(node); - } - return pos; -} - -static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, - struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) -{ - pos = sysfs_dir_pos(ns, parent_sd, ino, pos); - if (pos) - do { - struct rb_node *node = rb_next(&pos->s_rb); - if (!node) - pos = NULL; - else - pos = to_sysfs_dirent(node); - } while (pos && pos->s_ns != ns); - return pos; -} - -static int sysfs_readdir(struct file *file, struct dir_context *ctx) -{ - struct dentry *dentry = file->f_path.dentry; - struct sysfs_dirent *parent_sd = dentry->d_fsdata; - struct sysfs_dirent *pos = file->private_data; - enum kobj_ns_type type; - const void *ns; - - type = sysfs_ns_type(parent_sd); - ns = sysfs_info(dentry->d_sb)->ns[type]; - - if (!dir_emit_dots(file, ctx)) - return 0; - mutex_lock(&sysfs_mutex); - for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos); - pos; - pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) { - const char *name = pos->s_name; - unsigned int type = dt_type(pos); - int len = strlen(name); - ino_t ino = pos->s_ino; - ctx->pos = pos->s_hash; - file->private_data = sysfs_get(pos); - - mutex_unlock(&sysfs_mutex); - if (!dir_emit(ctx, name, len, ino, type)) - return 0; - mutex_lock(&sysfs_mutex); - } - mutex_unlock(&sysfs_mutex); - file->private_data = NULL; - ctx->pos = INT_MAX; - return 0; -} - -static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) -{ - struct inode *inode = file_inode(file); - loff_t ret; - - mutex_lock(&inode->i_mutex); - ret = generic_file_llseek(file, offset, whence); - mutex_unlock(&inode->i_mutex); - - return ret; -} - -const struct file_operations sysfs_dir_operations = { - .read = generic_read_dir, - .iterate = sysfs_readdir, - .release = sysfs_dir_release, - .llseek = sysfs_dir_llseek, -}; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 35e7d08..ac77d2b 100644 --- 
a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -14,59 +14,12 @@ #include <linux/kobject.h> #include <linux/kallsyms.h> #include <linux/slab.h> -#include <linux/fsnotify.h> -#include <linux/namei.h> -#include <linux/poll.h> #include <linux/list.h> #include <linux/mutex.h> -#include <linux/limits.h> -#include <linux/uaccess.h> #include <linux/seq_file.h> -#include <linux/mm.h> #include "sysfs.h" - -/* - * There's one sysfs_open_file for each open file and one sysfs_open_dirent - * for each sysfs_dirent with one or more open files. - * - * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is - * protected by sysfs_open_dirent_lock. - * - * filp->private_data points to seq_file whose ->private points to - * sysfs_open_file. sysfs_open_files are chained at - * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex. - */ -static DEFINE_SPINLOCK(sysfs_open_dirent_lock); -static DEFINE_MUTEX(sysfs_open_file_mutex); - -struct sysfs_open_dirent { - atomic_t refcnt; - atomic_t event; - wait_queue_head_t poll; - struct list_head files; /* goes through sysfs_open_file.list */ -}; - -struct sysfs_open_file { - struct sysfs_dirent *sd; - struct file *file; - struct mutex mutex; - int event; - struct list_head list; - - bool mmapped; - const struct vm_operations_struct *vm_ops; -}; - -static bool sysfs_is_bin(struct sysfs_dirent *sd) -{ - return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR; -} - -static struct sysfs_open_file *sysfs_of(struct file *file) -{ - return ((struct seq_file *)file->private_data)->private; -} +#include "../kernfs/kernfs-internal.h" /* * Determine ktype->sysfs_ops for the given sysfs_dirent. 
This function @@ -74,9 +27,9 @@ static struct sysfs_open_file *sysfs_of(struct file *file) */ static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) { - struct kobject *kobj = sd->s_parent->s_dir.kobj; + struct kobject *kobj = sd->s_parent->priv; - if (!sysfs_ignore_lockdep(sd)) + if (sd->s_flags & SYSFS_FLAG_LOCKDEP) lockdep_assert_held(sd); return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; } @@ -86,13 +39,13 @@ static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) * details like buffering and seeking. The following function pipes * sysfs_ops->show() result through seq_file. */ -static int sysfs_seq_show(struct seq_file *sf, void *v) +static int sysfs_kf_seq_show(struct seq_file *sf, void *v) { struct sysfs_open_file *of = sf->private; - struct kobject *kobj = of->sd->s_parent->s_dir.kobj; - const struct sysfs_ops *ops; - char *buf; + struct kobject *kobj = of->sd->s_parent->priv; + const struct sysfs_ops *ops = sysfs_file_ops(of->sd); ssize_t count; + char *buf; /* acquire buffer and ensure that it's >= PAGE_SIZE */ count = seq_get_buf(sf, &buf); @@ -102,34 +55,15 @@ static int sysfs_seq_show(struct seq_file *sf, void *v) } /* - * Need @of->sd for attr and ops, its parent for kobj. @of->mutex - * nests outside active ref and is just to ensure that the ops - * aren't called concurrently for the same open file. + * Invoke show(). Control may reach here via seq file lseek even + * if @ops->show() isn't implemented. */ - mutex_lock(&of->mutex); - if (!sysfs_get_active(of->sd)) { - mutex_unlock(&of->mutex); - return -ENODEV; + if (ops->show) { + count = ops->show(kobj, of->sd->priv, buf); + if (count < 0) + return count; } - of->event = atomic_read(&of->sd->s_attr.open->event); - - /* - * Lookup @ops and invoke show(). Control may reach here via seq - * file lseek even if @ops->show() isn't implemented. 
- */ - ops = sysfs_file_ops(of->sd); - if (ops->show) - count = ops->show(kobj, of->sd->s_attr.attr, buf); - else - count = 0; - - sysfs_put_active(of->sd); - mutex_unlock(&of->mutex); - - if (count < 0) - return count; - /* * The code works fine with PAGE_SIZE return but it's likely to * indicate truncated result or overflow in normal use cases. @@ -144,726 +78,190 @@ static int sysfs_seq_show(struct seq_file *sf, void *v) return 0; } -/* - * Read method for bin files. As reading a bin file can have side-effects, - * the exact offset and bytes specified in read(2) call should be passed to - * the read callback making it difficult to use seq_file. Implement - * simplistic custom buffering for bin files. - */ -static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf, - size_t bytes, loff_t *off) +static ssize_t sysfs_kf_bin_read(struct sysfs_open_file *of, char *buf, + size_t count, loff_t pos) { - struct sysfs_open_file *of = sysfs_of(file); - struct bin_attribute *battr = of->sd->s_attr.bin_attr; - struct kobject *kobj = of->sd->s_parent->s_dir.kobj; - loff_t size = file_inode(file)->i_size; - int count = min_t(size_t, bytes, PAGE_SIZE); - loff_t offs = *off; - char *buf; + struct bin_attribute *battr = of->sd->priv; + struct kobject *kobj = of->sd->s_parent->priv; + loff_t size = file_inode(of->file)->i_size; - if (!bytes) + if (!count) return 0; if (size) { - if (offs > size) + if (pos > size) return 0; - if (offs + count > size) - count = size - offs; - } - - buf = kmalloc(count, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - /* need of->sd for battr, its parent for kobj */ - mutex_lock(&of->mutex); - if (!sysfs_get_active(of->sd)) { - count = -ENODEV; - mutex_unlock(&of->mutex); - goto out_free; + if (pos + count > size) + count = size - pos; } - if (battr->read) - count = battr->read(file, kobj, battr, buf, offs, count); - else - count = -EIO; - - sysfs_put_active(of->sd); - mutex_unlock(&of->mutex); - - if (count < 0) - goto out_free; - - if 
(copy_to_user(userbuf, buf, count)) { - count = -EFAULT; - goto out_free; - } - - pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); + if (!battr->read) + return -EIO; - *off = offs + count; - - out_free: - kfree(buf); - return count; + return battr->read(of->file, kobj, battr, buf, pos, count); } -/** - * flush_write_buffer - push buffer to kobject - * @of: open file - * @buf: data buffer for file - * @off: file offset to write to - * @count: number of bytes - * - * Get the correct pointers for the kobject and the attribute we're dealing - * with, then call the store() method for it with @buf. - */ -static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off, - size_t count) +/* kernfs write callback for regular sysfs files */ +static ssize_t sysfs_kf_write(struct sysfs_open_file *of, char *buf, + size_t count, loff_t pos) { - struct kobject *kobj = of->sd->s_parent->s_dir.kobj; - int rc = 0; - - /* - * Need @of->sd for attr and ops, its parent for kobj. @of->mutex - * nests outside active ref and is just to ensure that the ops - * aren't called concurrently for the same open file. 
- */ - mutex_lock(&of->mutex); - if (!sysfs_get_active(of->sd)) { - mutex_unlock(&of->mutex); - return -ENODEV; - } - - if (sysfs_is_bin(of->sd)) { - struct bin_attribute *battr = of->sd->s_attr.bin_attr; + const struct sysfs_ops *ops = sysfs_file_ops(of->sd); + struct kobject *kobj = of->sd->s_parent->priv; - rc = -EIO; - if (battr->write) - rc = battr->write(of->file, kobj, battr, buf, off, - count); - } else { - const struct sysfs_ops *ops = sysfs_file_ops(of->sd); - - rc = ops->store(kobj, of->sd->s_attr.attr, buf, count); - } - - sysfs_put_active(of->sd); - mutex_unlock(&of->mutex); + if (!count) + return 0; - return rc; + return ops->store(kobj, of->sd->priv, buf, count); } -/** - * sysfs_write_file - write an attribute - * @file: file pointer - * @user_buf: data to write - * @count: number of bytes - * @ppos: starting offset - * - * Copy data in from userland and pass it to the matching - * sysfs_ops->store() by invoking flush_write_buffer(). - * - * There is no easy way for us to know if userspace is only doing a partial - * write, so we don't support them. We expect the entire buffer to come on - * the first write. Hint: if you're writing a value, first read the file, - * modify only the the value you're changing, then write entire buffer - * back. 
- */ -static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) +/* kernfs write callback for bin sysfs files */ +static ssize_t sysfs_kf_bin_write(struct sysfs_open_file *of, char *buf, + size_t count, loff_t pos) { - struct sysfs_open_file *of = sysfs_of(file); - ssize_t len = min_t(size_t, count, PAGE_SIZE); - loff_t size = file_inode(file)->i_size; - char *buf; + struct bin_attribute *battr = of->sd->priv; + struct kobject *kobj = of->sd->s_parent->priv; + loff_t size = file_inode(of->file)->i_size; - if (sysfs_is_bin(of->sd) && size) { - if (size <= *ppos) + if (size) { + if (size <= pos) return 0; - len = min_t(ssize_t, len, size - *ppos); + count = min_t(ssize_t, count, size - pos); } - - if (!len) + if (!count) return 0; - buf = kmalloc(len + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; + if (!battr->write) + return -EIO; - if (copy_from_user(buf, user_buf, len)) { - len = -EFAULT; - goto out_free; - } - buf[len] = '\0'; /* guarantee string termination */ - - len = flush_write_buffer(of, buf, *ppos, len); - if (len > 0) - *ppos += len; -out_free: - kfree(buf); - return len; + return battr->write(of->file, kobj, battr, buf, pos, count); } -static void sysfs_bin_vma_open(struct vm_area_struct *vma) +static int sysfs_kf_bin_mmap(struct sysfs_open_file *of, + struct vm_area_struct *vma) { - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - - if (!of->vm_ops) - return; - - if (!sysfs_get_active(of->sd)) - return; - - if (of->vm_ops->open) - of->vm_ops->open(vma); - - sysfs_put_active(of->sd); -} - -static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return VM_FAULT_SIGBUS; - - if (!sysfs_get_active(of->sd)) - return VM_FAULT_SIGBUS; - - ret = VM_FAULT_SIGBUS; - if (of->vm_ops->fault) - ret = of->vm_ops->fault(vma, vmf); - - 
sysfs_put_active(of->sd); - return ret; -} - -static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return VM_FAULT_SIGBUS; - - if (!sysfs_get_active(of->sd)) - return VM_FAULT_SIGBUS; - - ret = 0; - if (of->vm_ops->page_mkwrite) - ret = of->vm_ops->page_mkwrite(vma, vmf); - else - file_update_time(file); - - sysfs_put_active(of->sd); - return ret; -} - -static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return -EINVAL; - - if (!sysfs_get_active(of->sd)) - return -EINVAL; - - ret = -EINVAL; - if (of->vm_ops->access) - ret = of->vm_ops->access(vma, addr, buf, len, write); - - sysfs_put_active(of->sd); - return ret; -} - -#ifdef CONFIG_NUMA -static int sysfs_bin_set_policy(struct vm_area_struct *vma, - struct mempolicy *new) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return 0; - - if (!sysfs_get_active(of->sd)) - return -EINVAL; - - ret = 0; - if (of->vm_ops->set_policy) - ret = of->vm_ops->set_policy(vma, new); - - sysfs_put_active(of->sd); - return ret; -} - -static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma, - unsigned long addr) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - struct mempolicy *pol; - - if (!of->vm_ops) - return vma->vm_policy; - - if (!sysfs_get_active(of->sd)) - return vma->vm_policy; - - pol = vma->vm_policy; - if (of->vm_ops->get_policy) - pol = of->vm_ops->get_policy(vma, addr); - - sysfs_put_active(of->sd); - return pol; -} - -static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, - const nodemask_t *to, unsigned long flags) -{ - struct file *file = 
vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return 0; - - if (!sysfs_get_active(of->sd)) - return 0; - - ret = 0; - if (of->vm_ops->migrate) - ret = of->vm_ops->migrate(vma, from, to, flags); - - sysfs_put_active(of->sd); - return ret; -} -#endif - -static const struct vm_operations_struct sysfs_bin_vm_ops = { - .open = sysfs_bin_vma_open, - .fault = sysfs_bin_fault, - .page_mkwrite = sysfs_bin_page_mkwrite, - .access = sysfs_bin_access, -#ifdef CONFIG_NUMA - .set_policy = sysfs_bin_set_policy, - .get_policy = sysfs_bin_get_policy, - .migrate = sysfs_bin_migrate, -#endif -}; - -static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct sysfs_open_file *of = sysfs_of(file); - struct bin_attribute *battr = of->sd->s_attr.bin_attr; - struct kobject *kobj = of->sd->s_parent->s_dir.kobj; - int rc; - - mutex_lock(&of->mutex); - - /* need of->sd for battr, its parent for kobj */ - rc = -ENODEV; - if (!sysfs_get_active(of->sd)) - goto out_unlock; + struct bin_attribute *battr = of->sd->priv; + struct kobject *kobj = of->sd->s_parent->priv; if (!battr->mmap) - goto out_put; - - rc = battr->mmap(file, kobj, battr, vma); - if (rc) - goto out_put; - - /* - * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() - * to satisfy versions of X which crash if the mmap fails: that - * substitutes a new vm_file, and we don't then want bin_vm_ops. - */ - if (vma->vm_file != file) - goto out_put; - - rc = -EINVAL; - if (of->mmapped && of->vm_ops != vma->vm_ops) - goto out_put; - - /* - * It is not possible to successfully wrap close. - * So error if someone is trying to use close. 
- */ - rc = -EINVAL; - if (vma->vm_ops && vma->vm_ops->close) - goto out_put; - - rc = 0; - of->mmapped = 1; - of->vm_ops = vma->vm_ops; - vma->vm_ops = &sysfs_bin_vm_ops; -out_put: - sysfs_put_active(of->sd); -out_unlock: - mutex_unlock(&of->mutex); - - return rc; -} - -/** - * sysfs_get_open_dirent - get or create sysfs_open_dirent - * @sd: target sysfs_dirent - * @of: sysfs_open_file for this instance of open - * - * If @sd->s_attr.open exists, increment its reference count; - * otherwise, create one. @of is chained to the files list. - * - * LOCKING: - * Kernel thread context (may sleep). - * - * RETURNS: - * 0 on success, -errno on failure. - */ -static int sysfs_get_open_dirent(struct sysfs_dirent *sd, - struct sysfs_open_file *of) -{ - struct sysfs_open_dirent *od, *new_od = NULL; - - retry: - mutex_lock(&sysfs_open_file_mutex); - spin_lock_irq(&sysfs_open_dirent_lock); - - if (!sd->s_attr.open && new_od) { - sd->s_attr.open = new_od; - new_od = NULL; - } - - od = sd->s_attr.open; - if (od) { - atomic_inc(&od->refcnt); - list_add_tail(&of->list, &od->files); - } - - spin_unlock_irq(&sysfs_open_dirent_lock); - mutex_unlock(&sysfs_open_file_mutex); - - if (od) { - kfree(new_od); - return 0; - } - - /* not there, initialize a new one and retry */ - new_od = kmalloc(sizeof(*new_od), GFP_KERNEL); - if (!new_od) - return -ENOMEM; + return -ENODEV; - atomic_set(&new_od->refcnt, 0); - atomic_set(&new_od->event, 1); - init_waitqueue_head(&new_od->poll); - INIT_LIST_HEAD(&new_od->files); - goto retry; + return battr->mmap(of->file, kobj, battr, vma); } -/** - * sysfs_put_open_dirent - put sysfs_open_dirent - * @sd: target sysfs_dirent - * @of: associated sysfs_open_file - * - * Put @sd->s_attr.open and unlink @of from the files list. If - * reference count reaches zero, disassociate and free it. - * - * LOCKING: - * None. 
- */ -static void sysfs_put_open_dirent(struct sysfs_dirent *sd, - struct sysfs_open_file *of) +void sysfs_notify(struct kobject *k, const char *dir, const char *attr) { - struct sysfs_open_dirent *od = sd->s_attr.open; - unsigned long flags; - - mutex_lock(&sysfs_open_file_mutex); - spin_lock_irqsave(&sysfs_open_dirent_lock, flags); + struct sysfs_dirent *sd = k->sd, *tmp; - if (of) - list_del(&of->list); - - if (atomic_dec_and_test(&od->refcnt)) - sd->s_attr.open = NULL; + if (sd && dir) + sd = kernfs_find_and_get(sd, dir); else - od = NULL; - - spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); - mutex_unlock(&sysfs_open_file_mutex); - - kfree(od); -} - -static int sysfs_open_file(struct inode *inode, struct file *file) -{ - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - struct sysfs_open_file *of; - bool has_read, has_write; - int error = -EACCES; - - /* need attr_sd for attr and ops, its parent for kobj */ - if (!sysfs_get_active(attr_sd)) - return -ENODEV; + kernfs_get(sd); - if (sysfs_is_bin(attr_sd)) { - struct bin_attribute *battr = attr_sd->s_attr.bin_attr; - - has_read = battr->read || battr->mmap; - has_write = battr->write || battr->mmap; - } else { - const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); - - /* every kobject with an attribute needs a ktype assigned */ - if (WARN(!ops, KERN_ERR - "missing sysfs attribute operations for kobject: %s\n", - kobject_name(kobj))) - goto err_out; - - has_read = ops->show; - has_write = ops->store; + if (sd && attr) { + tmp = kernfs_find_and_get(sd, attr); + kernfs_put(sd); + sd = tmp; } - /* check perms and supported operations */ - if ((file->f_mode & FMODE_WRITE) && - (!(inode->i_mode & S_IWUGO) || !has_write)) - goto err_out; - - if ((file->f_mode & FMODE_READ) && - (!(inode->i_mode & S_IRUGO) || !has_read)) - goto err_out; - - /* allocate a sysfs_open_file for the file */ - error = -ENOMEM; - of = kzalloc(sizeof(struct 
sysfs_open_file), GFP_KERNEL); - if (!of) - goto err_out; - - /* - * The following is done to give a different lockdep key to - * @of->mutex for files which implement mmap. This is a rather - * crude way to avoid false positive lockdep warning around - * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and - * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under - * which mm->mmap_sem nests, while holding @of->mutex. As each - * open file has a separate mutex, it's okay as long as those don't - * happen on the same file. At this point, we can't easily give - * each file a separate locking class. Let's differentiate on - * whether the file is bin or not for now. - */ - if (sysfs_is_bin(attr_sd)) - mutex_init(&of->mutex); - else - mutex_init(&of->mutex); - - of->sd = attr_sd; - of->file = file; - - /* - * Always instantiate seq_file even if read access doesn't use - * seq_file or is not requested. This unifies private data access - * and readable regular files are the vast majority anyway. 
- */ - if (sysfs_is_bin(attr_sd)) - error = single_open(file, NULL, of); - else - error = single_open(file, sysfs_seq_show, of); - if (error) - goto err_free; - - /* seq_file clears PWRITE unconditionally, restore it if WRITE */ - if (file->f_mode & FMODE_WRITE) - file->f_mode |= FMODE_PWRITE; - - /* make sure we have open dirent struct */ - error = sysfs_get_open_dirent(attr_sd, of); - if (error) - goto err_close; - - /* open succeeded, put active references */ - sysfs_put_active(attr_sd); - return 0; - -err_close: - single_release(inode, file); -err_free: - kfree(of); -err_out: - sysfs_put_active(attr_sd); - return error; -} - -static int sysfs_release(struct inode *inode, struct file *filp) -{ - struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata; - struct sysfs_open_file *of = sysfs_of(filp); - - sysfs_put_open_dirent(sd, of); - single_release(inode, filp); - kfree(of); - - return 0; -} - -void sysfs_unmap_bin_file(struct sysfs_dirent *sd) -{ - struct sysfs_open_dirent *od; - struct sysfs_open_file *of; - - if (!sysfs_is_bin(sd)) - return; - - spin_lock_irq(&sysfs_open_dirent_lock); - od = sd->s_attr.open; - if (od) - atomic_inc(&od->refcnt); - spin_unlock_irq(&sysfs_open_dirent_lock); - if (!od) - return; - - mutex_lock(&sysfs_open_file_mutex); - list_for_each_entry(of, &od->files, list) { - struct inode *inode = file_inode(of->file); - unmap_mapping_range(inode->i_mapping, 0, 0, 1); + if (sd) { + kernfs_notify(sd); + kernfs_put(sd); } - mutex_unlock(&sysfs_open_file_mutex); - - sysfs_put_open_dirent(sd, NULL); -} - -/* Sysfs attribute files are pollable. The idea is that you read - * the content and then you use 'poll' or 'select' to wait for - * the content to change. When the content changes (assuming the - * manager for the kobject supports notification), poll will - * return POLLERR|POLLPRI, and select will return the fd whether - * it is waiting for read, write, or exceptions. 
- * Once poll/select indicates that the value has changed, you - * need to close and re-open the file, or seek to 0 and read again. - * Reminder: this only works for attributes which actively support - * it, and it is not possible to test an attribute from userspace - * to see if it supports poll (Neither 'poll' nor 'select' return - * an appropriate error code). When in doubt, set a suitable timeout value. - */ -static unsigned int sysfs_poll(struct file *filp, poll_table *wait) -{ - struct sysfs_open_file *of = sysfs_of(filp); - struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; - struct sysfs_open_dirent *od = attr_sd->s_attr.open; - - /* need parent for the kobj, grab both */ - if (!sysfs_get_active(attr_sd)) - goto trigger; - - poll_wait(filp, &od->poll, wait); - - sysfs_put_active(attr_sd); - - if (of->event != atomic_read(&od->event)) - goto trigger; - - return DEFAULT_POLLMASK; - - trigger: - return DEFAULT_POLLMASK|POLLERR|POLLPRI; } +EXPORT_SYMBOL_GPL(sysfs_notify); -void sysfs_notify_dirent(struct sysfs_dirent *sd) -{ - struct sysfs_open_dirent *od; - unsigned long flags; - - spin_lock_irqsave(&sysfs_open_dirent_lock, flags); - - if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) { - od = sd->s_attr.open; - if (od) { - atomic_inc(&od->event); - wake_up_interruptible(&od->poll); - } - } - - spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); -} -EXPORT_SYMBOL_GPL(sysfs_notify_dirent); +static const struct kernfs_ops sysfs_file_kfops_empty = { +}; -void sysfs_notify(struct kobject *k, const char *dir, const char *attr) -{ - struct sysfs_dirent *sd = k->sd; +static const struct kernfs_ops sysfs_file_kfops_ro = { + .seq_show = sysfs_kf_seq_show, +}; - mutex_lock(&sysfs_mutex); +static const struct kernfs_ops sysfs_file_kfops_wo = { + .write = sysfs_kf_write, +}; - if (sd && dir) - sd = sysfs_find_dirent(sd, dir, NULL); - if (sd && attr) - sd = sysfs_find_dirent(sd, attr, NULL); - if (sd) - sysfs_notify_dirent(sd); +static const struct kernfs_ops 
sysfs_file_kfops_rw = { + .seq_show = sysfs_kf_seq_show, + .write = sysfs_kf_write, +}; - mutex_unlock(&sysfs_mutex); -} -EXPORT_SYMBOL_GPL(sysfs_notify); +static const struct kernfs_ops sysfs_bin_kfops_ro = { + .read = sysfs_kf_bin_read, +}; -const struct file_operations sysfs_file_operations = { - .read = seq_read, - .write = sysfs_write_file, - .llseek = generic_file_llseek, - .open = sysfs_open_file, - .release = sysfs_release, - .poll = sysfs_poll, +static const struct kernfs_ops sysfs_bin_kfops_wo = { + .write = sysfs_kf_bin_write, }; -const struct file_operations sysfs_bin_operations = { - .read = sysfs_bin_read, - .write = sysfs_write_file, - .llseek = generic_file_llseek, - .mmap = sysfs_bin_mmap, - .open = sysfs_open_file, - .release = sysfs_release, - .poll = sysfs_poll, +static const struct kernfs_ops sysfs_bin_kfops_rw = { + .read = sysfs_kf_bin_read, + .write = sysfs_kf_bin_write, + .mmap = sysfs_kf_bin_mmap, }; int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, - const struct attribute *attr, int type, - umode_t amode, const void *ns) + const struct attribute *attr, bool is_bin, + umode_t mode, const void *ns) { - umode_t mode = (amode & S_IALLUGO) | S_IFREG; - struct sysfs_addrm_cxt acxt; + struct lock_class_key *key = NULL; + const struct kernfs_ops *ops; struct sysfs_dirent *sd; - int rc; - - sd = sysfs_new_dirent(attr->name, mode, type); - if (!sd) - return -ENOMEM; + loff_t size; - sd->s_ns = ns; - sd->s_attr.attr = (void *)attr; - sysfs_dirent_init_lockdep(sd); + if (!is_bin) { + struct kobject *kobj = dir_sd->priv; + const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops; - sysfs_addrm_start(&acxt); - rc = sysfs_add_one(&acxt, sd, dir_sd); - sysfs_addrm_finish(&acxt); - - if (rc) - sysfs_put(sd); + /* every kobject with an attribute needs a ktype assigned */ + if (WARN(!sysfs_ops, KERN_ERR + "missing sysfs attribute operations for kobject: %s\n", + kobject_name(kobj))) + return -EINVAL; + + if (sysfs_ops->show && sysfs_ops->store) + 
ops = &sysfs_file_kfops_rw; + else if (sysfs_ops->show) + ops = &sysfs_file_kfops_ro; + else if (sysfs_ops->store) + ops = &sysfs_file_kfops_wo; + else + ops = &sysfs_file_kfops_empty; + + size = PAGE_SIZE; + } else { + struct bin_attribute *battr = (void *)attr; + + if ((battr->read && battr->write) || battr->mmap) + ops = &sysfs_bin_kfops_rw; + else if (battr->read) + ops = &sysfs_bin_kfops_ro; + else if (battr->write) + ops = &sysfs_bin_kfops_wo; + else + ops = &sysfs_file_kfops_empty; + + size = battr->size; + } - return rc; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (!attr->ignore_lockdep) + key = attr->key ?: (struct lock_class_key *)&attr->skey; +#endif + sd = kernfs_create_file_ns_key(dir_sd, attr->name, mode, size, + ops, (void *)attr, ns, key); + if (IS_ERR(sd)) { + if (PTR_ERR(sd) == -EEXIST) + sysfs_warn_dup(dir_sd, attr->name); + return PTR_ERR(sd); + } + return 0; } - int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, - int type) + bool is_bin) { - return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL); + return sysfs_add_file_mode_ns(dir_sd, attr, is_bin, attr->mode, NULL); } /** @@ -877,8 +275,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, { BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR, - attr->mode, ns); + return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns); } EXPORT_SYMBOL_GPL(sysfs_create_file_ns); @@ -909,16 +306,18 @@ int sysfs_add_file_to_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; int error; - if (group) - dir_sd = sysfs_get_dirent(kobj->sd, group); - else - dir_sd = sysfs_get(kobj->sd); + if (group) { + dir_sd = kernfs_find_and_get(kobj->sd, group); + } else { + dir_sd = kobj->sd; + kernfs_get(dir_sd); + } if (!dir_sd) return -ENOENT; - error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR); - sysfs_put(dir_sd); + error = sysfs_add_file(dir_sd, attr, false); + kernfs_put(dir_sd); return 
error; } @@ -938,19 +337,16 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, struct iattr newattrs; int rc; - mutex_lock(&sysfs_mutex); - - rc = -ENOENT; - sd = sysfs_find_dirent(kobj->sd, attr->name, NULL); + sd = kernfs_find_and_get(kobj->sd, attr->name); if (!sd) - goto out; + return -ENOENT; newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE; - rc = sysfs_sd_setattr(sd, &newattrs); - out: - mutex_unlock(&sysfs_mutex); + rc = kernfs_setattr(sd, &newattrs); + + kernfs_put(sd); return rc; } EXPORT_SYMBOL_GPL(sysfs_chmod_file); @@ -968,7 +364,7 @@ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, { struct sysfs_dirent *dir_sd = kobj->sd; - sysfs_hash_and_remove(dir_sd, attr->name, ns); + kernfs_remove_by_name_ns(dir_sd, attr->name, ns); } EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); @@ -991,13 +387,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj, { struct sysfs_dirent *dir_sd; - if (group) - dir_sd = sysfs_get_dirent(kobj->sd, group); - else - dir_sd = sysfs_get(kobj->sd); + if (group) { + dir_sd = kernfs_find_and_get(kobj->sd, group); + } else { + dir_sd = kobj->sd; + kernfs_get(dir_sd); + } + if (dir_sd) { - sysfs_hash_and_remove(dir_sd, attr->name, NULL); - sysfs_put(dir_sd); + kernfs_remove_by_name(dir_sd, attr->name); + kernfs_put(dir_sd); } } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); @@ -1012,7 +411,7 @@ int sysfs_create_bin_file(struct kobject *kobj, { BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); + return sysfs_add_file(kobj->sd, &attr->attr, true); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); @@ -1024,7 +423,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_bin_file); void sysfs_remove_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { - sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL); + kernfs_remove_by_name(kobj->sd, attr->attr.name); } 
EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 1898a10..7177532 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -26,7 +26,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, if (grp->attrs) for (attr = grp->attrs; *attr; attr++) - sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); + kernfs_remove_by_name(dir_sd, (*attr)->name); if (grp->bin_attrs) for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) sysfs_remove_bin_file(kobj, *bin_attr); @@ -49,15 +49,13 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, * re-adding (if required) the file. */ if (update) - sysfs_hash_and_remove(dir_sd, (*attr)->name, - NULL); + kernfs_remove_by_name(dir_sd, (*attr)->name); if (grp->is_visible) { mode = grp->is_visible(kobj, *attr, i); if (!mode) continue; } - error = sysfs_add_file_mode_ns(dir_sd, *attr, - SYSFS_KOBJ_ATTR, + error = sysfs_add_file_mode_ns(dir_sd, *attr, false, (*attr)->mode | mode, NULL); if (unlikely(error)) @@ -102,18 +100,21 @@ static int internal_create_group(struct kobject *kobj, int update, return -EINVAL; } if (grp->name) { - error = sysfs_create_subdir(kobj, grp->name, &sd); - if (error) - return error; + sd = kernfs_create_dir(kobj->sd, grp->name, kobj); + if (IS_ERR(sd)) { + if (PTR_ERR(sd) == -EEXIST) + sysfs_warn_dup(kobj->sd, grp->name); + return PTR_ERR(sd); + } } else sd = kobj->sd; - sysfs_get(sd); + kernfs_get(sd); error = create_files(sd, kobj, grp, update); if (error) { if (grp->name) - sysfs_remove(sd); + kernfs_remove(sd); } - sysfs_put(sd); + kernfs_put(sd); return error; } @@ -207,21 +208,23 @@ void sysfs_remove_group(struct kobject *kobj, struct sysfs_dirent *sd; if (grp->name) { - sd = sysfs_get_dirent(dir_sd, grp->name); + sd = kernfs_find_and_get(dir_sd, grp->name); if (!sd) { WARN(!sd, KERN_WARNING "sysfs group %p not found for kobject '%s'\n", grp, kobject_name(kobj)); return; } - } else - sd = sysfs_get(dir_sd); + } 
else { + sd = dir_sd; + kernfs_get(sd); + } remove_files(sd, kobj, grp); if (grp->name) - sysfs_remove(sd); + kernfs_remove(sd); - sysfs_put(sd); + kernfs_put(sd); } EXPORT_SYMBOL_GPL(sysfs_remove_group); @@ -262,17 +265,17 @@ int sysfs_merge_group(struct kobject *kobj, struct attribute *const *attr; int i; - dir_sd = sysfs_get_dirent(kobj->sd, grp->name); + dir_sd = kernfs_find_and_get(kobj->sd, grp->name); if (!dir_sd) return -ENOENT; for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) - error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); + error = sysfs_add_file(dir_sd, *attr, false); if (error) { while (--i >= 0) - sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL); + kernfs_remove_by_name(dir_sd, (*--attr)->name); } - sysfs_put(dir_sd); + kernfs_put(dir_sd); return error; } @@ -289,11 +292,11 @@ void sysfs_unmerge_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; struct attribute *const *attr; - dir_sd = sysfs_get_dirent(kobj->sd, grp->name); + dir_sd = kernfs_find_and_get(kobj->sd, grp->name); if (dir_sd) { for (attr = grp->attrs; *attr; ++attr) - sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); - sysfs_put(dir_sd); + kernfs_remove_by_name(dir_sd, (*attr)->name); + kernfs_put(dir_sd); } } EXPORT_SYMBOL_GPL(sysfs_unmerge_group); @@ -311,12 +314,12 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct sysfs_dirent *dir_sd; int error = 0; - dir_sd = sysfs_get_dirent(kobj->sd, group_name); + dir_sd = kernfs_find_and_get(kobj->sd, group_name); if (!dir_sd) return -ENOENT; error = sysfs_create_link_sd(dir_sd, target, link_name); - sysfs_put(dir_sd); + kernfs_put(dir_sd); return error; } @@ -333,10 +336,10 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, { struct sysfs_dirent *dir_sd; - dir_sd = sysfs_get_dirent(kobj->sd, group_name); + dir_sd = kernfs_find_and_get(kobj->sd, group_name); if (dir_sd) { - sysfs_hash_and_remove(dir_sd, link_name, NULL); - sysfs_put(dir_sd); + 
kernfs_remove_by_name(dir_sd, link_name); + kernfs_put(dir_sd); } } EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 834ec2c..e7e3aa8 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -14,146 +14,39 @@ #include <linux/fs.h> #include <linux/mount.h> -#include <linux/pagemap.h> #include <linux/init.h> -#include <linux/module.h> -#include <linux/magic.h> -#include <linux/slab.h> #include <linux/user_namespace.h> #include "sysfs.h" - -static struct vfsmount *sysfs_mnt; -struct kmem_cache *sysfs_dir_cachep; - -static const struct super_operations sysfs_ops = { - .statfs = simple_statfs, - .drop_inode = generic_delete_inode, - .evict_inode = sysfs_evict_inode, -}; - -struct sysfs_dirent sysfs_root = { - .s_name = "", - .s_count = ATOMIC_INIT(1), - .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT), - .s_mode = S_IFDIR | S_IRUGO | S_IXUGO, - .s_ino = 1, -}; - -static int sysfs_fill_super(struct super_block *sb, void *data, int silent) -{ - struct inode *inode; - struct dentry *root; - - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = SYSFS_MAGIC; - sb->s_op = &sysfs_ops; - sb->s_time_gran = 1; - - /* get root inode, initialize and unlock it */ - mutex_lock(&sysfs_mutex); - inode = sysfs_get_inode(sb, &sysfs_root); - mutex_unlock(&sysfs_mutex); - if (!inode) { - pr_debug("sysfs: could not get root inode\n"); - return -ENOMEM; - } - - /* instantiate and link root dentry */ - root = d_make_root(inode); - if (!root) { - pr_debug("%s: could not get root dentry!\n", __func__); - return -ENOMEM; - } - root->d_fsdata = &sysfs_root; - sb->s_root = root; - sb->s_d_op = &sysfs_dentry_ops; - return 0; -} - -static int sysfs_test_super(struct super_block *sb, void *data) -{ - struct sysfs_super_info *sb_info = sysfs_info(sb); - struct sysfs_super_info *info = data; - enum kobj_ns_type type; - int found = 1; - - for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; 
type++) { - if (sb_info->ns[type] != info->ns[type]) - found = 0; - } - return found; -} - -static int sysfs_set_super(struct super_block *sb, void *data) -{ - int error; - error = set_anon_super(sb, data); - if (!error) - sb->s_fs_info = data; - return error; -} - -static void free_sysfs_super_info(struct sysfs_super_info *info) -{ - int type; - for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) - kobj_ns_drop(type, info->ns[type]); - kfree(info); -} +static struct kernfs_root *sysfs_root; +struct sysfs_dirent *sysfs_root_sd; static struct dentry *sysfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct sysfs_super_info *info; - enum kobj_ns_type type; - struct super_block *sb; - int error; + struct dentry *root; + void *ns; if (!(flags & MS_KERNMOUNT)) { if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) return ERR_PTR(-EPERM); - for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { - if (!kobj_ns_current_may_mount(type)) - return ERR_PTR(-EPERM); - } - } - - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return ERR_PTR(-ENOMEM); - - for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) - info->ns[type] = kobj_ns_grab_current(type); - - sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info); - if (IS_ERR(sb) || sb->s_fs_info != info) - free_sysfs_super_info(info); - if (IS_ERR(sb)) - return ERR_CAST(sb); - if (!sb->s_root) { - error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 
1 : 0); - if (error) { - deactivate_locked_super(sb); - return ERR_PTR(error); - } - sb->s_flags |= MS_ACTIVE; + if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) + return ERR_PTR(-EPERM); } - return dget(sb->s_root); + ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); + root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns); + if (IS_ERR(root)) + kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); + return root; } static void sysfs_kill_sb(struct super_block *sb) { - struct sysfs_super_info *info = sysfs_info(sb); - /* Remove the superblock from fs_supers/s_instances - * so we can't find it, before freeing sysfs_super_info. - */ - kill_anon_super(sb); - free_sysfs_super_info(info); + kernfs_kill_sb(sb); + kobj_ns_drop(KOBJ_NS_TYPE_NET, (void *)kernfs_super_ns(sb)); } static struct file_system_type sysfs_fs_type = { @@ -165,48 +58,19 @@ static struct file_system_type sysfs_fs_type = { int __init sysfs_init(void) { - int err = -ENOMEM; + int err; - sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache", - sizeof(struct sysfs_dirent), - 0, 0, NULL); - if (!sysfs_dir_cachep) - goto out; + sysfs_root = kernfs_create_root(NULL); + if (IS_ERR(sysfs_root)) + return PTR_ERR(sysfs_root); - err = sysfs_inode_init(); - if (err) - goto out_err; + sysfs_root_sd = sysfs_root->sd; err = register_filesystem(&sysfs_fs_type); - if (!err) { - sysfs_mnt = kern_mount(&sysfs_fs_type); - if (IS_ERR(sysfs_mnt)) { - printk(KERN_ERR "sysfs: could not mount!\n"); - err = PTR_ERR(sysfs_mnt); - sysfs_mnt = NULL; - unregister_filesystem(&sysfs_fs_type); - goto out_err; - } - } else - goto out_err; -out: - return err; -out_err: - kmem_cache_destroy(sysfs_dir_cachep); - sysfs_dir_cachep = NULL; - goto out; -} - -#undef sysfs_get -struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) -{ - return __sysfs_get(sd); -} -EXPORT_SYMBOL_GPL(sysfs_get); + if (err) { + kernfs_destroy_root(sysfs_root); + return err; + } -#undef sysfs_put -void sysfs_put(struct sysfs_dirent *sd) -{ - __sysfs_put(sd); + return 0; } 
-EXPORT_SYMBOL_GPL(sysfs_put); diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 3ae3f1b..1b8c9ed 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -11,11 +11,8 @@ */ #include <linux/fs.h> -#include <linux/gfp.h> -#include <linux/mount.h> #include <linux/module.h> #include <linux/kobject.h> -#include <linux/namei.h> #include <linux/mutex.h> #include <linux/security.h> @@ -25,11 +22,7 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, struct kobject *target, const char *name, int warn) { - struct sysfs_dirent *target_sd = NULL; - struct sysfs_dirent *sd = NULL; - struct sysfs_addrm_cxt acxt; - enum kobj_ns_type ns_type; - int error; + struct sysfs_dirent *sd, *target_sd = NULL; BUG_ON(!name || !parent_sd); @@ -39,53 +32,24 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, * sysfs_remove_dir() for details. */ spin_lock(&sysfs_symlink_target_lock); - if (target->sd) - target_sd = sysfs_get(target->sd); + if (target->sd) { + target_sd = target->sd; + kernfs_get(target_sd); + } spin_unlock(&sysfs_symlink_target_lock); - error = -ENOENT; if (!target_sd) - goto out_put; - - error = -ENOMEM; - sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK); - if (!sd) - goto out_put; + return -ENOENT; - ns_type = sysfs_ns_type(parent_sd); - if (ns_type) - sd->s_ns = target_sd->s_ns; - sd->s_symlink.target_sd = target_sd; - target_sd = NULL; /* reference is now owned by the symlink */ - - sysfs_addrm_start(&acxt); - /* Symlinks must be between directories with the same ns_type */ - if (!ns_type || - (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) { - if (warn) - error = sysfs_add_one(&acxt, sd, parent_sd); - else - error = __sysfs_add_one(&acxt, sd, parent_sd); - } else { - error = -EINVAL; - WARN(1, KERN_WARNING - "sysfs: symlink across ns_types %s/%s -> %s/%s\n", - parent_sd->s_name, - sd->s_name, - sd->s_symlink.target_sd->s_parent->s_name, - sd->s_symlink.target_sd->s_name); - } - 
sysfs_addrm_finish(&acxt); + sd = kernfs_create_link(parent_sd, name, target_sd); + kernfs_put(target_sd); - if (error) - goto out_put; + if (!IS_ERR(sd)) + return 0; - return 0; - - out_put: - sysfs_put(target_sd); - sysfs_put(sd); - return error; + if (warn && PTR_ERR(sd) == -EEXIST) + sysfs_warn_dup(parent_sd, name); + return PTR_ERR(sd); } /** @@ -106,7 +70,7 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, struct sysfs_dirent *parent_sd = NULL; if (!kobj) - parent_sd = &sysfs_root; + parent_sd = sysfs_root_sd; else parent_sd = kobj->sd; @@ -164,10 +128,10 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, * sysfs_remove_dir() for details. */ spin_lock(&sysfs_symlink_target_lock); - if (targ->sd && sysfs_ns_type(kobj->sd)) + if (targ->sd && kernfs_ns_enabled(kobj->sd)) ns = targ->sd->s_ns; spin_unlock(&sysfs_symlink_target_lock); - sysfs_hash_and_remove(kobj->sd, name, ns); + kernfs_remove_by_name_ns(kobj->sd, name, ns); } /** @@ -180,11 +144,11 @@ void sysfs_remove_link(struct kobject *kobj, const char *name) struct sysfs_dirent *parent_sd = NULL; if (!kobj) - parent_sd = &sysfs_root; + parent_sd = sysfs_root_sd; else parent_sd = kobj->sd; - sysfs_hash_and_remove(parent_sd, name, NULL); + kernfs_remove_by_name(parent_sd, name); } EXPORT_SYMBOL_GPL(sysfs_remove_link); @@ -206,7 +170,7 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, int result; if (!kobj) - parent_sd = &sysfs_root; + parent_sd = sysfs_root_sd; else parent_sd = kobj->sd; @@ -214,117 +178,20 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, old_ns = targ->sd->s_ns; result = -ENOENT; - sd = sysfs_get_dirent_ns(parent_sd, old, old_ns); + sd = kernfs_find_and_get_ns(parent_sd, old, old_ns); if (!sd) goto out; result = -EINVAL; if (sysfs_type(sd) != SYSFS_KOBJ_LINK) goto out; - if (sd->s_symlink.target_sd->s_dir.kobj != targ) + if (sd->s_symlink.target_sd->priv != targ) goto out; - result = 
sysfs_rename(sd, parent_sd, new, new_ns); + result = kernfs_rename_ns(sd, parent_sd, new, new_ns); out: - sysfs_put(sd); + kernfs_put(sd); return result; } EXPORT_SYMBOL_GPL(sysfs_rename_link_ns); - -static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, - struct sysfs_dirent *target_sd, char *path) -{ - struct sysfs_dirent *base, *sd; - char *s = path; - int len = 0; - - /* go up to the root, stop at the base */ - base = parent_sd; - while (base->s_parent) { - sd = target_sd->s_parent; - while (sd->s_parent && base != sd) - sd = sd->s_parent; - - if (base == sd) - break; - - strcpy(s, "../"); - s += 3; - base = base->s_parent; - } - - /* determine end of target string for reverse fillup */ - sd = target_sd; - while (sd->s_parent && sd != base) { - len += strlen(sd->s_name) + 1; - sd = sd->s_parent; - } - - /* check limits */ - if (len < 2) - return -EINVAL; - len--; - if ((s - path) + len > PATH_MAX) - return -ENAMETOOLONG; - - /* reverse fillup of target string from target to base */ - sd = target_sd; - while (sd->s_parent && sd != base) { - int slen = strlen(sd->s_name); - - len -= slen; - strncpy(s + len, sd->s_name, slen); - if (len) - s[--len] = '/'; - - sd = sd->s_parent; - } - - return 0; -} - -static int sysfs_getlink(struct dentry *dentry, char *path) -{ - struct sysfs_dirent *sd = dentry->d_fsdata; - struct sysfs_dirent *parent_sd = sd->s_parent; - struct sysfs_dirent *target_sd = sd->s_symlink.target_sd; - int error; - - mutex_lock(&sysfs_mutex); - error = sysfs_get_target_path(parent_sd, target_sd, path); - mutex_unlock(&sysfs_mutex); - - return error; -} - -static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - int error = -ENOMEM; - unsigned long page = get_zeroed_page(GFP_KERNEL); - if (page) { - error = sysfs_getlink(dentry, (char *) page); - if (error < 0) - free_page((unsigned long)page); - } - nd_set_link(nd, error ? 
ERR_PTR(error) : (char *)page); - return NULL; -} - -static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *page = nd_get_link(nd); - if (!IS_ERR(page)) - free_page((unsigned long)page); -} - -const struct inode_operations sysfs_symlink_inode_operations = { - .setxattr = sysfs_setxattr, - .readlink = generic_readlink, - .follow_link = sysfs_follow_link, - .put_link = sysfs_put_link, - .setattr = sysfs_setattr, - .getattr = sysfs_getattr, - .permission = sysfs_permission, -}; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 0af09fb..c8e395b 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -8,248 +8,36 @@ * This file is released under the GPLv2. */ -#include <linux/lockdep.h> -#include <linux/kobject_ns.h> -#include <linux/fs.h> -#include <linux/rbtree.h> +#ifndef __SYSFS_INTERNAL_H +#define __SYSFS_INTERNAL_H -struct sysfs_open_dirent; - -/* type-specific structures for sysfs_dirent->s_* union members */ -struct sysfs_elem_dir { - struct kobject *kobj; - - unsigned long subdirs; - /* children rbtree starts here and goes through sd->s_rb */ - struct rb_root children; -}; - -struct sysfs_elem_symlink { - struct sysfs_dirent *target_sd; -}; - -struct sysfs_elem_attr { - union { - struct attribute *attr; - struct bin_attribute *bin_attr; - }; - struct sysfs_open_dirent *open; -}; - -struct sysfs_inode_attrs { - struct iattr ia_iattr; - void *ia_secdata; - u32 ia_secdata_len; -}; - -/* - * sysfs_dirent - the building block of sysfs hierarchy. Each and - * every sysfs node is represented by single sysfs_dirent. - * - * As long as s_count reference is held, the sysfs_dirent itself is - * accessible. Dereferencing s_elem or any other outer entity - * requires s_active reference. 
- */ -struct sysfs_dirent { - atomic_t s_count; - atomic_t s_active; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif - struct sysfs_dirent *s_parent; - const char *s_name; - - struct rb_node s_rb; - - union { - struct completion *completion; - struct sysfs_dirent *removed_list; - } u; - - const void *s_ns; /* namespace tag */ - unsigned int s_hash; /* ns + name hash */ - union { - struct sysfs_elem_dir s_dir; - struct sysfs_elem_symlink s_symlink; - struct sysfs_elem_attr s_attr; - }; - - unsigned short s_flags; - umode_t s_mode; - unsigned int s_ino; - struct sysfs_inode_attrs *s_iattr; -}; - -#define SD_DEACTIVATED_BIAS INT_MIN - -#define SYSFS_TYPE_MASK 0x00ff -#define SYSFS_DIR 0x0001 -#define SYSFS_KOBJ_ATTR 0x0002 -#define SYSFS_KOBJ_BIN_ATTR 0x0004 -#define SYSFS_KOBJ_LINK 0x0008 -#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) -#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) - -/* identify any namespace tag on sysfs_dirents */ -#define SYSFS_NS_TYPE_MASK 0xf00 -#define SYSFS_NS_TYPE_SHIFT 8 - -#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK) -#define SYSFS_FLAG_REMOVED 0x02000 - -static inline unsigned int sysfs_type(struct sysfs_dirent *sd) -{ - return sd->s_flags & SYSFS_TYPE_MASK; -} - -/* - * Return any namespace tags on this dirent. 
- * enum kobj_ns_type is defined in linux/kobject.h - */ -static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd) -{ - return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT; -} - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - -#define sysfs_dirent_init_lockdep(sd) \ -do { \ - struct attribute *attr = sd->s_attr.attr; \ - struct lock_class_key *key = attr->key; \ - if (!key) \ - key = &attr->skey; \ - \ - lockdep_init_map(&sd->dep_map, "s_active", key, 0); \ -} while (0) - -/* Test for attributes that want to ignore lockdep for read-locking */ -static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) -{ - int type = sysfs_type(sd); - - return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) && - sd->s_attr.attr->ignore_lockdep; -} - -#else - -#define sysfs_dirent_init_lockdep(sd) do {} while (0) - -static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) -{ - return true; -} - -#endif - -/* - * Context structure to be used while adding/removing nodes. - */ -struct sysfs_addrm_cxt { - struct sysfs_dirent *removed; -}; +#include <linux/sysfs.h> /* * mount.c */ - -/* - * Each sb is associated with a set of namespace tags (i.e. - * the network namespace of the task which mounted this sysfs - * instance). 
- */ -struct sysfs_super_info { - void *ns[KOBJ_NS_TYPES]; -}; -#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) -extern struct sysfs_dirent sysfs_root; -extern struct kmem_cache *sysfs_dir_cachep; +extern struct sysfs_dirent *sysfs_root_sd; /* * dir.c */ -extern struct mutex sysfs_mutex; extern spinlock_t sysfs_symlink_target_lock; -extern const struct dentry_operations sysfs_dentry_ops; - -extern const struct file_operations sysfs_dir_operations; -extern const struct inode_operations sysfs_dir_inode_operations; -struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); -void sysfs_put_active(struct sysfs_dirent *sd); -void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt); void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name); -int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, - struct sysfs_dirent *parent_sd); -int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, - struct sysfs_dirent *parent_sd); -void sysfs_remove(struct sysfs_dirent *sd); -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, - const void *ns); -void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); - -struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, - const unsigned char *name, - const void *ns); -struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); - -void release_sysfs_dirent(struct sysfs_dirent *sd); - -int sysfs_create_subdir(struct kobject *kobj, const char *name, - struct sysfs_dirent **p_sd); - -int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, - const char *new_name, const void *new_ns); - -static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) -{ - if (sd) { - WARN_ON(!atomic_read(&sd->s_count)); - atomic_inc(&sd->s_count); - } - return sd; -} -#define sysfs_get(sd) __sysfs_get(sd) - -static inline void __sysfs_put(struct sysfs_dirent *sd) -{ - if (sd && atomic_dec_and_test(&sd->s_count)) - 
release_sysfs_dirent(sd); -} -#define sysfs_put(sd) __sysfs_put(sd) - -/* - * inode.c - */ -struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); -void sysfs_evict_inode(struct inode *inode); -int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); -int sysfs_permission(struct inode *inode, int mask); -int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); -int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); -int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags); -int sysfs_inode_init(void); /* * file.c */ -extern const struct file_operations sysfs_file_operations; -extern const struct file_operations sysfs_bin_operations; - int sysfs_add_file(struct sysfs_dirent *dir_sd, - const struct attribute *attr, int type); - + const struct attribute *attr, bool is_bin); int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, - const struct attribute *attr, int type, + const struct attribute *attr, bool is_bin, umode_t amode, const void *ns); -void sysfs_unmap_bin_file(struct sysfs_dirent *sd); /* * symlink.c */ -extern const struct inode_operations sysfs_symlink_inode_operations; int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target, const char *name); + +#endif /* __SYSFS_INTERNAL_H */ |