/* * Copyright (c) 2003-2009 Erez Zadok * Copyright (c) 2003-2006 Charles P. Wright * Copyright (c) 2005-2007 Josef 'Jeff' Sipek * Copyright (c) 2005-2006 Junjiro Okajima * Copyright (c) 2005 Arun M. Krishnakumar * Copyright (c) 2004-2006 David P. Quigley * Copyright (c) 2003-2004 Mohammad Nayyer Zubair * Copyright (c) 2003 Puja Gupta * Copyright (c) 2003 Harikesavan Krishnan * Copyright (c) 2003-2009 Stony Brook University * Copyright (c) 2003-2009 The Research Foundation of SUNY * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include "union.h" #include #include static void unionfs_fill_inode(struct dentry *dentry, struct inode *inode) { struct inode *lower_inode; struct dentry *lower_dentry; int bindex, bstart, bend; bstart = dbstart(dentry); bend = dbend(dentry); for (bindex = bstart; bindex <= bend; bindex++) { lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); if (!lower_dentry) { unionfs_set_lower_inode_idx(inode, bindex, NULL); continue; } /* Initialize the lower inode to the new lower inode. */ if (!lower_dentry->d_inode) continue; unionfs_set_lower_inode_idx(inode, bindex, igrab(lower_dentry->d_inode)); } ibstart(inode) = dbstart(dentry); ibend(inode) = dbend(dentry); /* Use attributes from the first branch. */ lower_inode = unionfs_lower_inode(inode); /* Use different set of inode ops for symlinks & directories */ if (S_ISLNK(lower_inode->i_mode)) inode->i_op = &unionfs_symlink_iops; else if (S_ISDIR(lower_inode->i_mode)) inode->i_op = &unionfs_dir_iops; /* Use different set of file ops for directories */ if (S_ISDIR(lower_inode->i_mode)) inode->i_fop = &unionfs_dir_fops; /* properly initialize special inodes */ if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev); /* all well, copy inode attributes */ unionfs_copy_attr_all(inode, lower_inode); fsstack_copy_inode_size(inode, lower_inode); } /* * Connect a unionfs inode dentry/inode with several lower ones. This is * the classic stackable file system "vnode interposition" action. * * @sb: unionfs's super_block */ struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb, int flag) { int err = 0; struct inode *inode; int need_fill_inode = 1; struct dentry *spliced = NULL; verify_locked(dentry); /* * We allocate our new inode below by calling unionfs_iget, * which will initialize some of the new inode's fields */ /* * On revalidate we've already got our own inode and just need * to fix it up. */ if (flag == INTERPOSE_REVAL) { inode = dentry->d_inode; UNIONFS_I(inode)->bstart = -1; UNIONFS_I(inode)->bend = -1; atomic_set(&UNIONFS_I(inode)->generation, atomic_read(&UNIONFS_SB(sb)->generation)); UNIONFS_I(inode)->lower_inodes = kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL); if (unlikely(!UNIONFS_I(inode)->lower_inodes)) { err = -ENOMEM; goto out; } } else { /* get unique inode number for unionfs */ inode = unionfs_iget(sb, iunique(sb, UNIONFS_ROOT_INO)); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out; } if (atomic_read(&inode->i_count) > 1) goto skip; } need_fill_inode = 0; unionfs_fill_inode(dentry, inode); skip: /* only (our) lookup wants to do a d_add */ switch (flag) { case INTERPOSE_DEFAULT: /* for operations which create new inodes */ d_add(dentry, inode); break; case INTERPOSE_REVAL_NEG: d_instantiate(dentry, inode); break; case INTERPOSE_LOOKUP: spliced = d_splice_alias(inode, dentry); if (spliced && spliced != dentry) { /* * d_splice can return a dentry if it was * disconnected and had to be moved. We must ensure * that the private data of the new dentry is * correct and that the inode info was filled * properly. Finally we must return this new * dentry. */ spliced->d_op = &unionfs_dops; spliced->d_fsdata = dentry->d_fsdata; dentry->d_fsdata = NULL; dentry = spliced; if (need_fill_inode) { need_fill_inode = 0; unionfs_fill_inode(dentry, inode); } goto out_spliced; } else if (!spliced) { if (need_fill_inode) { need_fill_inode = 0; unionfs_fill_inode(dentry, inode); goto out_spliced; } } break; case INTERPOSE_REVAL: /* Do nothing. */ break; default: printk(KERN_CRIT "unionfs: invalid interpose flag passed!\n"); BUG(); } goto out; out_spliced: if (!err) return spliced; out: return ERR_PTR(err); } /* like interpose above, but for an already existing dentry */ void unionfs_reinterpose(struct dentry *dentry) { struct dentry *lower_dentry; struct inode *inode; int bindex, bstart, bend; verify_locked(dentry); /* This is pre-allocated inode */ inode = dentry->d_inode; bstart = dbstart(dentry); bend = dbend(dentry); for (bindex = bstart; bindex <= bend; bindex++) { lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); if (!lower_dentry) continue; if (!lower_dentry->d_inode) continue; if (unionfs_lower_inode_idx(inode, bindex)) continue; unionfs_set_lower_inode_idx(inode, bindex, igrab(lower_dentry->d_inode)); } ibstart(inode) = dbstart(dentry); ibend(inode) = dbend(dentry); } /* * make sure the branch we just looked up (nd) makes sense: * * 1) we're not trying to stack unionfs on top of unionfs * 2) it exists * 3) is a directory */ int check_branch(struct nameidata *nd) { /* XXX: remove in ODF code -- stacking unions allowed there */ if (!strcmp(nd->path.dentry->d_sb->s_type->name, UNIONFS_NAME)) return -EINVAL; if (!nd->path.dentry->d_inode) return -ENOENT; if (!S_ISDIR(nd->path.dentry->d_inode->i_mode)) return -ENOTDIR; return 0; } /* checks if two lower_dentries have overlapping branches */ static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2) { struct dentry *dent = NULL; dent = dent1; while ((dent != dent2) && (dent->d_parent != dent)) dent = dent->d_parent; if (dent == dent2) return 1; dent = dent2; while ((dent != dent1) && (dent->d_parent != dent)) dent = dent->d_parent; return (dent == dent1); } /* * Parse "ro" or "rw" options, but default to "rw" if no mode options was * specified. Fill the mode bits in @perms. If encounter an unknown * string, return -EINVAL. Otherwise return 0. */ int parse_branch_mode(const char *name, int *perms) { if (!name || !strcmp(name, "rw")) { *perms = MAY_READ | MAY_WRITE; return 0; } if (!strcmp(name, "ro")) { *perms = MAY_READ; return 0; } return -EINVAL; } /* * parse the dirs= mount argument * * We don't need to lock the superblock private data's rwsem, as we get * called only by unionfs_read_super - it is still a long time before anyone * can even get a reference to us. */ static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info *lower_root_info, char *options) { struct nameidata nd; char *name; int err = 0; int branches = 1; int bindex = 0; int i = 0; int j = 0; struct dentry *dent1; struct dentry *dent2; if (options[0] == '\0') { printk(KERN_ERR "unionfs: no branches specified\n"); err = -EINVAL; goto out; } /* * Each colon means we have a separator, this is really just a rough * guess, since strsep will handle empty fields for us. */ for (i = 0; options[i]; i++) if (options[i] == ':') branches++; /* allocate space for underlying pointers to lower dentry */ UNIONFS_SB(sb)->data = kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL); if (unlikely(!UNIONFS_SB(sb)->data)) { err = -ENOMEM; goto out; } lower_root_info->lower_paths = kcalloc(branches, sizeof(struct path), GFP_KERNEL); if (unlikely(!lower_root_info->lower_paths)) { err = -ENOMEM; goto out; } /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */ branches = 0; while ((name = strsep(&options, ":")) != NULL) { int perms; char *mode = strchr(name, '='); if (!name) continue; if (!*name) { /* bad use of ':' (extra colons) */ err = -EINVAL; goto out; } branches++; /* strip off '=' if any */ if (mode) *mode++ = '\0'; err = parse_branch_mode(mode, &perms); if (err) { printk(KERN_ERR "unionfs: invalid mode \"%s\" for " "branch %d\n", mode, bindex); goto out; } /* ensure that leftmost branch is writeable */ if (!bindex && !(perms & MAY_WRITE)) { printk(KERN_ERR "unionfs: leftmost branch cannot be " "read-only (use \"-o ro\" to create a " "read-only union)\n"); err = -EINVAL; goto out; } err = path_lookup(name, LOOKUP_FOLLOW, &nd); if (err) { printk(KERN_ERR "unionfs: error accessing " "lower directory '%s' (error %d)\n", name, err); goto out; } err = check_branch(&nd); if (err) { printk(KERN_ERR "unionfs: lower directory " "'%s' is not a valid branch\n", name); path_put(&nd.path); goto out; } lower_root_info->lower_paths[bindex].dentry = nd.path.dentry; lower_root_info->lower_paths[bindex].mnt = nd.path.mnt; set_branchperms(sb, bindex, perms); set_branch_count(sb, bindex, 0); new_branch_id(sb, bindex); if (lower_root_info->bstart < 0) lower_root_info->bstart = bindex; lower_root_info->bend = bindex; bindex++; } if (branches == 0) { printk(KERN_ERR "unionfs: no branches specified\n"); err = -EINVAL; goto out; } BUG_ON(branches != (lower_root_info->bend + 1)); /* * Ensure that no overlaps exist in the branches. * * This test is required because the Linux kernel has no support * currently for ensuring coherency between stackable layers and * branches. If we were to allow overlapping branches, it would be * possible, for example, to delete a file via one branch, which * would not be reflected in another branch. Such incoherency could * lead to inconsistencies and even kernel oopses. Rather than * implement hacks to work around some of these cache-coherency * problems, we prevent branch overlapping, for now. A complete * solution will involve proper kernel/VFS support for cache * coherency, at which time we could safely remove this * branch-overlapping test. */ for (i = 0; i < branches; i++) { dent1 = lower_root_info->lower_paths[i].dentry; for (j = i + 1; j < branches; j++) { dent2 = lower_root_info->lower_paths[j].dentry; if (is_branch_overlap(dent1, dent2)) { printk(KERN_ERR "unionfs: branches %d and " "%d overlap\n", i, j); err = -EINVAL; goto out; } } } out: if (err) { for (i = 0; i < branches; i++) path_put(&lower_root_info->lower_paths[i]); kfree(lower_root_info->lower_paths); kfree(UNIONFS_SB(sb)->data); /* * MUST clear the pointers to prevent potential double free if * the caller dies later on */ lower_root_info->lower_paths = NULL; UNIONFS_SB(sb)->data = NULL; } return err; } /* * Parse mount options. See the manual page for usage instructions. * * Returns the dentry object of the lower-level (lower) directory; * We want to mount our stackable file system on top of that lower directory. */ static struct unionfs_dentry_info *unionfs_parse_options( struct super_block *sb, char *options) { struct unionfs_dentry_info *lower_root_info; char *optname; int err = 0; int bindex; int dirsfound = 0; /* allocate private data area */ err = -ENOMEM; lower_root_info = kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL); if (unlikely(!lower_root_info)) goto out_error; lower_root_info->bstart = -1; lower_root_info->bend = -1; lower_root_info->bopaque = -1; while ((optname = strsep(&options, ",")) != NULL) { char *optarg; if (!optname || !*optname) continue; optarg = strchr(optname, '='); if (optarg) *optarg++ = '\0'; /* * All of our options take an argument now. Insert ones that * don't, above this check. */ if (!optarg) { printk(KERN_ERR "unionfs: %s requires an argument\n", optname); err = -EINVAL; goto out_error; } if (!strcmp("dirs", optname)) { if (++dirsfound > 1) { printk(KERN_ERR "unionfs: multiple dirs specified\n"); err = -EINVAL; goto out_error; } err = parse_dirs_option(sb, lower_root_info, optarg); if (err) goto out_error; continue; } err = -EINVAL; printk(KERN_ERR "unionfs: unrecognized option '%s'\n", optname); goto out_error; } if (dirsfound != 1) { printk(KERN_ERR "unionfs: dirs option required\n"); err = -EINVAL; goto out_error; } goto out; out_error: if (lower_root_info && lower_root_info->lower_paths) { for (bindex = lower_root_info->bstart; bindex >= 0 && bindex <= lower_root_info->bend; bindex++) path_put(&lower_root_info->lower_paths[bindex]); } kfree(lower_root_info->lower_paths); kfree(lower_root_info); kfree(UNIONFS_SB(sb)->data); UNIONFS_SB(sb)->data = NULL; lower_root_info = ERR_PTR(err); out: return lower_root_info; } /* * our custom d_alloc_root work-alike * * we can't use d_alloc_root if we want to use our own interpose function * unchanged, so we simply call our own "fake" d_alloc_root */ static struct dentry *unionfs_d_alloc_root(struct super_block *sb) { struct dentry *ret = NULL; if (sb) { static const struct qstr name = { .name = "/", .len = 1 }; ret = d_alloc(NULL, &name); if (likely(ret)) { ret->d_op = &unionfs_dops; ret->d_sb = sb; ret->d_parent = ret; } } return ret; } /* * There is no need to lock the unionfs_super_info's rwsem as there is no * way anyone can have a reference to the superblock at this point in time. */ static int unionfs_read_super(struct super_block *sb, void *raw_data, int silent) { int err = 0; struct unionfs_dentry_info *lower_root_info = NULL; int bindex, bstart, bend; if (!raw_data) { printk(KERN_ERR "unionfs: read_super: missing data argument\n"); err = -EINVAL; goto out; } /* Allocate superblock private data */ sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL); if (unlikely(!UNIONFS_SB(sb))) { printk(KERN_CRIT "unionfs: read_super: out of memory\n"); err = -ENOMEM; goto out; } UNIONFS_SB(sb)->bend = -1; atomic_set(&UNIONFS_SB(sb)->generation, 1); init_rwsem(&UNIONFS_SB(sb)->rwsem); UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */ lower_root_info = unionfs_parse_options(sb, raw_data); if (IS_ERR(lower_root_info)) { printk(KERN_ERR "unionfs: read_super: error while parsing options " "(err = %ld)\n", PTR_ERR(lower_root_info)); err = PTR_ERR(lower_root_info); lower_root_info = NULL; goto out_free; } if (lower_root_info->bstart == -1) { err = -ENOENT; goto out_free; } /* set the lower superblock field of upper superblock */ bstart = lower_root_info->bstart; BUG_ON(bstart != 0); sbend(sb) = bend = lower_root_info->bend; for (bindex = bstart; bindex <= bend; bindex++) { struct dentry *d = lower_root_info->lower_paths[bindex].dentry; atomic_inc(&d->d_sb->s_active); unionfs_set_lower_super_idx(sb, bindex, d->d_sb); } /* max Bytes is the maximum bytes from highest priority branch */ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes; /* * Our c/m/atime granularity is 1 ns because we may stack on file * systems whose granularity is as good. This is important for our * time-based cache coherency. */ sb->s_time_gran = 1; sb->s_op = &unionfs_sops; /* See comment next to the definition of unionfs_d_alloc_root */ sb->s_root = unionfs_d_alloc_root(sb); if (unlikely(!sb->s_root)) { err = -ENOMEM; goto out_dput; } /* link the upper and lower dentries */ sb->s_root->d_fsdata = NULL; err = new_dentry_private_data(sb->s_root, UNIONFS_DMUTEX_ROOT); if (unlikely(err)) goto out_freedpd; /* Set the lower dentries for s_root */ for (bindex = bstart; bindex <= bend; bindex++) { struct dentry *d; struct vfsmount *m; d = lower_root_info->lower_paths[bindex].dentry; m = lower_root_info->lower_paths[bindex].mnt; unionfs_set_lower_dentry_idx(sb->s_root, bindex, d); unionfs_set_lower_mnt_idx(sb->s_root, bindex, m); } dbstart(sb->s_root) = bstart; dbend(sb->s_root) = bend; /* Set the generation number to one, since this is for the mount. */ atomic_set(&UNIONFS_D(sb->s_root)->generation, 1); /* * Call interpose to create the upper level inode. Only * INTERPOSE_LOOKUP can return a value other than 0 on err. */ err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0)); unionfs_unlock_dentry(sb->s_root); if (!err) goto out; /* else fall through */ out_freedpd: if (UNIONFS_D(sb->s_root)) { kfree(UNIONFS_D(sb->s_root)->lower_paths); free_dentry_private_data(sb->s_root); } dput(sb->s_root); out_dput: if (lower_root_info && !IS_ERR(lower_root_info)) { for (bindex = lower_root_info->bstart; bindex <= lower_root_info->bend; bindex++) { struct dentry *d; d = lower_root_info->lower_paths[bindex].dentry; /* drop refs we took earlier */ atomic_dec(&d->d_sb->s_active); path_put(&lower_root_info->lower_paths[bindex]); } kfree(lower_root_info->lower_paths); kfree(lower_root_info); lower_root_info = NULL; } out_free: kfree(UNIONFS_SB(sb)->data); kfree(UNIONFS_SB(sb)); sb->s_fs_info = NULL; out: if (lower_root_info && !IS_ERR(lower_root_info)) { kfree(lower_root_info->lower_paths); kfree(lower_root_info); } return err; } static int unionfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { int err; err = get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt); if (!err) UNIONFS_SB(mnt->mnt_sb)->dev_name = kstrdup(dev_name, GFP_KERNEL); return err; } static struct file_system_type unionfs_fs_type = { .owner = THIS_MODULE, .name = UNIONFS_NAME, .get_sb = unionfs_get_sb, .kill_sb = generic_shutdown_super, .fs_flags = FS_REVAL_DOT, }; static int __init init_unionfs_fs(void) { int err; pr_info("Registering unionfs " UNIONFS_VERSION "\n"); err = unionfs_init_filldir_cache(); if (unlikely(err)) goto out; err = unionfs_init_inode_cache(); if (unlikely(err)) goto out; err = unionfs_init_dentry_cache(); if (unlikely(err)) goto out; err = init_sioq(); if (unlikely(err)) goto out; err = register_filesystem(&unionfs_fs_type); out: if (unlikely(err)) { stop_sioq(); unionfs_destroy_filldir_cache(); unionfs_destroy_inode_cache(); unionfs_destroy_dentry_cache(); } return err; } static void __exit exit_unionfs_fs(void) { stop_sioq(); unionfs_destroy_filldir_cache(); unionfs_destroy_inode_cache(); unionfs_destroy_dentry_cache(); unregister_filesystem(&unionfs_fs_type); pr_info("Completed unionfs module unload\n"); } MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" " (http://www.fsl.cs.sunysb.edu)"); MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION " (http://unionfs.filesystems.org)"); MODULE_LICENSE("GPL"); module_init(init_unionfs_fs); module_exit(exit_unionfs_fs);