diff options
Diffstat (limited to 'sys/fs/devfs')
-rw-r--r-- | sys/fs/devfs/devfs.h | 42 | ||||
-rw-r--r-- | sys/fs/devfs/devfs_devs.c | 546 | ||||
-rw-r--r-- | sys/fs/devfs/devfs_int.h | 22 | ||||
-rw-r--r-- | sys/fs/devfs/devfs_rule.c | 23 | ||||
-rw-r--r-- | sys/fs/devfs/devfs_vfsops.c | 58 | ||||
-rw-r--r-- | sys/fs/devfs/devfs_vnops.c | 192 |
6 files changed, 437 insertions, 446 deletions
diff --git a/sys/fs/devfs/devfs.h b/sys/fs/devfs/devfs.h index 07912a9..18061b9 100644 --- a/sys/fs/devfs/devfs.h +++ b/sys/fs/devfs/devfs.h @@ -118,32 +118,12 @@ struct devfs_rule { #ifdef _KERNEL -/* - * These are default sizes for the DEVFS inode table and the overflow - * table. If the default table overflows we allocate the overflow - * table, the size of which can also be set with a sysctl. If the - * overflow table fills you're toast. - */ -#ifndef NDEVFSINO -#define NDEVFSINO 1024 -#endif - -#ifndef NDEVFSOVERFLOW -#define NDEVFSOVERFLOW 32768 -#endif - -/* - * This is the first "per mount" inode, these are used for directories - * and symlinks and the like. Must be larger than the number of "true" - * device nodes and symlinks. It is. - */ -#define DEVFSINOMOUNT 0x2000000 - #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_DEVFS); #endif struct devfs_dirent { + struct cdev_priv *de_cdp; int de_inode; int de_flags; #define DE_WHITEOUT 0x1 @@ -152,7 +132,6 @@ struct devfs_dirent { struct dirent *de_dirent; TAILQ_ENTRY(devfs_dirent) de_list; TAILQ_HEAD(, devfs_dirent) de_dlist; - LIST_ENTRY(devfs_dirent) de_alias; struct devfs_dirent *de_dir; int de_links; mode_t de_mode; @@ -167,22 +146,17 @@ struct devfs_dirent { }; struct devfs_mount { + u_int dm_idx; struct mount *dm_mount; struct devfs_dirent *dm_rootdir; unsigned dm_generation; - struct devfs_dirent **dm_dirent; - struct devfs_dirent **dm_overflow; - int dm_inode; - struct lock dm_lock; + struct sx dm_lock; devfs_rsnum dm_ruleset; }; -extern unsigned devfs_rule_depth; +#define DEVFS_ROOTINO 2 -/* - * This is what we fill in dm_dirent[N] for a deleted entry. - */ -#define DE_DELETED ((struct devfs_dirent *)sizeof(struct devfs_dirent)) +extern unsigned devfs_rule_depth; #define VFSTODEVFS(mp) ((struct devfs_mount *)((mp)->mnt_data)) @@ -192,10 +166,12 @@ void devfs_rules_newmount(struct devfs_mount *dm, struct thread *td); int devfs_allocv (struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td); struct cdev **devfs_itod (int inode); struct devfs_dirent **devfs_itode (struct devfs_mount *dm, int inode); +void devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de); void devfs_populate (struct devfs_mount *dm); +void devfs_cleanup (struct devfs_mount *dm); struct devfs_dirent *devfs_newdirent (char *name, int namelen); -void devfs_purge (struct devfs_dirent *dd); -struct devfs_dirent *devfs_vmkdir (char *name, int namelen, struct devfs_dirent *dotdot); +struct devfs_dirent *devfs_vmkdir (struct devfs_mount *, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode); +struct devfs_dirent *devfs_find (struct devfs_dirent *dd, const char *name, int namelen); #endif /* _KERNEL */ diff --git a/sys/fs/devfs/devfs_devs.c b/sys/fs/devfs/devfs_devs.c index 77a5140..5b06b8c 100644 --- a/sys/fs/devfs/devfs_devs.c +++ b/sys/fs/devfs/devfs_devs.c @@ -36,44 +36,42 @@ #include <sys/conf.h> #include <sys/dirent.h> #include <sys/kernel.h> +#include <sys/limits.h> #include <sys/lock.h> #include <sys/mac.h> #include <sys/malloc.h> #include <sys/proc.h> +#include <sys/sx.h> #include <sys/sysctl.h> #include <sys/vnode.h> -#include <machine/atomic.h> +#include <sys/kdb.h> #include <fs/devfs/devfs.h> #include <fs/devfs/devfs_int.h> -static struct cdev *devfs_inot[NDEVFSINO]; -static struct cdev **devfs_overflow; -static int devfs_ref[NDEVFSINO]; -static int *devfs_refoverflow; -static int devfs_nextino = 3; -static int devfs_numino; -static int devfs_topino; -static int devfs_noverflowwant = NDEVFSOVERFLOW; -static int devfs_noverflow; -static unsigned devfs_generation; +/* + * The one true (but secret) list of active devices in the system. + * Locked by dev_lock()/devmtx + */ +static TAILQ_HEAD(,cdev_priv) cdevp_list = TAILQ_HEAD_INITIALIZER(cdevp_list); + +struct unrhdr *devfs_inos; -static struct devfs_dirent *devfs_find (struct devfs_dirent *dd, const char *name, int namelen); + +static MALLOC_DEFINE(M_DEVFS2, "DEVFS2", "DEVFS data 2"); +static MALLOC_DEFINE(M_DEVFS3, "DEVFS3", "DEVFS data 3"); +static MALLOC_DEFINE(M_CDEVP, "DEVFS1", "DEVFS cdev_priv storage"); static SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "DEVFS filesystem"); -SYSCTL_UINT(_vfs_devfs, OID_AUTO, noverflow, CTLFLAG_RW, - &devfs_noverflowwant, 0, "Size of DEVFS overflow table"); + +static unsigned devfs_generation; SYSCTL_UINT(_vfs_devfs, OID_AUTO, generation, CTLFLAG_RD, &devfs_generation, 0, "DEVFS generation number"); -SYSCTL_UINT(_vfs_devfs, OID_AUTO, inodes, CTLFLAG_RD, - &devfs_numino, 0, "DEVFS inodes"); -SYSCTL_UINT(_vfs_devfs, OID_AUTO, topinode, CTLFLAG_RD, - &devfs_topino, 0, "DEVFS highest inode#"); unsigned devfs_rule_depth = 1; SYSCTL_UINT(_vfs_devfs, OID_AUTO, rule_depth, CTLFLAG_RW, - &devfs_rule_depth, 0, "Max depth of ruleset include"); + &devfs_rule_depth, 0, "Max depth of ruleset include"); /* * Helper sysctl for devname(3). We're given a struct cdev * and return @@ -84,20 +82,24 @@ sysctl_devname(SYSCTL_HANDLER_ARGS) { int error; dev_t ud; - struct cdev *dev, **dp; + struct cdev_priv *cdp; error = SYSCTL_IN(req, &ud, sizeof (ud)); if (error) return (error); if (ud == NODEV) return(EINVAL); - dp = devfs_itod(ud); - if (dp == NULL) - return(ENOENT); - dev = *dp; - if (dev == NULL) +/* + ud ^ devfs_random(); +*/ + dev_lock(); + TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) + if (cdp->cdp_inode == ud) + break; + dev_unlock(); + if (cdp == NULL) return(ENOENT); - return(SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1)); + return(SYSCTL_OUT(req, cdp->cdp_c.si_name, strlen(cdp->cdp_c.si_name) + 1)); return (error); } @@ -107,77 +109,45 @@ SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY, SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD, 0, sizeof(struct cdev), "sizeof(struct cdev)"); -static int * -devfs_itor(int inode) -{ - if (inode < NDEVFSINO) - return (&devfs_ref[inode]); - else if (inode < NDEVFSINO + devfs_noverflow) - return (&devfs_refoverflow[inode - NDEVFSINO]); - else - panic ("YRK!"); -} +SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD, + 0, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)"); -static void -devfs_dropref(int inode) +struct cdev * +devfs_alloc(void) { - int *ip; + struct cdev_priv *cdp; + struct cdev *cdev; - ip = devfs_itor(inode); - atomic_add_int(ip, -1); -} + cdp = malloc(sizeof *cdp, M_CDEVP, M_USE_RESERVE | M_ZERO | M_WAITOK); -static int -devfs_getref(int inode) -{ - int *ip, i, j; - struct cdev **dp; - - ip = devfs_itor(inode); - dp = devfs_itod(inode); - for (;;) { - i = *ip; - j = i + 1; - if (!atomic_cmpset_int(ip, i, j)) - continue; - if (*dp != NULL) - return (1); - atomic_add_int(ip, -1); - return(0); - } -} + cdp->cdp_dirents = &cdp->cdp_dirent0; + cdp->cdp_dirent0 = NULL; + cdp->cdp_maxdirent = 0; -struct devfs_dirent ** -devfs_itode (struct devfs_mount *dm, int inode) -{ + cdev = &cdp->cdp_c; + cdev->si_priv = cdp; - if (inode < 0) - return (NULL); - if (inode < NDEVFSINO) - return (&dm->dm_dirent[inode]); - if (devfs_overflow == NULL) - return (NULL); - if (inode < NDEVFSINO + devfs_noverflow) - return (&dm->dm_overflow[inode - NDEVFSINO]); - return (NULL); + cdev->si_name = cdev->__si_namebuf; + LIST_INIT(&cdev->si_children); + return (cdev); } -struct cdev ** -devfs_itod (int inode) +void +devfs_free(struct cdev *cdev) { - - if (inode < 0) - return (NULL); - if (inode < NDEVFSINO) - return (&devfs_inot[inode]); - if (devfs_overflow == NULL) - return (NULL); - if (inode < NDEVFSINO + devfs_noverflow) - return (&devfs_overflow[inode - NDEVFSINO]); - return (NULL); + struct cdev_priv *cdp; + + cdp = cdev->si_priv; + if (cdev->si_cred != NULL) + crfree(cdev->si_cred); + if (cdp->cdp_inode > 0) + free_unr(devfs_inos, cdp->cdp_inode); + if (cdp->cdp_maxdirent > 0) + free(cdp->cdp_dirents, M_DEVFS2); + free(cdp, M_CDEVP); } -static struct devfs_dirent * +struct devfs_dirent * devfs_find(struct devfs_dirent *dd, const char *name, int namelen) { struct devfs_dirent *de; @@ -200,8 +170,8 @@ devfs_newdirent(char *name, int namelen) struct dirent d; d.d_namlen = namelen; - i = sizeof (*de) + GENERIC_DIRSIZ(&d); - de = malloc(i, M_DEVFS, M_WAITOK | M_ZERO); + i = sizeof (*de) + GENERIC_DIRSIZ(&d); + de = malloc(i, M_DEVFS3, M_WAITOK | M_ZERO); de->de_dirent = (struct dirent *)(de + 1); de->de_dirent->d_namlen = namelen; de->de_dirent->d_reclen = GENERIC_DIRSIZ(&d); @@ -217,158 +187,274 @@ devfs_newdirent(char *name, int namelen) } struct devfs_dirent * -devfs_vmkdir(char *name, int namelen, struct devfs_dirent *dotdot) +devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode) { struct devfs_dirent *dd; struct devfs_dirent *de; + /* Create the new directory */ dd = devfs_newdirent(name, namelen); - TAILQ_INIT(&dd->de_dlist); - dd->de_dirent->d_type = DT_DIR; dd->de_mode = 0555; dd->de_links = 2; dd->de_dir = dd; + if (inode != 0) + dd->de_inode = inode; + else + dd->de_inode = alloc_unr(devfs_inos); + /* Create the "." entry in the new directory */ de = devfs_newdirent(".", 1); de->de_dirent->d_type = DT_DIR; - de->de_dir = dd; de->de_flags |= DE_DOT; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); + de->de_dir = dd; + /* Create the ".." entry in the new directory */ de = devfs_newdirent("..", 2); de->de_dirent->d_type = DT_DIR; - if (dotdot == NULL) - de->de_dir = dd; - else - de->de_dir = dotdot; de->de_flags |= DE_DOTDOT; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); + if (dotdot == NULL) { + de->de_dir = dd; + } else { + de->de_dir = dotdot; + TAILQ_INSERT_TAIL(&dotdot->de_dlist, dd, de_list); + dotdot->de_links++; + } +#ifdef MAC + mac_create_devfs_directory(dmp->dm_mount, name, namelen, dd); +#endif return (dd); } -static void -devfs_delete(struct devfs_dirent *dd, struct devfs_dirent *de) +void +devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de) { if (de->de_symlink) { free(de->de_symlink, M_DEVFS); de->de_symlink = NULL; } - if (de->de_vnode) + if (de->de_vnode != NULL) { de->de_vnode->v_data = NULL; - TAILQ_REMOVE(&dd->de_dlist, de, de_list); + vgone(de->de_vnode); + de->de_vnode = NULL; + } #ifdef MAC mac_destroy_devfsdirent(de); #endif - free(de, M_DEVFS); + if (de->de_inode > DEVFS_ROOTINO) { + free_unr(devfs_inos, de->de_inode); + de->de_inode = 0; + } + free(de, M_DEVFS3); } -void -devfs_purge(struct devfs_dirent *dd) +/* + * Called on unmount. + * Recursively removes the entire tree + */ + +static void +devfs_purge(struct devfs_mount *dm, struct devfs_dirent *dd) { struct devfs_dirent *de; + sx_assert(&dm->dm_lock, SX_XLOCKED); for (;;) { de = TAILQ_FIRST(&dd->de_dlist); if (de == NULL) break; - devfs_delete(dd, de); + TAILQ_REMOVE(&dd->de_dlist, de, de_list); + if (de->de_flags & (DE_DOT|DE_DOTDOT)) + devfs_delete(dm, de); + else if (de->de_dirent->d_type == DT_DIR) + devfs_purge(dm, de); + else + devfs_delete(dm, de); } - FREE(dd, M_DEVFS); + devfs_delete(dm, dd); } +/* + * Each cdev_priv has an array of pointers to devfs_dirent which is indexed + * by the mount points dm_idx. + * This function extends the array when necessary, taking into account that + * the default array is 1 element and not malloc'ed. + */ +static void +devfs_metoo(struct cdev_priv *cdp, struct devfs_mount *dm) +{ + struct devfs_dirent **dep; + int siz; + + siz = (dm->dm_idx + 1) * sizeof *dep; + dep = malloc(siz, M_DEVFS2, M_WAITOK | M_ZERO); + dev_lock(); + if (dm->dm_idx <= cdp->cdp_maxdirent) { + /* We got raced */ + dev_unlock(); + free(dep, M_DEVFS2); + return; + } + memcpy(dep, cdp->cdp_dirents, (cdp->cdp_maxdirent + 1) * sizeof *dep); + if (cdp->cdp_maxdirent > 0) + free(cdp->cdp_dirents, M_DEVFS2); + cdp->cdp_dirents = dep; + /* + * XXX: if malloc told us how much we actually got this could + * XXX: be optimized. + */ + cdp->cdp_maxdirent = dm->dm_idx; + dev_unlock(); +} -void -devfs_populate(struct devfs_mount *dm) +static int +devfs_populate_loop(struct devfs_mount *dm, int cleanup) { - int i, j; - struct cdev *dev, *pdev; + struct cdev_priv *cdp; + struct devfs_dirent *de; struct devfs_dirent *dd; - struct devfs_dirent *de, **dep; + struct cdev *pdev; + int j; char *q, *s; - if (dm->dm_generation == devfs_generation) - return; - if (devfs_noverflow && dm->dm_overflow == NULL) { - i = devfs_noverflow * sizeof (struct devfs_dirent *); - MALLOC(dm->dm_overflow, struct devfs_dirent **, i, - M_DEVFS, M_WAITOK | M_ZERO); - } - while (dm->dm_generation != devfs_generation) { - dm->dm_generation = devfs_generation; - for (i = 0; i <= devfs_topino; i++) { - dev = *devfs_itod(i); - dep = devfs_itode(dm, i); - de = *dep; - if (dev == NULL && de == DE_DELETED) { - *dep = NULL; - continue; - } - if (dev == NULL && de != NULL) { - dd = de->de_dir; - *dep = NULL; - devfs_delete(dd, de); - devfs_dropref(i); - continue; - } - if (dev == NULL) - continue; - if (de != NULL) + sx_assert(&dm->dm_lock, SX_XLOCKED); + dev_lock(); + TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) { + + KASSERT(cdp->cdp_dirents != NULL, ("NULL cdp_dirents")); + + /* + * If we are unmounting, or the device has been destroyed, + * clean up our dirent. + */ + if ((cleanup || !(cdp->cdp_flags & CDP_ACTIVE)) && + dm->dm_idx <= cdp->cdp_maxdirent && + cdp->cdp_dirents[dm->dm_idx] != NULL) { + de = cdp->cdp_dirents[dm->dm_idx]; + cdp->cdp_dirents[dm->dm_idx] = NULL; + cdp->cdp_inuse--; + KASSERT(cdp == de->de_cdp, + ("%s %d %s %p %p", __func__, __LINE__, + cdp->cdp_c.si_name, cdp, de->de_cdp)); + KASSERT(de->de_dir != NULL, ("Null de->de_dir")); + dev_unlock(); + + TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list); + de->de_cdp = NULL; + de->de_inode = 0; + devfs_delete(dm, de); + return (1); + } + /* + * GC any lingering devices + */ + if (!(cdp->cdp_flags & CDP_ACTIVE)) { + if (cdp->cdp_inuse > 0) continue; - if (!devfs_getref(i)) + TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); + dev_unlock(); + dev_rel(&cdp->cdp_c); + return (1); + } + /* + * Don't create any new dirents if we are unmounting + */ + if (cleanup) + continue; + KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!")); + + if (dm->dm_idx <= cdp->cdp_maxdirent && + cdp->cdp_dirents[dm->dm_idx] != NULL) { + de = cdp->cdp_dirents[dm->dm_idx]; + KASSERT(cdp == de->de_cdp, ("inconsistent cdp")); + continue; + } + + + cdp->cdp_inuse++; + dev_unlock(); + + if (dm->dm_idx > cdp->cdp_maxdirent) + devfs_metoo(cdp, dm); + + dd = dm->dm_rootdir; + s = cdp->cdp_c.si_name; + for (;;) { + for (q = s; *q != '/' && *q != '\0'; q++) continue; - dd = dm->dm_rootdir; - s = dev->si_name; - for (;;) { - for (q = s; *q != '/' && *q != '\0'; q++) - continue; - if (*q != '/') - break; - de = devfs_find(dd, s, q - s); - if (de == NULL) { - de = devfs_vmkdir(s, q - s, dd); -#ifdef MAC - mac_create_devfs_directory( - dm->dm_mount, s, q - s, de); -#endif - de->de_inode = dm->dm_inode++; - TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); - dd->de_links++; - } - s = q + 1; - dd = de; - } - de = devfs_newdirent(s, q - s); - if (dev->si_flags & SI_ALIAS) { - de->de_inode = dm->dm_inode++; - de->de_uid = 0; - de->de_gid = 0; - de->de_mode = 0755; - de->de_dirent->d_type = DT_LNK; - pdev = dev->si_parent; - j = strlen(pdev->si_name) + 1; - MALLOC(de->de_symlink, char *, j, M_DEVFS, M_WAITOK); - bcopy(pdev->si_name, de->de_symlink, j); - } else { - de->de_inode = i; - de->de_uid = dev->si_uid; - de->de_gid = dev->si_gid; - de->de_mode = dev->si_mode; - de->de_dirent->d_type = DT_CHR; - } + if (*q != '/') + break; + de = devfs_find(dd, s, q - s); + if (de == NULL) + de = devfs_vmkdir(dm, s, q - s, dd, 0); + s = q + 1; + dd = de; + } + + de = devfs_newdirent(s, q - s); + if (cdp->cdp_c.si_flags & SI_ALIAS) { + de->de_uid = 0; + de->de_gid = 0; + de->de_mode = 0755; + de->de_dirent->d_type = DT_LNK; + pdev = cdp->cdp_c.si_parent; + j = strlen(pdev->si_name) + 1; + de->de_symlink = malloc(j, M_DEVFS, M_WAITOK); + bcopy(pdev->si_name, de->de_symlink, j); + } else { + de->de_uid = cdp->cdp_c.si_uid; + de->de_gid = cdp->cdp_c.si_gid; + de->de_mode = cdp->cdp_c.si_mode; + de->de_dirent->d_type = DT_CHR; + } + de->de_inode = cdp->cdp_inode; + de->de_cdp = cdp; #ifdef MAC - mac_create_devfs_device(dev->si_cred, dm->dm_mount, - dev, de); + mac_create_devfs_device(cdp->cdp_c.si_cred, dm->dm_mount, + &cdp->cdp_c, de); #endif - *dep = de; - de->de_dir = dd; - devfs_rules_apply(dm, de); - TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); - } + de->de_dir = dd; + TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); + devfs_rules_apply(dm, de); + dev_lock(); + /* XXX: could check that cdp is still active here */ + KASSERT(cdp->cdp_dirents[dm->dm_idx] == NULL, + ("%s %d\n", __func__, __LINE__)); + cdp->cdp_dirents[dm->dm_idx] = de; + KASSERT(de->de_cdp != (void *)0xdeadc0de, + ("%s %d\n", __func__, __LINE__)); + dev_unlock(); + return (1); } + dev_unlock(); + return (0); +} + +void +devfs_populate(struct devfs_mount *dm) +{ + + sx_assert(&dm->dm_lock, SX_XLOCKED); + if (dm->dm_generation == devfs_generation) + return; + while (devfs_populate_loop(dm, 0)) + continue; + dm->dm_generation = devfs_generation; +} + +void +devfs_cleanup(struct devfs_mount *dm) +{ + + sx_assert(&dm->dm_lock, SX_XLOCKED); + while (devfs_populate_loop(dm, 1)) + continue; + devfs_purge(dm, dm->dm_rootdir); } /* @@ -380,83 +466,33 @@ devfs_populate(struct devfs_mount *dm) void devfs_create(struct cdev *dev) { - int ino, i, *ip; - struct cdev **dp; - struct cdev **ot; - int *or; - int n; - - for (;;) { - /* Grab the next inode number */ - ino = devfs_nextino; - i = ino + 1; - /* wrap around when we reach the end */ - if (i >= NDEVFSINO + devfs_noverflow) - i = 3; - devfs_nextino = i; - - /* see if it was occupied */ - dp = devfs_itod(ino); - KASSERT(dp != NULL, ("DEVFS: No devptr inode %d", ino)); - if (*dp != NULL) - continue; - ip = devfs_itor(ino); - KASSERT(ip != NULL, ("DEVFS: No iptr inode %d", ino)); - if (*ip != 0) - continue; - break; - } - - *dp = dev; - dev->si_inode = ino; - if (i > devfs_topino) - devfs_topino = i; - - devfs_numino++; + struct cdev_priv *cdp; + + mtx_assert(&devmtx, MA_OWNED); + cdp = dev->si_priv; + cdp->cdp_flags |= CDP_ACTIVE; + cdp->cdp_inode = alloc_unrl(devfs_inos); + dev_refl(dev); + TAILQ_INSERT_TAIL(&cdevp_list, cdp, cdp_list); devfs_generation++; - - if (devfs_overflow != NULL || devfs_numino + 100 < NDEVFSINO) - return; - - /* - * Try to allocate overflow table - * XXX: we can probably be less panicy these days and a linked - * XXX: list of PAGESIZE/PTRSIZE entries might be a better idea. - * - * XXX: we may be into witness unlove here. - */ - n = devfs_noverflowwant; - ot = malloc(sizeof(*ot) * n, M_DEVFS, M_NOWAIT | M_ZERO); - if (ot == NULL) - return; - or = malloc(sizeof(*or) * n, M_DEVFS, M_NOWAIT | M_ZERO); - if (or == NULL) { - free(ot, M_DEVFS); - return; - } - devfs_overflow = ot; - devfs_refoverflow = or; - devfs_noverflow = n; - printf("DEVFS Overflow table with %d entries allocated\n", n); - return; } void devfs_destroy(struct cdev *dev) { - int ino; - struct cdev **dp; + struct cdev_priv *cdp; - ino = dev->si_inode; - dev->si_inode = 0; - if (ino == 0) - return; - dp = devfs_itod(ino); - KASSERT(*dp == dev, - ("DEVFS: destroying wrong cdev ino %d", ino)); - *dp = NULL; - devfs_numino--; + mtx_assert(&devmtx, MA_OWNED); + cdp = dev->si_priv; + cdp->cdp_flags &= ~CDP_ACTIVE; devfs_generation++; - if (ino < devfs_nextino) - devfs_nextino = ino; } + +static void +devfs_devs_init(void *junk __unused) +{ + + devfs_inos = new_unrhdr(DEVFS_ROOTINO + 1, INT_MAX, &devmtx); +} + +SYSINIT(devfs_devs, SI_SUB_DEVFS, SI_ORDER_FIRST, devfs_devs_init, NULL); diff --git a/sys/fs/devfs/devfs_int.h b/sys/fs/devfs/devfs_int.h index 9c4c5f9..41cc0c2 100644 --- a/sys/fs/devfs/devfs_int.h +++ b/sys/fs/devfs/devfs_int.h @@ -37,9 +37,31 @@ #ifdef _KERNEL +struct devfs_dirent; + +struct cdev_priv { + struct cdev cdp_c; + TAILQ_ENTRY(cdev_priv) cdp_list; + + u_int cdp_inode; + + u_int cdp_flags; +#define CDP_ACTIVE (1 << 0) + + u_int cdp_inuse; + u_int cdp_maxdirent; + struct devfs_dirent **cdp_dirents; + struct devfs_dirent *cdp_dirent0; +}; + +struct cdev *devfs_alloc(void); +void devfs_free(struct cdev *); void devfs_create(struct cdev *dev); void devfs_destroy(struct cdev *dev); +extern struct unrhdr *devfs_inos; +extern struct mtx devmtx; + #endif /* _KERNEL */ #endif /* !_FS_DEVFS_DEVFS_INT_H_ */ diff --git a/sys/fs/devfs/devfs_rule.c b/sys/fs/devfs/devfs_rule.c index 5e25f4e..c1edfd3 100644 --- a/sys/fs/devfs/devfs_rule.c +++ b/sys/fs/devfs/devfs_rule.c @@ -70,12 +70,12 @@ #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/dirent.h> -#include <sys/vnode.h> #include <sys/ioccom.h> +#include <sys/lock.h> #include <sys/sx.h> #include <fs/devfs/devfs.h> - +#include <fs/devfs/devfs_int.h> /* * Kernel version of devfs_rule. @@ -176,6 +176,8 @@ devfs_rules_ioctl(struct devfs_mount *dm, u_long cmd, caddr_t data, struct threa devfs_rid rid; int error; + sx_assert(&dm->dm_lock, SX_XLOCKED); + /* * XXX: This returns an error regardless of whether we * actually support the cmd or not. @@ -185,6 +187,7 @@ devfs_rules_ioctl(struct devfs_mount *dm, u_long cmd, caddr_t data, struct threa return (error); sx_xlock(&sx_rules); + switch (cmd) { case DEVFSIO_RADD: dr = (struct devfs_rule *)data; @@ -366,6 +369,7 @@ devfs_rid_input(devfs_rid rid, struct devfs_mount *dm) * XXX: This method needs a function call for every nested * subdirectory in a devfs mount. If we plan to have many of these, * we might eventually run out of kernel stack space. + * XXX: a linear search could be done through the cdev list instead. */ static void devfs_rule_applyde_recursive(struct devfs_krule *dk, struct devfs_dirent *de) @@ -469,18 +473,13 @@ devfs_rule_delete(struct devfs_krule **dkp) static struct cdev * devfs_rule_getdev(struct devfs_dirent *de) { - struct cdev **devp, *dev; - devp = devfs_itod(de->de_inode); - if (devp != NULL) - dev = *devp; + if (de->de_cdp == NULL) + return (NULL); + if (de->de_cdp->cdp_flags & CDP_ACTIVE) + return (&de->de_cdp->cdp_c); else - dev = NULL; - /* If we think this dirent should have a struct cdev *, alert the user. */ - if (dev == NULL && de->de_dirent->d_type != DT_LNK && - de->de_dirent->d_type != DT_DIR) - printf("Warning: no struct cdev *for %s\n", de->de_dirent->d_name); - return (dev); + return (NULL); } /* diff --git a/sys/fs/devfs/devfs_vfsops.c b/sys/fs/devfs/devfs_vfsops.c index 307b4a2..6aad2b0 100644 --- a/sys/fs/devfs/devfs_vfsops.c +++ b/sys/fs/devfs/devfs_vfsops.c @@ -45,10 +45,14 @@ #include <sys/malloc.h> #include <sys/mount.h> #include <sys/proc.h> +#include <sys/sx.h> #include <sys/vnode.h> +#include <sys/limits.h> #include <fs/devfs/devfs.h> +static struct unrhdr *devfs_unr; + MALLOC_DEFINE(M_DEVFS, "DEVFS", "DEVFS data"); static vfs_mount_t devfs_mount; @@ -66,39 +70,35 @@ devfs_mount(struct mount *mp, struct thread *td) struct devfs_mount *fmp; struct vnode *rvp; + if (devfs_unr == NULL) + devfs_unr = new_unrhdr(0, INT_MAX, NULL); + error = 0; if (mp->mnt_flag & (MNT_UPDATE | MNT_ROOTFS)) return (EOPNOTSUPP); - MALLOC(fmp, struct devfs_mount *, sizeof(struct devfs_mount), - M_DEVFS, M_WAITOK | M_ZERO); - MALLOC(fmp->dm_dirent, struct devfs_dirent **, - sizeof(struct devfs_dirent *) * NDEVFSINO, - M_DEVFS, M_WAITOK | M_ZERO); - lockinit(&fmp->dm_lock, PVFS, "devfs", 0, 0); + fmp = malloc(sizeof *fmp, M_DEVFS, M_WAITOK | M_ZERO); + fmp->dm_idx = alloc_unr(devfs_unr); + sx_init(&fmp->dm_lock, "devfsmount"); mp->mnt_flag |= MNT_LOCAL; + mp->mnt_kern_flag |= MNTK_MPSAFE; #ifdef MAC mp->mnt_flag |= MNT_MULTILABEL; #endif fmp->dm_mount = mp; - mp->mnt_data = (qaddr_t) fmp; + mp->mnt_data = (void *) fmp; vfs_getnewfsid(mp); - fmp->dm_inode = DEVFSINOMOUNT; - - fmp->dm_rootdir = devfs_vmkdir("(root)", 6, NULL); - fmp->dm_rootdir->de_inode = 2; -#ifdef MAC - mac_create_devfs_directory(mp, "", 0, fmp->dm_rootdir); -#endif + fmp->dm_rootdir = devfs_vmkdir(fmp, NULL, 0, NULL, DEVFS_ROOTINO); devfs_rules_newmount(fmp, td); error = devfs_root(mp, LK_EXCLUSIVE, &rvp, td); if (error) { - lockdestroy(&fmp->dm_lock); - FREE(fmp, M_DEVFS); + sx_destroy(&fmp->dm_lock); + free_unr(devfs_unr, fmp->dm_idx); + free(fmp, M_DEVFS); return (error); } @@ -110,10 +110,7 @@ devfs_mount(struct mount *mp, struct thread *td) } static int -devfs_unmount(mp, mntflags, td) - struct mount *mp; - int mntflags; - struct thread *td; +devfs_unmount(struct mount *mp, int mntflags, struct thread *td) { int error; int flags = 0; @@ -124,10 +121,12 @@ devfs_unmount(mp, mntflags, td) error = vflush(mp, 1, flags, td); if (error) return (error); - devfs_purge(fmp->dm_rootdir); - mp->mnt_data = 0; - lockdestroy(&fmp->dm_lock); - free(fmp->dm_dirent, M_DEVFS); + sx_xlock(&fmp->dm_lock); + devfs_cleanup(fmp); + sx_xunlock(&fmp->dm_lock); + mp->mnt_data = NULL; + sx_destroy(&fmp->dm_lock); + free_unr(devfs_unr, fmp->dm_idx); free(fmp, M_DEVFS); return 0; } @@ -135,11 +134,7 @@ devfs_unmount(mp, mntflags, td) /* Return locked reference to root. */ static int -devfs_root(mp, flags, vpp, td) - struct mount *mp; - int flags; - struct vnode **vpp; - struct thread *td; +devfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td) { int error; struct vnode *vp; @@ -155,10 +150,7 @@ devfs_root(mp, flags, vpp, td) } static int -devfs_statfs(mp, sbp, td) - struct mount *mp; - struct statfs *sbp; - struct thread *td; +devfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) { sbp->f_flags = 0; diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index 6cf2f49..f14f480 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -37,7 +37,6 @@ /* * TODO: * remove empty directories - * mknod: hunt down DE_DELETED, compare name, reinstantiate. * mkdir: want it ? */ @@ -71,6 +70,8 @@ static struct vop_vector devfs_specops; static struct fileops devfs_ops_f; #include <fs/devfs/devfs.h> +#include <fs/devfs/devfs_int.h> + static int devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp) { @@ -139,9 +140,9 @@ loop: return (0); } if (de->de_dirent->d_type == DT_CHR) { - dev = *devfs_itod(de->de_inode); - if (dev == NULL) + if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) return (ENOENT); + dev = &de->de_cdp->cdp_c; } else { dev = NULL; } @@ -157,7 +158,8 @@ loop: dev_lock(); dev_refl(dev); vp->v_rdev = dev; - LIST_INSERT_HEAD(&dev->si_alist, de, de_alias); + KASSERT(vp->v_usecount == 1, + ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount)); dev->si_usecount += vp->v_usecount; dev_unlock(); VI_UNLOCK(vp); @@ -204,9 +206,6 @@ devfs_access(struct vop_access_args *ap) return (error); } -/* - * Special device advisory byte-level locks. - */ /* ARGSUSED */ static int devfs_advlock(struct vop_advlock_args *ap) @@ -215,9 +214,6 @@ devfs_advlock(struct vop_advlock_args *ap) return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } -/* - * Device close routine - */ /* ARGSUSED */ static int devfs_close(struct vop_close_args *ap) @@ -301,9 +297,6 @@ devfs_close_f(struct file *fp, struct thread *td) return (vnops.fo_close(fp, td)); } -/* - * Synch buffers associated with a block device - */ /* ARGSUSED */ static int devfs_fsync(struct vop_fsync_args *ap) @@ -370,7 +363,7 @@ devfs_getattr(struct vop_getattr_args *ap) fix(dev->si_ctime); vap->va_ctime = dev->si_ctime; - vap->va_rdev = dev->si_inode; + vap->va_rdev = dev->si_priv->cdp_inode; } vap->va_gen = 0; vap->va_flags = 0; @@ -380,9 +373,6 @@ devfs_getattr(struct vop_getattr_args *ap) return (error); } -/* - * Device ioctl operation. - */ /* ARGSUSED */ static int devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td) @@ -446,7 +436,6 @@ devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struc return (error); } - /* ARGSUSED */ static int devfs_kqfilter_f(struct file *fp, struct knote *kn) @@ -520,65 +509,47 @@ devfs_lookupx(struct vop_lookup_args *ap) return (error); } - lockmgr(&dmp->dm_lock, LK_UPGRADE, 0, curthread); devfs_populate(dmp); - lockmgr(&dmp->dm_lock, LK_DOWNGRADE, 0, curthread); dd = dvp->v_data; - TAILQ_FOREACH(de, &dd->de_dlist, de_list) { - if (cnp->cn_namelen != de->de_dirent->d_namlen) - continue; - if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name, - de->de_dirent->d_namlen) != 0) - continue; - if (de->de_flags & DE_WHITEOUT) - goto notfound; - goto found; - } - - if (nameiop == DELETE) - goto notfound; - - /* - * OK, we didn't have an entry for the name we were asked for - * so we try to see if anybody can create it on demand. - */ - pname = devfs_fqpn(specname, dvp, cnp); - if (pname == NULL) - goto notfound; + de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen); + while (de == NULL) { /* While(...) so we can use break */ - cdev = NULL; - EVENTHANDLER_INVOKE(dev_clone, td->td_ucred, pname, strlen(pname), - &cdev); - if (cdev == NULL) - goto notfound; - - lockmgr(&dmp->dm_lock, LK_UPGRADE, 0, curthread); - devfs_populate(dmp); - lockmgr(&dmp->dm_lock, LK_DOWNGRADE, 0, curthread); - - dde = devfs_itode(dmp, cdev->si_inode); - dev_rel(cdev); - - if (dde == NULL || *dde == NULL || *dde == DE_DELETED) - goto notfound; + if (nameiop == DELETE) + return (ENOENT); - if ((*dde)->de_flags & DE_WHITEOUT) - goto notfound; + /* + * OK, we didn't have an entry for the name we were asked for + * so we try to see if anybody can create it on demand. + */ + pname = devfs_fqpn(specname, dvp, cnp); + if (pname == NULL) + break; - de = *dde; - goto found; + cdev = NULL; + EVENTHANDLER_INVOKE(dev_clone, + td->td_ucred, pname, strlen(pname), &cdev); + if (cdev == NULL) + break; -notfound: + devfs_populate(dmp); - if ((nameiop == CREATE || nameiop == RENAME) && - (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) { - cnp->cn_flags |= SAVENAME; - return (EJUSTRETURN); + dev_lock(); + dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx]; + if (dde != NULL && *dde != NULL) + de = *dde; + dev_unlock(); + dev_rel(cdev); + break; } - return (ENOENT); - -found: + if (de == NULL || de->de_flags & DE_WHITEOUT) { + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) { + cnp->cn_flags |= SAVENAME; + return (EJUSTRETURN); + } + return (ENOENT); + } if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); @@ -589,10 +560,6 @@ found: *vpp = dvp; return (0); } - error = devfs_allocv(de, dvp->v_mount, vpp, td); - if (error) - return (error); - return (0); } error = devfs_allocv(de, dvp->v_mount, vpp, td); return (error); @@ -605,9 +572,9 @@ devfs_lookup(struct vop_lookup_args *ap) struct devfs_mount *dmp; dmp = VFSTODEVFS(ap->a_dvp->v_mount); - lockmgr(&dmp->dm_lock, LK_SHARED, 0, curthread); + sx_xlock(&dmp->dm_lock); j = devfs_lookupx(ap); - lockmgr(&dmp->dm_lock, LK_RELEASE, 0, curthread); + sx_xunlock(&dmp->dm_lock); return (j); } @@ -629,7 +596,7 @@ devfs_mknod(struct vop_mknod_args *ap) return (EOPNOTSUPP); dvp = ap->a_dvp; dmp = VFSTODEVFS(dvp->v_mount); - lockmgr(&dmp->dm_lock, LK_EXCLUSIVE, 0, curthread); + sx_xlock(&dmp->dm_lock); cnp = ap->a_cnp; vpp = ap->a_vpp; @@ -652,13 +619,10 @@ devfs_mknod(struct vop_mknod_args *ap) de->de_flags &= ~DE_WHITEOUT; error = devfs_allocv(de, dvp->v_mount, vpp, td); notfound: - lockmgr(&dmp->dm_lock, LK_RELEASE, 0, curthread); + sx_xunlock(&dmp->dm_lock); return (error); } -/* - * Open a special file. - */ /* ARGSUSED */ static int devfs_open(struct vop_open_args *ap) @@ -793,9 +757,6 @@ devfs_print(struct vop_print_args *ap) return (0); } -/* - * Vnode op for read - */ /* ARGSUSED */ static int devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) @@ -845,18 +806,14 @@ devfs_readdir(struct vop_readdir_args *ap) return (EINVAL); dmp = VFSTODEVFS(ap->a_vp->v_mount); - lockmgr(&dmp->dm_lock, LK_EXCLUSIVE, 0, curthread); + sx_xlock(&dmp->dm_lock); devfs_populate(dmp); - lockmgr(&dmp->dm_lock, LK_DOWNGRADE, 0, curthread); error = 0; de = ap->a_vp->v_data; off = 0; oldoff = uio->uio_offset; - if (ap->a_ncookies != NULL) { - *ap->a_ncookies = 0; - *ap->a_cookies = NULL; - } TAILQ_FOREACH(dd, &de->de_dlist, de_list) { + KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); if (dd->de_flags & DE_WHITEOUT) continue; if (dd->de_dirent->d_type == DT_DIR) @@ -874,7 +831,7 @@ devfs_readdir(struct vop_readdir_args *ap) } off += dp->d_reclen; } - lockmgr(&dmp->dm_lock, LK_RELEASE, 0, curthread); + sx_xunlock(&dmp->dm_lock); uio->uio_offset = off; return (error); } @@ -908,8 +865,6 @@ devfs_reclaim(struct vop_reclaim_args *ap) return (0); dev_lock(); - if (de != NULL) - LIST_REMOVE(de, de_alias); dev->si_usecount -= vp->v_usecount; dev_unlock(); dev_rel(dev); @@ -924,21 +879,16 @@ devfs_remove(struct vop_remove_args *ap) struct devfs_dirent *de; struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); - lockmgr(&dmp->dm_lock, LK_EXCLUSIVE, 0, curthread); + sx_xlock(&dmp->dm_lock); dd = ap->a_dvp->v_data; de = vp->v_data; if (de->de_dirent->d_type == DT_LNK) { TAILQ_REMOVE(&dd->de_dlist, de, de_list); - if (de->de_vnode) - de->de_vnode->v_data = NULL; -#ifdef MAC - mac_destroy_devfsdirent(de); -#endif - free(de, M_DEVFS); + devfs_delete(dmp, de); } else { de->de_flags |= DE_WHITEOUT; } - lockmgr(&dmp->dm_lock, LK_RELEASE, 0, curthread); + sx_xunlock(&dmp->dm_lock); return (0); } @@ -946,24 +896,43 @@ devfs_remove(struct vop_remove_args *ap) * Revoke is called on a tty when a terminal session ends. The vnode * is orphaned by setting v_op to deadfs so we need to let go of it * as well so that we create a new one next time around. + * + * XXX: locking :-( + * XXX: We mess around with other mountpoints without holding their sxlock. + * XXX: We hold the devlock() when we zero their vnode pointer, but is that + * XXX: enough ? */ static int devfs_revoke(struct vop_revoke_args *ap) { - struct vnode *vp = ap->a_vp; + struct vnode *vp = ap->a_vp, *vp2; struct cdev *dev; + struct cdev_priv *cdp; struct devfs_dirent *de; + int i; KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); dev = vp->v_rdev; + cdp = dev->si_priv; for (;;) { dev_lock(); - de = LIST_FIRST(&dev->si_alist); + vp2 = NULL; + for (i = 0; i <= cdp->cdp_maxdirent; i++) { + de = cdp->cdp_dirents[i]; + if (de == NULL) + continue; + vp2 = de->de_vnode; + de->de_vnode = NULL; + if (vp2 != NULL) + break; + } dev_unlock(); - if (de == NULL) - break; - vgone(de->de_vnode); + if (vp2 != NULL) { + vgone(vp2); + continue; + } + break; } return (0); } @@ -975,10 +944,10 @@ devfs_rioctl(struct vop_ioctl_args *ap) struct devfs_mount *dmp; dmp = VFSTODEVFS(ap->a_vp->v_mount); - lockmgr(&dmp->dm_lock, LK_EXCLUSIVE, 0, curthread); + sx_xlock(&dmp->dm_lock); devfs_populate(dmp); error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td); - lockmgr(&dmp->dm_lock, LK_RELEASE, 0, curthread); + sx_xunlock(&dmp->dm_lock); return (error); } @@ -1120,24 +1089,21 @@ devfs_symlink(struct vop_symlink_args *ap) de->de_uid = 0; de->de_gid = 0; de->de_mode = 0755; - de->de_inode = dmp->dm_inode++; + de->de_inode = alloc_unr(devfs_inos); de->de_dirent->d_type = DT_LNK; i = strlen(ap->a_target) + 1; de->de_symlink = malloc(i, M_DEVFS, M_WAITOK); bcopy(ap->a_target, de->de_symlink, i); - lockmgr(&dmp->dm_lock, LK_EXCLUSIVE, 0, td); + sx_xlock(&dmp->dm_lock); #ifdef MAC mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); #endif TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td); - lockmgr(&dmp->dm_lock, LK_RELEASE, 0, td); + sx_xunlock(&dmp->dm_lock); return (0); } -/* - * Vnode op for write - */ /* ARGSUSED */ static int devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) @@ -1176,7 +1142,7 @@ dev2udev(struct cdev *x) { if (x == NULL) return (NODEV); - return (x->si_inode); + return (x->si_priv->cdp_inode); } static struct fileops devfs_ops_f = { |