diff options
Diffstat (limited to 'sys/fs/devfs/devfs_devs.c')
-rw-r--r-- | sys/fs/devfs/devfs_devs.c | 546 |
1 files changed, 291 insertions, 255 deletions
diff --git a/sys/fs/devfs/devfs_devs.c b/sys/fs/devfs/devfs_devs.c index 77a5140..5b06b8c 100644 --- a/sys/fs/devfs/devfs_devs.c +++ b/sys/fs/devfs/devfs_devs.c @@ -36,44 +36,42 @@ #include <sys/conf.h> #include <sys/dirent.h> #include <sys/kernel.h> +#include <sys/limits.h> #include <sys/lock.h> #include <sys/mac.h> #include <sys/malloc.h> #include <sys/proc.h> +#include <sys/sx.h> #include <sys/sysctl.h> #include <sys/vnode.h> -#include <machine/atomic.h> +#include <sys/kdb.h> #include <fs/devfs/devfs.h> #include <fs/devfs/devfs_int.h> -static struct cdev *devfs_inot[NDEVFSINO]; -static struct cdev **devfs_overflow; -static int devfs_ref[NDEVFSINO]; -static int *devfs_refoverflow; -static int devfs_nextino = 3; -static int devfs_numino; -static int devfs_topino; -static int devfs_noverflowwant = NDEVFSOVERFLOW; -static int devfs_noverflow; -static unsigned devfs_generation; +/* + * The one true (but secret) list of active devices in the system. + * Locked by dev_lock()/devmtx + */ +static TAILQ_HEAD(,cdev_priv) cdevp_list = TAILQ_HEAD_INITIALIZER(cdevp_list); + +struct unrhdr *devfs_inos; -static struct devfs_dirent *devfs_find (struct devfs_dirent *dd, const char *name, int namelen); + +static MALLOC_DEFINE(M_DEVFS2, "DEVFS2", "DEVFS data 2"); +static MALLOC_DEFINE(M_DEVFS3, "DEVFS3", "DEVFS data 3"); +static MALLOC_DEFINE(M_CDEVP, "DEVFS1", "DEVFS cdev_priv storage"); static SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "DEVFS filesystem"); -SYSCTL_UINT(_vfs_devfs, OID_AUTO, noverflow, CTLFLAG_RW, - &devfs_noverflowwant, 0, "Size of DEVFS overflow table"); + +static unsigned devfs_generation; SYSCTL_UINT(_vfs_devfs, OID_AUTO, generation, CTLFLAG_RD, &devfs_generation, 0, "DEVFS generation number"); -SYSCTL_UINT(_vfs_devfs, OID_AUTO, inodes, CTLFLAG_RD, - &devfs_numino, 0, "DEVFS inodes"); -SYSCTL_UINT(_vfs_devfs, OID_AUTO, topinode, CTLFLAG_RD, - &devfs_topino, 0, "DEVFS highest inode#"); unsigned devfs_rule_depth = 1; SYSCTL_UINT(_vfs_devfs, OID_AUTO, rule_depth, CTLFLAG_RW, - &devfs_rule_depth, 0, "Max depth of ruleset include"); + &devfs_rule_depth, 0, "Max depth of ruleset include"); /* * Helper sysctl for devname(3). We're given a struct cdev * and return @@ -84,20 +82,24 @@ sysctl_devname(SYSCTL_HANDLER_ARGS) { int error; dev_t ud; - struct cdev *dev, **dp; + struct cdev_priv *cdp; error = SYSCTL_IN(req, &ud, sizeof (ud)); if (error) return (error); if (ud == NODEV) return(EINVAL); - dp = devfs_itod(ud); - if (dp == NULL) - return(ENOENT); - dev = *dp; - if (dev == NULL) +/* + ud ^ devfs_random(); +*/ + dev_lock(); + TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) + if (cdp->cdp_inode == ud) + break; + dev_unlock(); + if (cdp == NULL) return(ENOENT); - return(SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1)); + return(SYSCTL_OUT(req, cdp->cdp_c.si_name, strlen(cdp->cdp_c.si_name) + 1)); return (error); } @@ -107,77 +109,45 @@ SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY, SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD, 0, sizeof(struct cdev), "sizeof(struct cdev)"); -static int * -devfs_itor(int inode) -{ - if (inode < NDEVFSINO) - return (&devfs_ref[inode]); - else if (inode < NDEVFSINO + devfs_noverflow) - return (&devfs_refoverflow[inode - NDEVFSINO]); - else - panic ("YRK!"); -} +SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD, + 0, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)"); -static void -devfs_dropref(int inode) +struct cdev * +devfs_alloc(void) { - int *ip; + struct cdev_priv *cdp; + struct cdev *cdev; - ip = devfs_itor(inode); - atomic_add_int(ip, -1); -} + cdp = malloc(sizeof *cdp, M_CDEVP, M_USE_RESERVE | M_ZERO | M_WAITOK); -static int -devfs_getref(int inode) -{ - int *ip, i, j; - struct cdev **dp; - - ip = devfs_itor(inode); - dp = devfs_itod(inode); - for (;;) { - i = *ip; - j = i + 1; - if (!atomic_cmpset_int(ip, i, j)) - continue; - if (*dp != NULL) - return (1); - atomic_add_int(ip, -1); - return(0); - } -} + cdp->cdp_dirents = &cdp->cdp_dirent0; + cdp->cdp_dirent0 = NULL; + cdp->cdp_maxdirent = 0; -struct devfs_dirent ** -devfs_itode (struct devfs_mount *dm, int inode) -{ + cdev = &cdp->cdp_c; + cdev->si_priv = cdp; - if (inode < 0) - return (NULL); - if (inode < NDEVFSINO) - return (&dm->dm_dirent[inode]); - if (devfs_overflow == NULL) - return (NULL); - if (inode < NDEVFSINO + devfs_noverflow) - return (&dm->dm_overflow[inode - NDEVFSINO]); - return (NULL); + cdev->si_name = cdev->__si_namebuf; + LIST_INIT(&cdev->si_children); + return (cdev); } -struct cdev ** -devfs_itod (int inode) +void +devfs_free(struct cdev *cdev) { - - if (inode < 0) - return (NULL); - if (inode < NDEVFSINO) - return (&devfs_inot[inode]); - if (devfs_overflow == NULL) - return (NULL); - if (inode < NDEVFSINO + devfs_noverflow) - return (&devfs_overflow[inode - NDEVFSINO]); - return (NULL); + struct cdev_priv *cdp; + + cdp = cdev->si_priv; + if (cdev->si_cred != NULL) + crfree(cdev->si_cred); + if (cdp->cdp_inode > 0) + free_unr(devfs_inos, cdp->cdp_inode); + if (cdp->cdp_maxdirent > 0) + free(cdp->cdp_dirents, M_DEVFS2); + free(cdp, M_CDEVP); } -static struct devfs_dirent * +struct devfs_dirent * devfs_find(struct devfs_dirent *dd, const char *name, int namelen) { struct devfs_dirent *de; @@ -200,8 +170,8 @@ devfs_newdirent(char *name, int namelen) struct dirent d; d.d_namlen = namelen; - i = sizeof (*de) + GENERIC_DIRSIZ(&d); - de = malloc(i, M_DEVFS, M_WAITOK | M_ZERO); + i = sizeof (*de) + GENERIC_DIRSIZ(&d); + de = malloc(i, M_DEVFS3, M_WAITOK | M_ZERO); de->de_dirent = (struct dirent *)(de + 1); de->de_dirent->d_namlen = namelen; de->de_dirent->d_reclen = GENERIC_DIRSIZ(&d); @@ -217,158 +187,274 @@ devfs_newdirent(char *name, int namelen) } struct devfs_dirent * -devfs_vmkdir(char *name, int namelen, struct devfs_dirent *dotdot) +devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode) { struct devfs_dirent *dd; struct devfs_dirent *de; + /* Create the new directory */ dd = devfs_newdirent(name, namelen); - TAILQ_INIT(&dd->de_dlist); - dd->de_dirent->d_type = DT_DIR; dd->de_mode = 0555; dd->de_links = 2; dd->de_dir = dd; + if (inode != 0) + dd->de_inode = inode; + else + dd->de_inode = alloc_unr(devfs_inos); + /* Create the "." entry in the new directory */ de = devfs_newdirent(".", 1); de->de_dirent->d_type = DT_DIR; - de->de_dir = dd; de->de_flags |= DE_DOT; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); + de->de_dir = dd; + /* Create the ".." entry in the new directory */ de = devfs_newdirent("..", 2); de->de_dirent->d_type = DT_DIR; - if (dotdot == NULL) - de->de_dir = dd; - else - de->de_dir = dotdot; de->de_flags |= DE_DOTDOT; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); + if (dotdot == NULL) { + de->de_dir = dd; + } else { + de->de_dir = dotdot; + TAILQ_INSERT_TAIL(&dotdot->de_dlist, dd, de_list); + dotdot->de_links++; + } +#ifdef MAC + mac_create_devfs_directory(dmp->dm_mount, name, namelen, dd); +#endif return (dd); } -static void -devfs_delete(struct devfs_dirent *dd, struct devfs_dirent *de) +void +devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de) { if (de->de_symlink) { free(de->de_symlink, M_DEVFS); de->de_symlink = NULL; } - if (de->de_vnode) + if (de->de_vnode != NULL) { de->de_vnode->v_data = NULL; - TAILQ_REMOVE(&dd->de_dlist, de, de_list); + vgone(de->de_vnode); + de->de_vnode = NULL; + } #ifdef MAC mac_destroy_devfsdirent(de); #endif - free(de, M_DEVFS); + if (de->de_inode > DEVFS_ROOTINO) { + free_unr(devfs_inos, de->de_inode); + de->de_inode = 0; + } + free(de, M_DEVFS3); } -void -devfs_purge(struct devfs_dirent *dd) +/* + * Called on unmount. + * Recursively removes the entire tree + */ + +static void +devfs_purge(struct devfs_mount *dm, struct devfs_dirent *dd) { struct devfs_dirent *de; + sx_assert(&dm->dm_lock, SX_XLOCKED); for (;;) { de = TAILQ_FIRST(&dd->de_dlist); if (de == NULL) break; - devfs_delete(dd, de); + TAILQ_REMOVE(&dd->de_dlist, de, de_list); + if (de->de_flags & (DE_DOT|DE_DOTDOT)) + devfs_delete(dm, de); + else if (de->de_dirent->d_type == DT_DIR) + devfs_purge(dm, de); + else + devfs_delete(dm, de); } - FREE(dd, M_DEVFS); + devfs_delete(dm, dd); } +/* + * Each cdev_priv has an array of pointers to devfs_dirent which is indexed + * by the mount points dm_idx. + * This function extends the array when necessary, taking into account that + * the default array is 1 element and not malloc'ed. + */ +static void +devfs_metoo(struct cdev_priv *cdp, struct devfs_mount *dm) +{ + struct devfs_dirent **dep; + int siz; + + siz = (dm->dm_idx + 1) * sizeof *dep; + dep = malloc(siz, M_DEVFS2, M_WAITOK | M_ZERO); + dev_lock(); + if (dm->dm_idx <= cdp->cdp_maxdirent) { + /* We got raced */ + dev_unlock(); + free(dep, M_DEVFS2); + return; + } + memcpy(dep, cdp->cdp_dirents, (cdp->cdp_maxdirent + 1) * sizeof *dep); + if (cdp->cdp_maxdirent > 0) + free(cdp->cdp_dirents, M_DEVFS2); + cdp->cdp_dirents = dep; + /* + * XXX: if malloc told us how much we actually got this could + * XXX: be optimized. + */ + cdp->cdp_maxdirent = dm->dm_idx; + dev_unlock(); +} -void -devfs_populate(struct devfs_mount *dm) +static int +devfs_populate_loop(struct devfs_mount *dm, int cleanup) { - int i, j; - struct cdev *dev, *pdev; + struct cdev_priv *cdp; + struct devfs_dirent *de; struct devfs_dirent *dd; - struct devfs_dirent *de, **dep; + struct cdev *pdev; + int j; char *q, *s; - if (dm->dm_generation == devfs_generation) - return; - if (devfs_noverflow && dm->dm_overflow == NULL) { - i = devfs_noverflow * sizeof (struct devfs_dirent *); - MALLOC(dm->dm_overflow, struct devfs_dirent **, i, - M_DEVFS, M_WAITOK | M_ZERO); - } - while (dm->dm_generation != devfs_generation) { - dm->dm_generation = devfs_generation; - for (i = 0; i <= devfs_topino; i++) { - dev = *devfs_itod(i); - dep = devfs_itode(dm, i); - de = *dep; - if (dev == NULL && de == DE_DELETED) { - *dep = NULL; - continue; - } - if (dev == NULL && de != NULL) { - dd = de->de_dir; - *dep = NULL; - devfs_delete(dd, de); - devfs_dropref(i); - continue; - } - if (dev == NULL) - continue; - if (de != NULL) + sx_assert(&dm->dm_lock, SX_XLOCKED); + dev_lock(); + TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) { + + KASSERT(cdp->cdp_dirents != NULL, ("NULL cdp_dirents")); + + /* + * If we are unmounting, or the device has been destroyed, + * clean up our dirent. + */ + if ((cleanup || !(cdp->cdp_flags & CDP_ACTIVE)) && + dm->dm_idx <= cdp->cdp_maxdirent && + cdp->cdp_dirents[dm->dm_idx] != NULL) { + de = cdp->cdp_dirents[dm->dm_idx]; + cdp->cdp_dirents[dm->dm_idx] = NULL; + cdp->cdp_inuse--; + KASSERT(cdp == de->de_cdp, + ("%s %d %s %p %p", __func__, __LINE__, + cdp->cdp_c.si_name, cdp, de->de_cdp)); + KASSERT(de->de_dir != NULL, ("Null de->de_dir")); + dev_unlock(); + + TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list); + de->de_cdp = NULL; + de->de_inode = 0; + devfs_delete(dm, de); + return (1); + } + /* + * GC any lingering devices + */ + if (!(cdp->cdp_flags & CDP_ACTIVE)) { + if (cdp->cdp_inuse > 0) continue; - if (!devfs_getref(i)) + TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); + dev_unlock(); + dev_rel(&cdp->cdp_c); + return (1); + } + /* + * Don't create any new dirents if we are unmounting + */ + if (cleanup) + continue; + KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!")); + + if (dm->dm_idx <= cdp->cdp_maxdirent && + cdp->cdp_dirents[dm->dm_idx] != NULL) { + de = cdp->cdp_dirents[dm->dm_idx]; + KASSERT(cdp == de->de_cdp, ("inconsistent cdp")); + continue; + } + + + cdp->cdp_inuse++; + dev_unlock(); + + if (dm->dm_idx > cdp->cdp_maxdirent) + devfs_metoo(cdp, dm); + + dd = dm->dm_rootdir; + s = cdp->cdp_c.si_name; + for (;;) { + for (q = s; *q != '/' && *q != '\0'; q++) continue; - dd = dm->dm_rootdir; - s = dev->si_name; - for (;;) { - for (q = s; *q != '/' && *q != '\0'; q++) - continue; - if (*q != '/') - break; - de = devfs_find(dd, s, q - s); - if (de == NULL) { - de = devfs_vmkdir(s, q - s, dd); -#ifdef MAC - mac_create_devfs_directory( - dm->dm_mount, s, q - s, de); -#endif - de->de_inode = dm->dm_inode++; - TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); - dd->de_links++; - } - s = q + 1; - dd = de; - } - de = devfs_newdirent(s, q - s); - if (dev->si_flags & SI_ALIAS) { - de->de_inode = dm->dm_inode++; - de->de_uid = 0; - de->de_gid = 0; - de->de_mode = 0755; - de->de_dirent->d_type = DT_LNK; - pdev = dev->si_parent; - j = strlen(pdev->si_name) + 1; - MALLOC(de->de_symlink, char *, j, M_DEVFS, M_WAITOK); - bcopy(pdev->si_name, de->de_symlink, j); - } else { - de->de_inode = i; - de->de_uid = dev->si_uid; - de->de_gid = dev->si_gid; - de->de_mode = dev->si_mode; - de->de_dirent->d_type = DT_CHR; - } + if (*q != '/') + break; + de = devfs_find(dd, s, q - s); + if (de == NULL) + de = devfs_vmkdir(dm, s, q - s, dd, 0); + s = q + 1; + dd = de; + } + + de = devfs_newdirent(s, q - s); + if (cdp->cdp_c.si_flags & SI_ALIAS) { + de->de_uid = 0; + de->de_gid = 0; + de->de_mode = 0755; + de->de_dirent->d_type = DT_LNK; + pdev = cdp->cdp_c.si_parent; + j = strlen(pdev->si_name) + 1; + de->de_symlink = malloc(j, M_DEVFS, M_WAITOK); + bcopy(pdev->si_name, de->de_symlink, j); + } else { + de->de_uid = cdp->cdp_c.si_uid; + de->de_gid = cdp->cdp_c.si_gid; + de->de_mode = cdp->cdp_c.si_mode; + de->de_dirent->d_type = DT_CHR; + } + de->de_inode = cdp->cdp_inode; + de->de_cdp = cdp; #ifdef MAC - mac_create_devfs_device(dev->si_cred, dm->dm_mount, - dev, de); + mac_create_devfs_device(cdp->cdp_c.si_cred, dm->dm_mount, + &cdp->cdp_c, de); #endif - *dep = de; - de->de_dir = dd; - devfs_rules_apply(dm, de); - TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); - } + de->de_dir = dd; + TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); + devfs_rules_apply(dm, de); + dev_lock(); + /* XXX: could check that cdp is still active here */ + KASSERT(cdp->cdp_dirents[dm->dm_idx] == NULL, + ("%s %d\n", __func__, __LINE__)); + cdp->cdp_dirents[dm->dm_idx] = de; + KASSERT(de->de_cdp != (void *)0xdeadc0de, + ("%s %d\n", __func__, __LINE__)); + dev_unlock(); + return (1); } + dev_unlock(); + return (0); +} + +void +devfs_populate(struct devfs_mount *dm) +{ + + sx_assert(&dm->dm_lock, SX_XLOCKED); + if (dm->dm_generation == devfs_generation) + return; + while (devfs_populate_loop(dm, 0)) + continue; + dm->dm_generation = devfs_generation; +} + +void +devfs_cleanup(struct devfs_mount *dm) +{ + + sx_assert(&dm->dm_lock, SX_XLOCKED); + while (devfs_populate_loop(dm, 1)) + continue; + devfs_purge(dm, dm->dm_rootdir); } /* @@ -380,83 +466,33 @@ devfs_populate(struct devfs_mount *dm) void devfs_create(struct cdev *dev) { - int ino, i, *ip; - struct cdev **dp; - struct cdev **ot; - int *or; - int n; - - for (;;) { - /* Grab the next inode number */ - ino = devfs_nextino; - i = ino + 1; - /* wrap around when we reach the end */ - if (i >= NDEVFSINO + devfs_noverflow) - i = 3; - devfs_nextino = i; - - /* see if it was occupied */ - dp = devfs_itod(ino); - KASSERT(dp != NULL, ("DEVFS: No devptr inode %d", ino)); - if (*dp != NULL) - continue; - ip = devfs_itor(ino); - KASSERT(ip != NULL, ("DEVFS: No iptr inode %d", ino)); - if (*ip != 0) - continue; - break; - } - - *dp = dev; - dev->si_inode = ino; - if (i > devfs_topino) - devfs_topino = i; - - devfs_numino++; + struct cdev_priv *cdp; + + mtx_assert(&devmtx, MA_OWNED); + cdp = dev->si_priv; + cdp->cdp_flags |= CDP_ACTIVE; + cdp->cdp_inode = alloc_unrl(devfs_inos); + dev_refl(dev); + TAILQ_INSERT_TAIL(&cdevp_list, cdp, cdp_list); devfs_generation++; - - if (devfs_overflow != NULL || devfs_numino + 100 < NDEVFSINO) - return; - - /* - * Try to allocate overflow table - * XXX: we can probably be less panicy these days and a linked - * XXX: list of PAGESIZE/PTRSIZE entries might be a better idea. - * - * XXX: we may be into witness unlove here. - */ - n = devfs_noverflowwant; - ot = malloc(sizeof(*ot) * n, M_DEVFS, M_NOWAIT | M_ZERO); - if (ot == NULL) - return; - or = malloc(sizeof(*or) * n, M_DEVFS, M_NOWAIT | M_ZERO); - if (or == NULL) { - free(ot, M_DEVFS); - return; - } - devfs_overflow = ot; - devfs_refoverflow = or; - devfs_noverflow = n; - printf("DEVFS Overflow table with %d entries allocated\n", n); - return; } void devfs_destroy(struct cdev *dev) { - int ino; - struct cdev **dp; + struct cdev_priv *cdp; - ino = dev->si_inode; - dev->si_inode = 0; - if (ino == 0) - return; - dp = devfs_itod(ino); - KASSERT(*dp == dev, - ("DEVFS: destroying wrong cdev ino %d", ino)); - *dp = NULL; - devfs_numino--; + mtx_assert(&devmtx, MA_OWNED); + cdp = dev->si_priv; + cdp->cdp_flags &= ~CDP_ACTIVE; devfs_generation++; - if (ino < devfs_nextino) - devfs_nextino = ino; } + +static void +devfs_devs_init(void *junk __unused) +{ + + devfs_inos = new_unrhdr(DEVFS_ROOTINO + 1, INT_MAX, &devmtx); +} + +SYSINIT(devfs_devs, SI_SUB_DEVFS, SI_ORDER_FIRST, devfs_devs_init, NULL); |