summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorphk <phk@FreeBSD.org>2004-11-08 10:46:47 +0000
committerphk <phk@FreeBSD.org>2004-11-08 10:46:47 +0000
commit63cd9549c7364ad52248b1664e4c346890ee6448 (patch)
treec9cbf7fbe9e85ffb9c95ca6552b35574bb1404f7 /sys
parent8461ec98deac6d4effb03efc1356c91047b5d84b (diff)
downloadFreeBSD-src-63cd9549c7364ad52248b1664e4c346890ee6448.zip
FreeBSD-src-63cd9549c7364ad52248b1664e4c346890ee6448.tar.gz
Add optional device vnode bypass to DEVFS.
The tunable vfs.devfs.fops controls this feature and defaults to off. When enabled (vfs.devfs.fops=1 in the loader), device vnodes opened through a file descriptor get a special fops vector which, instead of taking the detour through the vnode layer, goes directly to DEVFS. Amongst other things, this allows us to run Giant-free read/write to device drivers which have been weaned off D_NEEDGIANT. Currently this means /dev/null, /dev/zero, disks (and maybe the random stuff?). On a 700MHz K7 machine this doubles the speed of "dd if=/dev/zero of=/dev/null bs=1 count=1000000", which roughly translates to shaving 2 usec off each read/write syscall. The poll/kqfilter paths need more work before they are Giant-free; this work is ongoing in p4::phk_bufwork. Please test this and report any problems, LORs, etc.
Diffstat (limited to 'sys')
-rw-r--r--sys/fs/devfs/devfs_vnops.c287
1 files changed, 287 insertions, 0 deletions
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index f3b95fa..d3581aa 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -49,6 +49,9 @@
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
@@ -59,11 +62,33 @@
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/time.h>
+#include <sys/ttycom.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <fs/devfs/devfs.h>
+static int devfs_fops = 0; /* nonzero enables the fileops bypass; devfs_open() fetches it from the "vfs.devfs.fops" tunable */
+
+static fo_rdwr_t devfs_read_f; /* forward declarations of the fileops handlers defined later in this file */
+static fo_rdwr_t devfs_write_f;
+static fo_ioctl_t devfs_ioctl_f;
+static fo_poll_t devfs_poll_f;
+static fo_kqfilter_t devfs_kqfilter_f;
+static fo_stat_t devfs_stat_f;
+static fo_close_t devfs_close_f;
+
+struct fileops devfs_ops_f = { /* fileops vector that devfs_open() stores in fp->f_ops when devfs_fops is set */
+ .fo_read = devfs_read_f, /* calls the driver's d_read directly */
+ .fo_write = devfs_write_f, /* calls the driver's d_write directly */
+ .fo_ioctl = devfs_ioctl_f, /* calls the driver's d_ioctl directly */
+ .fo_poll = devfs_poll_f, /* calls the driver's d_poll directly */
+ .fo_kqfilter = devfs_kqfilter_f, /* calls the driver's d_kqfilter directly */
+ .fo_stat = devfs_stat_f, /* forwards to vnops.fo_stat */
+ .fo_close = devfs_close_f, /* forwards to vnops.fo_close */
+ .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
+};
+
static int devfs_access(struct vop_access_args *ap);
static int devfs_advlock(struct vop_advlock_args *ap);
static int devfs_close(struct vop_close_args *ap);
@@ -314,6 +339,18 @@ devfs_close(ap)
return (error);
}
+/*
+ * Fileops close entry for device descriptors that use devfs_ops_f.
+ *
+ * No device-specific work is done here: the request is handed to
+ * vnops.fo_close() and its return value is passed back unchanged.
+ */
+static int
+devfs_close_f(struct file *fp, struct thread *td)
+{
+
+#if 0
+	printf("devfs_close_f(%s)\n", devtoname(fp->f_data));
+#endif
+	return (vnops.fo_close(fp, td));
+}
+
/*
* Synch buffers associated with a block device
*/
@@ -439,6 +476,67 @@ devfs_ioctl(ap)
return (error);
}
+static int
+devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
+{ /*
+   * Fileops ioctl entry: takes the cdev from fp->f_data and calls the
+   * driver's d_ioctl directly.  Returns ENXIO when dev_refthread()
+   * yields no cdevsw, 0 for FIODTYPE (answered locally), otherwise the
+   * driver's result with ENOIOCTL mapped to ENOTTY.  On a successful
+   * TIOCSCTTY the session's s_ttyvp is switched to fp->f_vnode.
+   * The cred argument is not used.
+   */
+ struct cdev *dev;
+ struct cdevsw *dsw;
+ struct vnode *vp = fp->f_vnode;
+ struct vnode *vpold;
+ int error;
+
+ dev = fp->f_data;
+#if 0
+ printf("devfs_ioctl_f(%s)\n", devtoname(dev));
+#endif
+ KASSERT(dev->si_refcount > 0,
+ ("devfs_ioctl() on un-referenced struct cdev *(%s)",
+ devtoname(dev)));
+ dsw = dev_refthread(dev); /* paired with dev_relthread() on every path below */
+ if (dsw == NULL)
+ return (ENXIO);
+
+ if (com == FIODTYPE) { /* answered from the cdevsw flags without calling the driver */
+ *(int *)data = dsw->d_flags & D_TYPEMASK;
+ dev_relthread(dev);
+ return (0);
+ }
+ if (dsw->d_flags & D_NEEDGIANT) /* take Giant only for drivers that still ask for it */
+ mtx_lock(&Giant);
+ error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
+ dev_relthread(dev);
+ if (error == ENOIOCTL) /* translate the driver's ENOIOCTL into ENOTTY for the caller */
+ error = ENOTTY;
+ if (error == 0 && com == TIOCSCTTY) { /* driver accepted TIOCSCTTY: record vp as the session's s_ttyvp */
+
+ /* Do nothing if reassigning same control tty */
+ sx_slock(&proctree_lock);
+ if (td->td_proc->p_session->s_ttyvp == vp) {
+ sx_sunlock(&proctree_lock);
+ return (0);
+ }
+
+ mtx_lock(&Giant); /* held across VREF() and vrele() below */
+
+ vpold = td->td_proc->p_session->s_ttyvp;
+ VREF(vp); /* new reference for the pointer stored in the session */
+ SESS_LOCK(td->td_proc->p_session);
+ td->td_proc->p_session->s_ttyvp = vp;
+ SESS_UNLOCK(td->td_proc->p_session);
+
+ sx_sunlock(&proctree_lock);
+
+ /* Get rid of reference to old control tty */
+ if (vpold)
+ vrele(vpold);
+ mtx_unlock(&Giant);
+ }
+ return (error);
+}
+
+
/* ARGSUSED */
static int
devfs_kqfilter(ap)
@@ -468,6 +566,32 @@ devfs_kqfilter(ap)
}
+/*
+ * Fileops kqfilter entry: takes the cdev from fp->f_data and calls the
+ * driver's d_kqfilter directly, holding Giant around the call only when
+ * the driver sets D_NEEDGIANT.
+ *
+ * Returns ENXIO when dev_refthread() yields no cdevsw, otherwise the
+ * driver's return value.  A zero return in the no-cdevsw case would
+ * report success even though no driver filter was attached to kn, so
+ * the failure is reported the same way the read/write/ioctl entry
+ * points in this file report it.
+ */
static int
+devfs_kqfilter_f(struct file *fp, struct knote *kn)
+{
+	struct cdev *dev;
+	struct cdevsw *dsw;
+	int error;
+
+	dev = fp->f_data;
+#if 0
+	printf("devfs_kqfilter_f(%s)\n", devtoname(dev));
+#endif
+	KASSERT(dev->si_refcount > 0,
+	    ("devfs_kqfilter() on un-referenced struct cdev *(%s)",
+	    devtoname(dev)));
+	dsw = dev_refthread(dev);
+	if (dsw == NULL)
+		return (ENXIO);
+	if (dsw->d_flags & D_NEEDGIANT)
+		mtx_lock(&Giant);
+	error = dsw->d_kqfilter(dev, kn);
+	if (dsw->d_flags & D_NEEDGIANT)
+		mtx_unlock(&Giant);
+	dev_relthread(dev);
+	return (error);
+}
+
+static int
devfs_lookupx(ap)
struct vop_lookup_args /* {
struct vnode * a_dvp;
@@ -697,8 +821,17 @@ devfs_open(ap)
struct thread *td = ap->a_td;
struct vnode *vp = ap->a_vp;
struct cdev *dev = vp->v_rdev;
+ struct file *fp;
int error;
struct cdevsw *dsw;
+ static int once;
+
+ if (!once) {
+ TUNABLE_INT_FETCH("vfs.devfs.fops", &devfs_fops);
+ if (devfs_fops)
+ printf("WARNING: DEVFS uses fops\n");
+ once = 1;
+ }
if (vp->v_type == VBLK)
return (ENXIO);
@@ -751,6 +884,23 @@ devfs_open(ap)
if (error)
return (error);
+ if (devfs_fops && ap->a_fdidx >= 0) {
+ /*
+ * This is a pretty disgustingly long chain, but I am not
+ * sure there is any better way. Passing the fdidx into
+ * VOP_OPEN() offers us more information than just passing
+ * the file *.
+ */
+ fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx];
+ if (fp->f_ops == &badfileops) {
+#if 0
+ printf("devfs_open(%s)\n", devtoname(dev));
+#endif
+ fp->f_ops = &devfs_ops_f;
+ fp->f_data = dev;
+ }
+ }
+
return (error);
}
@@ -817,6 +967,32 @@ devfs_poll(ap)
return(error);
}
+/*
+ * Fileops poll entry: takes the cdev from fp->f_data and asks the
+ * driver's d_poll directly.
+ *
+ * Returns 0 when dev_refthread() yields no cdevsw, otherwise whatever
+ * d_poll returned.  Giant is held around the driver call only when the
+ * driver sets D_NEEDGIANT.  The cred argument is not used.
+ */
+static int
+devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
+{
+	struct cdev *cdp = fp->f_data;
+	struct cdevsw *csw;
+	int need_giant, revents;
+
+#if 0
+	printf("devfs_poll_f(%s)\n", devtoname(cdp));
+#endif
+	csw = dev_refthread(cdp);
+	if (csw == NULL)
+		return (0);
+	KASSERT(cdp->si_refcount > 0,
+	    ("devfs_poll() on un-referenced struct cdev *(%s)",
+	    devtoname(cdp)));
+
+	need_giant = (csw->d_flags & D_NEEDGIANT) != 0;
+	if (need_giant)
+		mtx_lock(&Giant);
+	revents = csw->d_poll(cdp, events, td);
+	if (need_giant)
+		mtx_unlock(&Giant);
+
+	dev_relthread(cdp);
+	return (revents);
+}
+
/*
* Print out the contents of a special device vnode.
*/
@@ -880,6 +1056,55 @@ devfs_read(ap)
}
+/*
+ * Fileops read entry: takes the cdev from fp->f_data and calls the
+ * driver's d_read directly.
+ *
+ * The I/O flags handed to the driver are built from fp->f_flag
+ * (FNONBLOCK -> IO_NDELAY, O_DIRECT -> IO_DIRECT).  Unless the caller
+ * passes FOF_OFFSET, the transfer starts at fp->f_offset and that
+ * offset is advanced afterwards; fp->f_nextoff is updated in either
+ * case.  Giant is held around the driver call only when the driver
+ * sets D_NEEDGIANT.
+ *
+ * Returns ENXIO when dev_refthread() yields no cdevsw, otherwise the
+ * driver's return value.  The cred and td arguments are not used.
+ */
static int
+devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
+{
+	struct cdev *dev;
+	int ioflag, error, resid;
+	struct cdevsw *dsw;
+
+	dev = fp->f_data;
+#if 0
+	/*
+	 * Enabling this one is dangerous, syslog will log once for each
+	 * read from /dev/klog so...
+	 */
+	printf("devfs_read_f(%s)\n", devtoname(dev));
+#endif
+	KASSERT(dev->si_refcount > 0,
+	    ("devfs_read_f() on un-referenced struct cdev *(%s)",
+	    devtoname(dev)));
+	dsw = dev_refthread(dev);
+	if (dsw == NULL)
+		return (ENXIO);
+
+	/* Remembered so we can tell afterwards whether any data moved. */
+	resid = uio->uio_resid;
+
+	ioflag = 0;
+	if (fp->f_flag & FNONBLOCK)
+		ioflag |= IO_NDELAY;
+	if (fp->f_flag & O_DIRECT)
+		ioflag |= IO_DIRECT;
+
+	if ((flags & FOF_OFFSET) == 0)
+		uio->uio_offset = fp->f_offset;
+
+	if (dsw->d_flags & D_NEEDGIANT)
+		mtx_lock(&Giant);
+	error = dsw->d_read(dev, uio, ioflag);
+	if (dsw->d_flags & D_NEEDGIANT)
+		mtx_unlock(&Giant);
+	dev_relthread(dev);
+
+	/*
+	 * Stamp the access time if data was transferred, or if a
+	 * non-empty request completed without error.
+	 */
+	if (uio->uio_resid != resid || (error == 0 && resid != 0))
+		vfs_timestamp(&dev->si_atime);
+
+	if ((flags & FOF_OFFSET) == 0)
+		fp->f_offset = uio->uio_offset;
+	fp->f_nextoff = uio->uio_offset;
+	return (error);
+}
+
+static int
devfs_readdir(ap)
struct vop_readdir_args /* {
struct vnode *a_vp;
@@ -1199,6 +1424,18 @@ devfs_setlabel(ap)
#endif
+/*
+ * Fileops stat entry for device descriptors that use devfs_ops_f.
+ *
+ * No device-specific work is done here: the request is handed to
+ * vnops.fo_stat() and its return value is passed back unchanged.
+ */
static int
+devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
+{
+
+#if 0
+	printf("devfs_stat_f(%s)\n", devtoname(fp->f_data));
+#endif
+	return (vnops.fo_stat(fp, sb, cred, td));
+}
+
+static int
devfs_symlink(ap)
struct vop_symlink_args /* {
struct vnode *a_dvp;
@@ -1288,6 +1525,56 @@ devfs_write(ap)
return (error);
}
+/*
+ * Fileops write entry: takes the cdev from fp->f_data and calls the
+ * driver's d_write directly.
+ *
+ * The I/O flags start from IO_UNIT and pick up IO_NDELAY, IO_DIRECT
+ * and IO_SYNC from fp->f_flag (IO_SYNC also when the vnode's mount has
+ * MNT_SYNCHRONOUS set).  Unless the caller passes FOF_OFFSET, the
+ * transfer starts at fp->f_offset and that offset is advanced
+ * afterwards; fp->f_nextoff is updated in either case.  Giant is held
+ * around the driver call only when the driver sets D_NEEDGIANT.
+ *
+ * Returns ENXIO when dev_refthread() yields no cdevsw, otherwise the
+ * driver's return value.  The cred argument is not used.
+ */
+static int
+devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
+{
+	struct cdev *cdp = fp->f_data;
+	struct cdevsw *csw;
+	struct vnode *devvp;
+	int before, ioflag, rv, use_fp_offset;
+
+#if 0
+	printf("devfs_write_f(%s)\n", devtoname(cdp));
+#endif
+	KASSERT(cdp->si_refcount > 0,
+	    ("devfs_write() on un-referenced struct cdev *(%s)",
+	    devtoname(cdp)));
+	csw = dev_refthread(cdp);
+	if (csw == NULL)
+		return (ENXIO);
+
+	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
+	devvp = fp->f_vnode;
+
+	/* Translate the descriptor flags into I/O flags for the driver. */
+	ioflag = IO_UNIT;
+	if (fp->f_flag & FNONBLOCK)
+		ioflag |= IO_NDELAY;
+	if (fp->f_flag & O_DIRECT)
+		ioflag |= IO_DIRECT;
+	if (fp->f_flag & O_FSYNC)
+		ioflag |= IO_SYNC;
+	else if (devvp->v_mount && (devvp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
+		ioflag |= IO_SYNC;
+
+	use_fp_offset = ((flags & FOF_OFFSET) == 0);
+	if (use_fp_offset)
+		uio->uio_offset = fp->f_offset;
+
+	/* Remembered so we can tell afterwards whether any data moved. */
+	before = uio->uio_resid;
+
+	if (csw->d_flags & D_NEEDGIANT)
+		mtx_lock(&Giant);
+	rv = csw->d_write(cdp, uio, ioflag);
+	if (csw->d_flags & D_NEEDGIANT)
+		mtx_unlock(&Giant);
+	dev_relthread(cdp);
+
+	/*
+	 * Stamp the change and modification times if data was
+	 * transferred, or if a non-empty request completed without error.
+	 */
+	if (uio->uio_resid != before || (rv == 0 && before != 0)) {
+		vfs_timestamp(&cdp->si_ctime);
+		cdp->si_mtime = cdp->si_ctime;
+	}
+
+	if (use_fp_offset)
+		fp->f_offset = uio->uio_offset;
+	fp->f_nextoff = uio->uio_offset;
+	return (rv);
+}
static struct vnodeopv_entry_desc devfs_vnodeop_entries[] = {
{ &vop_default_desc, (vop_t *) vop_defaultop },
OpenPOWER on IntegriCloud