summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordyson <dyson@FreeBSD.org>1996-01-28 23:38:26 +0000
committerdyson <dyson@FreeBSD.org>1996-01-28 23:38:26 +0000
commit026024aadddf151f1052a03c6d81ca98a747d629 (patch)
treeb0fe4dbaf274f606e4d720acd7dfed35e9591020
parent72b11e691f1b04a70923bce5e14a27b8c2d3be54 (diff)
downloadFreeBSD-src-026024aadddf151f1052a03c6d81ca98a747d629.zip
FreeBSD-src-026024aadddf151f1052a03c6d81ca98a747d629.tar.gz
Added new files to support the new fast pipes. After the follow-on
commits, pipe performance should increase significantly. The pipe(2) system call is currently supported, while fifofs will be added later.
-rw-r--r--sys/kern/sys_pipe.c573
-rw-r--r--sys/sys/pipe.h73
2 files changed, 646 insertions, 0 deletions
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
new file mode 100644
index 0000000..ea45688
--- /dev/null
+++ b/sys/kern/sys_pipe.c
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) 1996 John S. Dyson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice immediately at the beginning of the file, without modification,
+ * this list of conditions, and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Absolutely no warranty of function or purpose is made by the author
+ * John S. Dyson.
+ * 4. This work was done expressly for inclusion into FreeBSD. Other use
+ * is allowed if this notation is included.
+ * 5. Modifications may be freely made to this file if the above conditions
+ * are met.
+ *
+ * $Id$
+ */
+
+#ifndef OLD_PIPE
+
+/*
+ * This file contains a high-performance replacement for the socket-based
+ * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
+ * all features of sockets, but does do everything that pipes normally
+ * do.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/protosw.h>
+#include <sys/stat.h>
+#include <sys/filedesc.h>
+#include <sys/malloc.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/select.h>
+#include <sys/signalvar.h>
+#include <sys/errno.h>
+#include <sys/queue.h>
+#include <sys/vmmeter.h>
+#include <sys/kernel.h>
+#include <sys/sysproto.h>
+#include <sys/pipe.h>
+
+#include <vm/vm.h>
+#include <vm/vm_prot.h>
+#include <vm/vm_param.h>
+#include <vm/lock.h>
+#include <vm/vm_object.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+static int pipe_read __P((struct file *fp, struct uio *uio,
+ struct ucred *cred));
+static int pipe_write __P((struct file *fp, struct uio *uio,
+ struct ucred *cred));
+static int pipe_close __P((struct file *fp, struct proc *p));
+static int pipe_select __P((struct file *fp, int which, struct proc *p));
+static int pipe_ioctl __P((struct file *fp, int cmd, caddr_t data, struct proc *p));
+
+static struct fileops pipeops =
+ { pipe_read, pipe_write, pipe_ioctl, pipe_select, pipe_close };
+
+/*
+ * Default pipe buffer size(s), this can be kind-of large now because pipe
+ * space is pageable. The pipe code will try to maintain locality of
+ * reference for performance reasons, so small amounts of outstanding I/O
+ * will not wipe the cache.
+ */
+#define PIPESIZE (16384)
+#define MINPIPESIZE (PIPESIZE/3)
+#define MAXPIPESIZE (2*PIPESIZE/3)
+
+static void pipeclose __P((struct pipe *cpipe));
+static void pipebufferinit __P((struct pipe *cpipe));
+static void pipeinit __P((struct pipe *cpipe));
+static __inline int pipelock __P((struct pipe *cpipe));
+static __inline void pipeunlock __P((struct pipe *cpipe));
+
+/*
+ * The pipe system call for the DTYPE_PIPE type of pipes
+ */
+
+/* ARGSUSED */
+int
+pipe(p, uap, retval)
+ struct proc *p;
+ struct pipe_args /* {
+ int dummy;
+ } */ *uap;
+ int retval[];
+{
+ register struct filedesc *fdp = p->p_fd;
+ struct file *rf, *wf;
+ struct pipe *rpipe, *wpipe;
+ int fd, error;
+
+ rpipe = malloc( sizeof (*rpipe), M_TEMP, M_WAITOK);
+ pipeinit(rpipe);
+ wpipe = malloc( sizeof (*wpipe), M_TEMP, M_WAITOK);
+ pipeinit(wpipe);
+
+ error = falloc(p, &rf, &fd);
+ if (error)
+ goto free2;
+ retval[0] = fd;
+ rf->f_flag = FREAD | FWRITE;
+ rf->f_type = DTYPE_PIPE;
+ rf->f_ops = &pipeops;
+ rf->f_data = (caddr_t)rpipe;
+ error = falloc(p, &wf, &fd);
+ if (error)
+ goto free3;
+ wf->f_flag = FREAD | FWRITE;
+ wf->f_type = DTYPE_PIPE;
+ wf->f_ops = &pipeops;
+ wf->f_data = (caddr_t)wpipe;
+ retval[1] = fd;
+
+ rpipe->pipe_peer = wpipe;
+ wpipe->pipe_peer = rpipe;
+
+ return (0);
+free3:
+ ffree(rf);
+ fdp->fd_ofiles[retval[0]] = 0;
+free2:
+ (void)pipeclose(wpipe);
+free1:
+ (void)pipeclose(rpipe);
+ return (error);
+}
+
+/*
+ * initialize and allocate VM and memory for pipe
+ */
+static void
+pipeinit(cpipe)
+ struct pipe *cpipe;
+{
+ int npages;
+
+ npages = round_page(PIPESIZE)/PAGE_SIZE;
+
+ /*
+ * Create an object, I don't like the idea of paging to/from
+ * kernel_object.
+ */
+ cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages);
+
+ /*
+ * Insert the object into the kernel map, and allocate kva for it.
+ * The map entry is, by default, pageable.
+ */
+ if (vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0,
+ (vm_offset_t *) &cpipe->pipe_buffer.buffer, PIPESIZE, 1,
+ VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
+ panic("pipeinit: cannot allocate pipe -- out of kvm");
+
+ cpipe->pipe_buffer.in = 0;
+ cpipe->pipe_buffer.out = 0;
+ cpipe->pipe_buffer.cnt = 0;
+ cpipe->pipe_buffer.size = PIPESIZE;
+
+ cpipe->pipe_state = 0;
+ cpipe->pipe_peer = NULL;
+ cpipe->pipe_busy = 0;
+ cpipe->pipe_ctime = time;
+ cpipe->pipe_atime = time;
+ cpipe->pipe_mtime = time;
+ bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel);
+}
+
+
+/*
+ * lock a pipe for I/O, blocking other access
+ */
+static __inline int
+pipelock(cpipe)
+ struct pipe *cpipe;
+{
+ while (cpipe->pipe_state & PIPE_LOCK) {
+ cpipe->pipe_state |= PIPE_LWANT;
+ if (tsleep( &cpipe->pipe_state, PRIBIO|PCATCH, "pipelk", 0)) {
+ return ERESTART;
+ }
+ }
+ cpipe->pipe_state |= PIPE_LOCK;
+ return 0;
+}
+
+/*
+ * unlock a pipe I/O lock
+ */
+static __inline void
+pipeunlock(cpipe)
+ struct pipe *cpipe;
+{
+ cpipe->pipe_state &= ~PIPE_LOCK;
+ if (cpipe->pipe_state & PIPE_LWANT) {
+ cpipe->pipe_state &= ~PIPE_LWANT;
+ wakeup(&cpipe->pipe_state);
+ }
+ return;
+}
+
+/* ARGSUSED */
+static int
+pipe_read(fp, uio, cred)
+ struct file *fp;
+ struct uio *uio;
+ struct ucred *cred;
+{
+
+ struct pipe *rpipe = (struct pipe *) fp->f_data;
+ int error = 0;
+ int nread = 0;
+
+ ++rpipe->pipe_busy;
+ while (uio->uio_resid) {
+ if (rpipe->pipe_buffer.cnt > 0) {
+ int size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
+ if (size > rpipe->pipe_buffer.cnt)
+ size = rpipe->pipe_buffer.cnt;
+ if (size > uio->uio_resid)
+ size = uio->uio_resid;
+ if ((error = pipelock(rpipe)) == 0) {
+ error = uiomove( &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
+ size, uio);
+ pipeunlock(rpipe);
+ }
+ if (error) {
+ break;
+ }
+ rpipe->pipe_buffer.out += size;
+ if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
+ rpipe->pipe_buffer.out = 0;
+
+ rpipe->pipe_buffer.cnt -= size;
+ nread += size;
+ rpipe->pipe_atime = time;
+ } else {
+ /*
+ * detect EOF condition
+ */
+ if (rpipe->pipe_state & PIPE_EOF) {
+ break;
+ }
+ /*
+ * If the "write-side" has been blocked, wake it up now.
+ */
+ if (rpipe->pipe_state & PIPE_WANTW) {
+ rpipe->pipe_state &= ~PIPE_WANTW;
+ wakeup(rpipe);
+ }
+ if ((nread > 0) || (rpipe->pipe_state & PIPE_NBIO))
+ break;
+ if (rpipe->pipe_peer == NULL)
+ break;
+
+ /*
+ * If there is no more to read in the pipe, reset
+ * it's pointers to the beginning. This improves
+ * cache hit stats.
+ */
+
+ if ((error = pipelock(rpipe)) == 0) {
+ if (rpipe->pipe_buffer.cnt == 0) {
+ rpipe->pipe_buffer.in = 0;
+ rpipe->pipe_buffer.out = 0;
+ }
+ pipeunlock(rpipe);
+ } else {
+ break;
+ }
+ rpipe->pipe_state |= PIPE_WANTR;
+ if (tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) {
+ error = ERESTART;
+ break;
+ }
+ }
+ }
+
+ --rpipe->pipe_busy;
+ if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
+ rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
+ wakeup(rpipe);
+ } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
+ /*
+ * If there is no more to read in the pipe, reset
+ * it's pointers to the beginning. This improves
+ * cache hit stats.
+ */
+ if ((error == 0) && (error = pipelock(rpipe)) == 0) {
+ if (rpipe->pipe_buffer.cnt == 0) {
+ rpipe->pipe_buffer.in = 0;
+ rpipe->pipe_buffer.out = 0;
+ }
+ pipeunlock(rpipe);
+ }
+
+ /*
+ * If the "write-side" has been blocked, wake it up now.
+ */
+ if (rpipe->pipe_state & PIPE_WANTW) {
+ rpipe->pipe_state &= ~PIPE_WANTW;
+ wakeup(rpipe);
+ }
+ }
+ if (rpipe->pipe_state & PIPE_SEL) {
+ rpipe->pipe_state &= ~PIPE_SEL;
+ selwakeup(&rpipe->pipe_sel);
+ }
+ return error;
+}
+
+/* ARGSUSED */
+static int
+pipe_write(fp, uio, cred)
+ struct file *fp;
+ struct uio *uio;
+ struct ucred *cred;
+{
+ struct pipe *rpipe = (struct pipe *) fp->f_data;
+ struct pipe *wpipe = rpipe->pipe_peer;
+ int error = 0;
+
+ /*
+ * detect loss of pipe read side, issue SIGPIPE if lost.
+ */
+ if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)) {
+ psignal(curproc, SIGPIPE);
+ return 0;
+ }
+
+ ++wpipe->pipe_busy;
+ while (uio->uio_resid) {
+ int space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
+ if (space > 0) {
+ int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in;
+ if (size > space)
+ size = space;
+ if (size > uio->uio_resid)
+ size = uio->uio_resid;
+ if ((error = pipelock(wpipe)) == 0) {
+ error = uiomove( &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
+ size, uio);
+ pipeunlock(wpipe);
+ }
+ if (error)
+ break;
+
+ wpipe->pipe_buffer.in += size;
+ if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size)
+ wpipe->pipe_buffer.in = 0;
+
+ wpipe->pipe_buffer.cnt += size;
+ wpipe->pipe_mtime = time;
+ } else {
+ /*
+ * If the "read-side" has been blocked, wake it up now.
+ */
+ if (wpipe->pipe_state & PIPE_WANTR) {
+ wpipe->pipe_state &= ~PIPE_WANTR;
+ wakeup(wpipe);
+ }
+ /*
+ * don't block on non-blocking I/O
+ */
+ if (wpipe->pipe_state & PIPE_NBIO) {
+ break;
+ }
+ wpipe->pipe_state |= PIPE_WANTW;
+ if (tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) {
+ error = ERESTART;
+ break;
+ }
+ /*
+ * If read side wants to go away, we just issue a signal
+ * to ourselves.
+ */
+ if (wpipe->pipe_state & PIPE_EOF) {
+ psignal(curproc, SIGPIPE);
+ break;
+ }
+ }
+ }
+
+ --wpipe->pipe_busy;
+ if ((wpipe->pipe_busy == 0) &&
+ (wpipe->pipe_state & PIPE_WANT)) {
+ wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR);
+ wakeup(wpipe);
+ } else if (wpipe->pipe_buffer.cnt > 0) {
+ /*
+ * If we have put any characters in the buffer, we wake up
+ * the reader.
+ */
+ if (wpipe->pipe_state & PIPE_WANTR) {
+ wpipe->pipe_state &= ~PIPE_WANTR;
+ wakeup(wpipe);
+ }
+ }
+ if (wpipe->pipe_state & PIPE_SEL) {
+ wpipe->pipe_state &= ~PIPE_SEL;
+ selwakeup(&wpipe->pipe_sel);
+ }
+ return error;
+}
+
+/*
+ * we implement a very minimal set of ioctls for compatibility with sockets.
+ */
+int
+pipe_ioctl(fp, cmd, data, p)
+ struct file *fp;
+ int cmd;
+ register caddr_t data;
+ struct proc *p;
+{
+ register struct pipe *mpipe = (struct pipe *)fp->f_data;
+
+ switch (cmd) {
+
+ case FIONBIO:
+ if (*(int *)data)
+ mpipe->pipe_state |= PIPE_NBIO;
+ else
+ mpipe->pipe_state &= ~PIPE_NBIO;
+ return (0);
+
+ case FIOASYNC:
+ if (*(int *)data) {
+ mpipe->pipe_state |= PIPE_ASYNC;
+ } else {
+ mpipe->pipe_state &= ~PIPE_ASYNC;
+ }
+ return (0);
+
+ case FIONREAD:
+ *(int *)data = mpipe->pipe_buffer.cnt;
+ return (0);
+
+ case SIOCSPGRP:
+ mpipe->pipe_pgid = *(int *)data;
+ return (0);
+
+ case SIOCGPGRP:
+ *(int *)data = mpipe->pipe_pgid;
+ return (0);
+
+ }
+ return ENOSYS;
+}
+
+int
+pipe_select(fp, which, p)
+ struct file *fp;
+ int which;
+ struct proc *p;
+{
+ register struct pipe *rpipe = (struct pipe *)fp->f_data;
+ struct pipe *wpipe;
+ register int s = splnet();
+
+ wpipe = rpipe->pipe_peer;
+ switch (which) {
+
+ case FREAD:
+ if (rpipe->pipe_buffer.cnt > 0) {
+ splx(s);
+ return (1);
+ }
+ selrecord(p, &rpipe->pipe_sel);
+ rpipe->pipe_state |= PIPE_SEL;
+ break;
+
+ case FWRITE:
+ if (wpipe == 0) {
+ splx(s);
+ return (1);
+ }
+ if (wpipe->pipe_buffer.cnt < wpipe->pipe_buffer.size) {
+ splx(s);
+ return (1);
+ }
+ selrecord(p, &wpipe->pipe_sel);
+ wpipe->pipe_state |= PIPE_SEL;
+ break;
+
+ case 0:
+ selrecord(p, &rpipe->pipe_sel);
+ rpipe->pipe_state |= PIPE_SEL;
+ break;
+ }
+ splx(s);
+ return (0);
+}
+
+int
+pipe_stat(pipe, ub)
+ register struct pipe *pipe;
+ register struct stat *ub;
+{
+ bzero((caddr_t)ub, sizeof (*ub));
+ ub->st_mode = S_IFSOCK;
+ ub->st_blksize = pipe->pipe_buffer.size / 2;
+ ub->st_size = pipe->pipe_buffer.cnt;
+ ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
+ TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec);
+ TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
+ TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
+ return 0;
+}
+
+/* ARGSUSED */
+static int
+pipe_close(fp, p)
+ struct file *fp;
+ struct proc *p;
+{
+ int error = 0;
+ struct pipe *cpipe = (struct pipe *)fp->f_data;
+ pipeclose(cpipe);
+ fp->f_data = NULL;
+ return 0;
+}
+
+/*
+ * shutdown the pipe
+ */
+static void
+pipeclose(cpipe)
+ struct pipe *cpipe;
+{
+ if (cpipe) {
+ /*
+ * If the other side is blocked, wake it up saying that
+ * we want to close it down.
+ */
+ while (cpipe->pipe_busy) {
+ wakeup(cpipe);
+ cpipe->pipe_state |= PIPE_WANT|PIPE_EOF;
+ tsleep(cpipe, PRIBIO, "pipecl", 0);
+ }
+
+ /*
+ * Disconnect from peer
+ */
+ if (cpipe->pipe_peer) {
+ cpipe->pipe_peer->pipe_state |= PIPE_EOF;
+ wakeup(cpipe->pipe_peer);
+ cpipe->pipe_peer->pipe_peer = NULL;
+ }
+
+ /*
+ * free resources
+ */
+ kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer,
+ cpipe->pipe_buffer.size);
+ free(cpipe, M_TEMP);
+ }
+}
+#endif
diff --git a/sys/sys/pipe.h b/sys/sys/pipe.h
new file mode 100644
index 0000000..2815805
--- /dev/null
+++ b/sys/sys/pipe.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 1996 John S. Dyson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice immediately at the beginning of the file, without modification,
+ * this list of conditions, and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Absolutely no warranty of function or purpose is made by the author
+ * John S. Dyson.
+ * 4. This work was done expressly for inclusion into FreeBSD. Other use
+ * is allowed if this notation is included.
+ * 5. Modifications may be freely made to this file if the above conditions
+ * are met.
+ *
+ * $Id$
+ */
+
+#ifndef OLD_PIPE
+
+struct vm_object;
+
+/*
+ * pipe buffer information
+ * Seperate in, out, cnt is used to simplify calculations.
+ */
+struct pipebuf {
+ u_int cnt; /* number of chars currently in buffer */
+ u_int in; /* in pointer */
+ u_int out; /* out pointer */
+ u_int size; /* size of buffer */
+ caddr_t buffer; /* kva of buffer */
+ struct vm_object *object; /* VM object containing buffer */
+};
+
+/*
+ * pipe_state bits
+ */
+#define PIPE_NBIO 0x1 /* non-blocking I/O */
+#define PIPE_ASYNC 0x4 /* Async? I/O */
+#define PIPE_WANTR 0x8 /* Reader wants some characters */
+#define PIPE_WANTW 0x10 /* Writer wants space to put characters */
+#define PIPE_WANT 0x20 /* Pipe is wanted to be run-down */
+#define PIPE_SEL 0x40 /* Pipe has a select active */
+#define PIPE_EOF 0x80 /* Pipe is in EOF condition */
+#define PIPE_LOCK 0x100 /* Process has exclusive access to pointers/data */
+#define PIPE_LWANT 0x200 /* Process wants exclusive access to pointers/data */
+
+/*
+ * Per-pipe data structure
+ * Two of these are linked together to produce bi-directional
+ * pipes.
+ */
+struct pipe {
+ struct pipebuf pipe_buffer; /* data storage */
+ struct selinfo pipe_sel; /* for compat with select */
+ struct timeval pipe_atime; /* time of last access */
+ struct timeval pipe_mtime; /* time of last modify */
+ struct timeval pipe_ctime; /* time of status change */
+ int pipe_pgid;
+ struct pipe *pipe_peer; /* link with other direction */
+ u_int pipe_state; /* pipe status info */
+ int pipe_busy; /* busy flag, mostly to handle rundown sanely */
+};
+
+int pipe_stat __P((struct pipe *pipe, struct stat *ub));
+
+#endif
OpenPOWER on IntegriCloud