summaryrefslogtreecommitdiffstats
path: root/sys/kern
diff options
context:
space:
mode:
authorrgrimes <rgrimes@FreeBSD.org>1994-05-24 10:09:53 +0000
committerrgrimes <rgrimes@FreeBSD.org>1994-05-24 10:09:53 +0000
commit8fb65ce818b3e3c6f165b583b910af24000768a5 (patch)
treeba751e4f2166aefec707c9d7401c7ff432506642 /sys/kern
parenta6ce65d368e623088a4c1a29865889f431b15420 (diff)
downloadFreeBSD-src-8fb65ce818b3e3c6f165b583b910af24000768a5.zip
FreeBSD-src-8fb65ce818b3e3c6f165b583b910af24000768a5.tar.gz
BSD 4.4 Lite Kernel Sources
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/Make.tags.inc18
-rw-r--r--sys/kern/Makefile50
-rw-r--r--sys/kern/init_main.c394
-rw-r--r--sys/kern/init_sysent.c480
-rw-r--r--sys/kern/kern_acct.c116
-rw-r--r--sys/kern/kern_clock.c528
-rw-r--r--sys/kern/kern_descrip.c914
-rw-r--r--sys/kern/kern_exec.c64
-rw-r--r--sys/kern/kern_exit.c492
-rw-r--r--sys/kern/kern_fork.c315
-rw-r--r--sys/kern/kern_ktrace.c466
-rw-r--r--sys/kern/kern_malloc.c381
-rw-r--r--sys/kern/kern_physio.c93
-rw-r--r--sys/kern/kern_proc.c401
-rw-r--r--sys/kern/kern_prot.c566
-rw-r--r--sys/kern/kern_resource.c476
-rw-r--r--sys/kern/kern_sig.c1197
-rw-r--r--sys/kern/kern_subr.c210
-rw-r--r--sys/kern/kern_synch.c666
-rw-r--r--sys/kern/kern_sysctl.c787
-rw-r--r--sys/kern/kern_time.c416
-rw-r--r--sys/kern/kern_xxx.c138
-rw-r--r--sys/kern/makesyscalls.sh171
-rw-r--r--sys/kern/subr_autoconf.c342
-rw-r--r--sys/kern/subr_log.c232
-rw-r--r--sys/kern/subr_prf.c601
-rw-r--r--sys/kern/subr_prof.c256
-rw-r--r--sys/kern/subr_rmap.c81
-rw-r--r--sys/kern/subr_xxx.c100
-rw-r--r--sys/kern/sys_generic.c683
-rw-r--r--sys/kern/sys_process.c74
-rw-r--r--sys/kern/sys_socket.c197
-rw-r--r--sys/kern/syscalls.c251
-rw-r--r--sys/kern/syscalls.master276
-rw-r--r--sys/kern/tty.c1923
-rw-r--r--sys/kern/tty_compat.c411
-rw-r--r--sys/kern/tty_conf.c126
-rw-r--r--sys/kern/tty_pty.c691
-rw-r--r--sys/kern/tty_subr.c159
-rw-r--r--sys/kern/tty_tb.c366
-rw-r--r--sys/kern/tty_tty.c147
-rw-r--r--sys/kern/uipc_domain.c220
-rw-r--r--sys/kern/uipc_mbuf.c655
-rw-r--r--sys/kern/uipc_proto.c72
-rw-r--r--sys/kern/uipc_socket.c1024
-rw-r--r--sys/kern/uipc_socket2.c755
-rw-r--r--sys/kern/uipc_syscalls.c1217
-rw-r--r--sys/kern/uipc_usrreq.c823
-rw-r--r--sys/kern/vfs_bio.c339
-rw-r--r--sys/kern/vfs_cache.c328
-rw-r--r--sys/kern/vfs_cluster.c746
-rw-r--r--sys/kern/vfs_conf.c260
-rw-r--r--sys/kern/vfs_init.c246
-rw-r--r--sys/kern/vfs_lookup.c506
-rw-r--r--sys/kern/vfs_subr.c1322
-rw-r--r--sys/kern/vfs_syscalls.c2107
-rw-r--r--sys/kern/vfs_vnops.c422
-rw-r--r--sys/kern/vnode_if.sh433
-rw-r--r--sys/kern/vnode_if.src296
59 files changed, 28026 insertions, 0 deletions
diff --git a/sys/kern/Make.tags.inc b/sys/kern/Make.tags.inc
new file mode 100644
index 0000000..1563c41
--- /dev/null
+++ b/sys/kern/Make.tags.inc
@@ -0,0 +1,18 @@
+# @(#)Make.tags.inc 8.1 (Berkeley) 6/11/93
+
+# Common files for "make tags".
+# Included by the Makefile for each architecture.
+
+# Put the ../sys stuff near the end so that subroutine definitions win when
+# there is a struct tag with the same name (eg., vmmeter). The real
+# solution would probably be for ctags to generate "struct vmmeter" tags.
+
+COMM= /sys/conf/*.[ch] \
+ /sys/dev/*.[ch] /sys/dev/scsi/*.[ch] \
+ /sys/kern/*.[ch] /sys/libkern/*.[ch] \
+ /sys/miscfs/*/*.[ch] \
+ /sys/net/*.[ch] /sys/netccitt/*.[ch] /sys/netinet/*.[ch] \
+ /sys/netiso/*.[ch] /sys/netns/*.[ch] \
+ /sys/nfs/*.[ch] /sys/sys/*.[ch] \
+ /sys/ufs/*/*.[ch] \
+ /sys/vm/*.[ch]
diff --git a/sys/kern/Makefile b/sys/kern/Makefile
new file mode 100644
index 0000000..cfe962a
--- /dev/null
+++ b/sys/kern/Makefile
@@ -0,0 +1,50 @@
+# @(#)Makefile 8.2 (Berkeley) 3/21/94
+
+# Makefile for kernel tags files, init_sysent, etc.
+
+ARCH= hp300 i386 luna68k news3400 pmax sparc tahoe vax
+
+all:
+ @echo "make tags, make links or init_sysent.c only"
+
+init_sysent.c syscalls.c ../sys/syscall.h: makesyscalls.sh syscalls.master
+ -mv -f init_sysent.c init_sysent.c.bak
+ -mv -f syscalls.c syscalls.c.bak
+ -mv -f ../sys/syscall.h ../sys/syscall.h.bak
+ sh makesyscalls.sh syscalls.master
+
+# Kernel tags:
+# Tags files are built in the top-level directory for each architecture,
+# with a makefile listing the architecture-dependent files, etc. The list
+# of common files is in ./Make.tags.inc. Links to the correct tags file
+# are placed in each source directory. We need to have links to tags files
+# from the generic directories that are relative to the machine type, even
+# via remote mounts; therefore we use symlinks to $SYSTAGS, which points at
+# ${SYSDIR}/${MACHINE}/tags.
+
+SYSTAGS=/var/db/sys_tags
+SYSDIR=/sys
+
+# Directories in which to place tags links (other than machine-dependent)
+DGEN= conf \
+ dev dev/scsi \
+ hp hp/dev hp/hpux \
+ kern libkern \
+ miscfs miscfs/deadfs miscfs/fdesc miscfs/fifofs miscfs/kernfs \
+ miscfs/lofs miscfs/nullfs miscfs/portal miscfs/procfs \
+ miscfs/specfs miscfs/umapfs miscfs/union \
+ net netccitt netinet netiso netns nfs scripts sys \
+ ufs ufs/ffs ufs/lfs ufs/mfs ufs/ufs \
+ vm
+
+tags::
+ -for i in ${ARCH}; do \
+ (cd ../$$i && make ${MFLAGS} tags); done
+
+links::
+ rm -f ${SYSTAGS}
+ ln -s ${SYSDIR}/${MACHINE}/tags ${SYSTAGS}
+ -for i in ${DGEN}; do \
+ (cd ../$$i && { rm -f tags; ln -s ${SYSTAGS} tags; }) done
+ -for i in ${ARCH}; do \
+ (cd ../$$i && make ${MFLAGS} SYSTAGS=${SYSTAGS} links); done
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
new file mode 100644
index 0000000..c649715
--- /dev/null
+++ b/sys/kern/init_main.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)init_main.c 8.9 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/filedesc.h>
+#include <sys/errno.h>
+#include <sys/exec.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/map.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/clist.h>
+#include <sys/device.h>
+#include <sys/protosw.h>
+#include <sys/reboot.h>
+#include <sys/user.h>
+
+#include <ufs/ufs/quota.h>
+
+#include <machine/cpu.h>
+
+#include <vm/vm.h>
+
+#ifdef HPFPLIB
+char copyright[] =
+"Copyright (c) 1982, 1986, 1989, 1991, 1993\n\tThe Regents of the University of California.\nCopyright (c) 1992 Hewlett-Packard Company\nCopyright (c) 1992 Motorola Inc.\nAll rights reserved.\n\n";
+#else
+char copyright[] =
+"Copyright (c) 1982, 1986, 1989, 1991, 1993\n\tThe Regents of the University of California. All rights reserved.\n\n";
+#endif
+
+/* Components of the first process -- never freed. */
+struct session session0;
+struct pgrp pgrp0;
+struct proc proc0;
+struct pcred cred0;
+struct filedesc0 filedesc0;
+struct plimit limit0;
+struct vmspace vmspace0;
+struct proc *curproc = &proc0;
+struct proc *initproc, *pageproc;
+
+int cmask = CMASK;
+extern struct user *proc0paddr;
+
+struct vnode *rootvp, *swapdev_vp;
+int boothowto;
+struct timeval boottime;
+struct timeval runtime;
+
+static void start_init __P((struct proc *p, void *framep));
+
+/*
+ * System startup; initialize the world, create process 0, mount root
+ * filesystem, and fork to create init and pagedaemon. Most of the
+ * hard work is done in the lower-level initialization routines including
+ * startup(), which does memory initialization and autoconfiguration.
+ */
+main(framep)
+ void *framep;
+{
+ register struct proc *p;
+ register struct filedesc0 *fdp;
+ register struct pdevinit *pdev;
+ register int i;
+ int s, rval[2];
+ extern int (*mountroot) __P((void));
+ extern struct pdevinit pdevinit[];
+ extern void roundrobin __P((void *));
+ extern void schedcpu __P((void *));
+
+ /*
+ * Initialize the current process pointer (curproc) before
+ * any possible traps/probes to simplify trap processing.
+ */
+ p = &proc0;
+ curproc = p;
+ /*
+ * Attempt to find console and initialize
+ * in case of early panic or other messages.
+ */
+ consinit();
+ printf(copyright);
+
+ vm_mem_init();
+ kmeminit();
+ cpu_startup();
+
+ /*
+ * Create process 0 (the swapper).
+ */
+ allproc = (volatile struct proc *)p;
+ p->p_prev = (struct proc **)&allproc;
+ p->p_pgrp = &pgrp0;
+ pgrphash[0] = &pgrp0;
+ pgrp0.pg_mem = p;
+ pgrp0.pg_session = &session0;
+ session0.s_count = 1;
+ session0.s_leader = p;
+
+ p->p_flag = P_INMEM | P_SYSTEM;
+ p->p_stat = SRUN;
+ p->p_nice = NZERO;
+ bcopy("swapper", p->p_comm, sizeof ("swapper"));
+
+ /* Create credentials. */
+ cred0.p_refcnt = 1;
+ p->p_cred = &cred0;
+ p->p_ucred = crget();
+ p->p_ucred->cr_ngroups = 1; /* group 0 */
+
+ /* Create the file descriptor table. */
+ fdp = &filedesc0;
+ p->p_fd = &fdp->fd_fd;
+ fdp->fd_fd.fd_refcnt = 1;
+ fdp->fd_fd.fd_cmask = cmask;
+ fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
+ fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
+ fdp->fd_fd.fd_nfiles = NDFILE;
+
+ /* Create the limits structures. */
+ p->p_limit = &limit0;
+ for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
+ limit0.pl_rlimit[i].rlim_cur =
+ limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
+ limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
+ limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC;
+ i = ptoa(cnt.v_free_count);
+ limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
+ limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
+ limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
+ limit0.p_refcnt = 1;
+
+ /* Allocate a prototype map so we have something to fork. */
+ p->p_vmspace = &vmspace0;
+ vmspace0.vm_refcnt = 1;
+ pmap_pinit(&vmspace0.vm_pmap);
+ vm_map_init(&p->p_vmspace->vm_map, round_page(VM_MIN_ADDRESS),
+ trunc_page(VM_MAX_ADDRESS), TRUE);
+ vmspace0.vm_map.pmap = &vmspace0.vm_pmap;
+ p->p_addr = proc0paddr; /* XXX */
+
+ /*
+ * We continue to place resource usage info and signal
+ * actions in the user struct so they're pageable.
+ */
+ p->p_stats = &p->p_addr->u_stats;
+ p->p_sigacts = &p->p_addr->u_sigacts;
+
+ /*
+ * Initialize per uid information structure and charge
+ * root for one process.
+ */
+ usrinfoinit();
+ (void)chgproccnt(0, 1);
+
+ rqinit();
+
+ /* Configure virtual memory system, set vm rlimits. */
+ vm_init_limits(p);
+
+ /* Initialize the file systems. */
+ vfsinit();
+
+ /* Start real time and statistics clocks. */
+ initclocks();
+
+ /* Initialize mbuf's. */
+ mbinit();
+
+ /* Initialize clists. */
+ clist_init();
+
+#ifdef SYSVSHM
+ /* Initialize System V style shared memory. */
+ shminit();
+#endif
+
+ /* Attach pseudo-devices. */
+ for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
+ (*pdev->pdev_attach)(pdev->pdev_count);
+
+ /*
+ * Initialize protocols. Block reception of incoming packets
+ * until everything is ready.
+ */
+ s = splimp();
+ ifinit();
+ domaininit();
+ splx(s);
+
+#ifdef GPROF
+ /* Initialize kernel profiling. */
+ kmstartup();
+#endif
+
+ /* Kick off timeout driven events by calling first time. */
+ roundrobin(NULL);
+ schedcpu(NULL);
+
+ /* Mount the root file system. */
+ if ((*mountroot)())
+ panic("cannot mount root");
+
+ /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */
+ if (VFS_ROOT(mountlist.tqh_first, &rootvnode))
+ panic("cannot find root vnode");
+ fdp->fd_fd.fd_cdir = rootvnode;
+ VREF(fdp->fd_fd.fd_cdir);
+ VOP_UNLOCK(rootvnode);
+ fdp->fd_fd.fd_rdir = NULL;
+ swapinit();
+
+ /*
+ * Now can look at time, having had a chance to verify the time
+ * from the file system. Reset p->p_rtime as it may have been
+ * munched in mi_switch() after the time got set.
+ */
+ p->p_stats->p_start = runtime = mono_time = boottime = time;
+ p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
+
+ /* Initialize signal state for process 0. */
+ siginit(p);
+
+ /* Create process 1 (init(8)). */
+ if (fork(p, NULL, rval))
+ panic("fork init");
+ if (rval[1]) {
+ start_init(curproc, framep);
+ return;
+ }
+
+ /* Create process 2 (the pageout daemon). */
+ if (fork(p, NULL, rval))
+ panic("fork pager");
+ if (rval[1]) {
+ /*
+ * Now in process 2.
+ */
+ p = curproc;
+ pageproc = p;
+ p->p_flag |= P_INMEM | P_SYSTEM; /* XXX */
+ bcopy("pagedaemon", curproc->p_comm, sizeof ("pagedaemon"));
+ vm_pageout();
+ /* NOTREACHED */
+ }
+
+ /* The scheduler is an infinite loop. */
+ scheduler();
+ /* NOTREACHED */
+}
+
+/*
+ * List of paths to try when searching for "init".
+ */
+static char *initpaths[] = {
+ "/sbin/init",
+ "/sbin/oinit",
+ "/sbin/init.bak",
+ NULL,
+};
+
+/*
+ * Start the initial user process; try exec'ing each pathname in "initpaths".
+ * The program is invoked with one argument containing the boot flags.
+ */
+static void
+start_init(p, framep)
+ struct proc *p;
+ void *framep;
+{
+ vm_offset_t addr;
+ struct execve_args args;
+ int options, i, retval[2], error;
+ char **pathp, *path, *ucp, **uap, *arg0, *arg1;
+
+ initproc = p;
+
+ /*
+ * We need to set the system call frame as if we were entered through
+ * a syscall() so that when we call execve() below, it will be able
+ * to set the entry point (see setregs) when it tries to exec. The
+ * startup code in "locore.s" has allocated space for the frame and
+ * passed a pointer to that space as main's argument.
+ */
+ cpu_set_init_frame(p, framep);
+
+ /*
+ * Need just enough stack to hold the faked-up "execve()" arguments.
+ */
+ addr = trunc_page(VM_MAX_ADDRESS - PAGE_SIZE);
+ if (vm_allocate(&p->p_vmspace->vm_map, &addr, PAGE_SIZE, FALSE) != 0)
+ panic("init: couldn't allocate argument space");
+ p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
+
+ for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
+ /*
+ * Move out the boot flag argument.
+ */
+ options = 0;
+ ucp = (char *)USRSTACK;
+ (void)subyte(--ucp, 0); /* trailing zero */
+ if (boothowto & RB_SINGLE) {
+ (void)subyte(--ucp, 's');
+ options = 1;
+ }
+#ifdef notyet
+ if (boothowto & RB_FASTBOOT) {
+ (void)subyte(--ucp, 'f');
+ options = 1;
+ }
+#endif
+ if (options == 0)
+ (void)subyte(--ucp, '-');
+ (void)subyte(--ucp, '-'); /* leading hyphen */
+ arg1 = ucp;
+
+ /*
+ * Move out the file name (also arg 0).
+ */
+ for (i = strlen(path) + 1; i >= 0; i--)
+ (void)subyte(--ucp, path[i]);
+ arg0 = ucp;
+
+ /*
+ * Move out the arg pointers.
+ */
+ uap = (char **)((int)ucp & ~(NBPW-1));
+ (void)suword((caddr_t)--uap, 0); /* terminator */
+ (void)suword((caddr_t)--uap, (int)arg1);
+ (void)suword((caddr_t)--uap, (int)arg0);
+
+ /*
+ * Point at the arguments.
+ */
+ args.fname = arg0;
+ args.argp = uap;
+ args.envp = NULL;
+
+ /*
+ * Now try to exec the program. If can't for any reason
+ * other than it doesn't exist, complain.
+ */
+ if ((error = execve(p, &args, &retval)) == 0)
+ return;
+ if (error != ENOENT)
+ printf("exec %s: error %d\n", path, error);
+ }
+ printf("init: not found\n");
+ panic("no init");
+}
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
new file mode 100644
index 0000000..4b25c06
--- /dev/null
+++ b/sys/kern/init_sysent.c
@@ -0,0 +1,480 @@
+/*
+ * System call switch table.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * created from @(#)syscalls.master 8.2 (Berkeley) 1/13/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+int nosys();
+
+int nosys();
+int exit();
+int fork();
+int read();
+int write();
+int open();
+int close();
+int wait4();
+int link();
+int unlink();
+int chdir();
+int fchdir();
+int mknod();
+int chmod();
+int chown();
+int obreak();
+int getfsstat();
+int getpid();
+int mount();
+int unmount();
+int setuid();
+int getuid();
+int geteuid();
+int ptrace();
+int recvmsg();
+int sendmsg();
+int recvfrom();
+int accept();
+int getpeername();
+int getsockname();
+int access();
+int chflags();
+int fchflags();
+int sync();
+int kill();
+int getppid();
+int dup();
+int pipe();
+int getegid();
+int profil();
+#ifdef KTRACE
+int ktrace();
+#else
+#endif
+int sigaction();
+int getgid();
+int sigprocmask();
+int getlogin();
+int setlogin();
+int acct();
+int sigpending();
+int sigaltstack();
+int ioctl();
+int reboot();
+int revoke();
+int symlink();
+int readlink();
+int execve();
+int umask();
+int chroot();
+int msync();
+int vfork();
+int sbrk();
+int sstk();
+int ovadvise();
+int munmap();
+int mprotect();
+int madvise();
+int mincore();
+int getgroups();
+int setgroups();
+int getpgrp();
+int setpgid();
+int setitimer();
+int swapon();
+int getitimer();
+int getdtablesize();
+int dup2();
+int fcntl();
+int select();
+int fsync();
+int setpriority();
+int socket();
+int connect();
+int getpriority();
+int sigreturn();
+int bind();
+int setsockopt();
+int listen();
+int sigsuspend();
+#ifdef TRACE
+int vtrace();
+#else
+#endif
+int gettimeofday();
+int getrusage();
+int getsockopt();
+#ifdef vax
+int resuba();
+#else
+#endif
+int readv();
+int writev();
+int settimeofday();
+int fchown();
+int fchmod();
+int rename();
+int flock();
+int mkfifo();
+int sendto();
+int shutdown();
+int socketpair();
+int mkdir();
+int rmdir();
+int utimes();
+int adjtime();
+int setsid();
+int quotactl();
+#ifdef NFS
+int nfssvc();
+#else
+#endif
+int statfs();
+int fstatfs();
+#ifdef NFS
+int getfh();
+#else
+#endif
+#ifdef SYSVSHM
+int shmsys();
+#else
+#endif
+int setgid();
+int setegid();
+int seteuid();
+#ifdef LFS
+int lfs_bmapv();
+int lfs_markv();
+int lfs_segclean();
+int lfs_segwait();
+#else
+#endif
+int stat();
+int fstat();
+int lstat();
+int pathconf();
+int fpathconf();
+int getrlimit();
+int setrlimit();
+int getdirentries();
+int mmap();
+int nosys();
+int lseek();
+int truncate();
+int ftruncate();
+int __sysctl();
+int mlock();
+int munlock();
+
+#ifdef COMPAT_43
+#define compat(n, name) n, __CONCAT(o,name)
+
+int ocreat();
+int olseek();
+int ostat();
+int olstat();
+#ifdef KTRACE
+#else
+#endif
+int ofstat();
+int ogetkerninfo();
+int ogetpagesize();
+int ommap();
+int owait();
+int ogethostname();
+int osethostname();
+int oaccept();
+int osend();
+int orecv();
+int osigvec();
+int osigblock();
+int osigsetmask();
+int osigstack();
+int orecvmsg();
+int osendmsg();
+#ifdef TRACE
+#else
+#endif
+#ifdef vax
+#else
+#endif
+int orecvfrom();
+int osetreuid();
+int osetregid();
+int otruncate();
+int oftruncate();
+int ogetpeername();
+int ogethostid();
+int osethostid();
+int ogetrlimit();
+int osetrlimit();
+int okillpg();
+int oquota();
+int ogetsockname();
+#ifdef NFS
+#else
+#endif
+int ogetdirentries();
+#ifdef NFS
+#else
+#endif
+#ifdef SYSVSHM
+#else
+#endif
+#ifdef LFS
+#else
+#endif
+
+#else /* COMPAT_43 */
+#define compat(n, name) 0, nosys
+#endif /* COMPAT_43 */
+
+struct sysent sysent[] = {
+ { 0, nosys }, /* 0 = syscall */
+ { 1, exit }, /* 1 = exit */
+ { 0, fork }, /* 2 = fork */
+ { 3, read }, /* 3 = read */
+ { 3, write }, /* 4 = write */
+ { 3, open }, /* 5 = open */
+ { 1, close }, /* 6 = close */
+ { 4, wait4 }, /* 7 = wait4 */
+ { compat(2,creat) }, /* 8 = old creat */
+ { 2, link }, /* 9 = link */
+ { 1, unlink }, /* 10 = unlink */
+ { 0, nosys }, /* 11 = obsolete execv */
+ { 1, chdir }, /* 12 = chdir */
+ { 1, fchdir }, /* 13 = fchdir */
+ { 3, mknod }, /* 14 = mknod */
+ { 2, chmod }, /* 15 = chmod */
+ { 3, chown }, /* 16 = chown */
+ { 1, obreak }, /* 17 = break */
+ { 3, getfsstat }, /* 18 = getfsstat */
+ { compat(3,lseek) }, /* 19 = old lseek */
+ { 0, getpid }, /* 20 = getpid */
+ { 4, mount }, /* 21 = mount */
+ { 2, unmount }, /* 22 = unmount */
+ { 1, setuid }, /* 23 = setuid */
+ { 0, getuid }, /* 24 = getuid */
+ { 0, geteuid }, /* 25 = geteuid */
+ { 4, ptrace }, /* 26 = ptrace */
+ { 3, recvmsg }, /* 27 = recvmsg */
+ { 3, sendmsg }, /* 28 = sendmsg */
+ { 6, recvfrom }, /* 29 = recvfrom */
+ { 3, accept }, /* 30 = accept */
+ { 3, getpeername }, /* 31 = getpeername */
+ { 3, getsockname }, /* 32 = getsockname */
+ { 2, access }, /* 33 = access */
+ { 2, chflags }, /* 34 = chflags */
+ { 2, fchflags }, /* 35 = fchflags */
+ { 0, sync }, /* 36 = sync */
+ { 2, kill }, /* 37 = kill */
+ { compat(2,stat) }, /* 38 = old stat */
+ { 0, getppid }, /* 39 = getppid */
+ { compat(2,lstat) }, /* 40 = old lstat */
+ { 2, dup }, /* 41 = dup */
+ { 0, pipe }, /* 42 = pipe */
+ { 0, getegid }, /* 43 = getegid */
+ { 4, profil }, /* 44 = profil */
+#ifdef KTRACE
+ { 4, ktrace }, /* 45 = ktrace */
+#else
+ { 0, nosys }, /* 45 = ktrace */
+#endif
+ { 3, sigaction }, /* 46 = sigaction */
+ { 0, getgid }, /* 47 = getgid */
+ { 2, sigprocmask }, /* 48 = sigprocmask */
+ { 2, getlogin }, /* 49 = getlogin */
+ { 1, setlogin }, /* 50 = setlogin */
+ { 1, acct }, /* 51 = acct */
+ { 0, sigpending }, /* 52 = sigpending */
+ { 2, sigaltstack }, /* 53 = sigaltstack */
+ { 3, ioctl }, /* 54 = ioctl */
+ { 1, reboot }, /* 55 = reboot */
+ { 1, revoke }, /* 56 = revoke */
+ { 2, symlink }, /* 57 = symlink */
+ { 3, readlink }, /* 58 = readlink */
+ { 3, execve }, /* 59 = execve */
+ { 1, umask }, /* 60 = umask */
+ { 1, chroot }, /* 61 = chroot */
+ { compat(2,fstat) }, /* 62 = old fstat */
+ { compat(4,getkerninfo) }, /* 63 = old getkerninfo */
+ { compat(0,getpagesize) }, /* 64 = old getpagesize */
+ { 2, msync }, /* 65 = msync */
+ { 0, vfork }, /* 66 = vfork */
+ { 0, nosys }, /* 67 = obsolete vread */
+ { 0, nosys }, /* 68 = obsolete vwrite */
+ { 1, sbrk }, /* 69 = sbrk */
+ { 1, sstk }, /* 70 = sstk */
+ { compat(7,mmap) }, /* 71 = old mmap */
+ { 1, ovadvise }, /* 72 = vadvise */
+ { 2, munmap }, /* 73 = munmap */
+ { 3, mprotect }, /* 74 = mprotect */
+ { 3, madvise }, /* 75 = madvise */
+ { 0, nosys }, /* 76 = obsolete vhangup */
+ { 0, nosys }, /* 77 = obsolete vlimit */
+ { 3, mincore }, /* 78 = mincore */
+ { 2, getgroups }, /* 79 = getgroups */
+ { 2, setgroups }, /* 80 = setgroups */
+ { 0, getpgrp }, /* 81 = getpgrp */
+ { 2, setpgid }, /* 82 = setpgid */
+ { 3, setitimer }, /* 83 = setitimer */
+ { compat(0,wait) }, /* 84 = old wait */
+ { 1, swapon }, /* 85 = swapon */
+ { 2, getitimer }, /* 86 = getitimer */
+ { compat(2,gethostname) }, /* 87 = old gethostname */
+ { compat(2,sethostname) }, /* 88 = old sethostname */
+ { 0, getdtablesize }, /* 89 = getdtablesize */
+ { 2, dup2 }, /* 90 = dup2 */
+ { 0, nosys }, /* 91 = getdopt */
+ { 3, fcntl }, /* 92 = fcntl */
+ { 5, select }, /* 93 = select */
+ { 0, nosys }, /* 94 = setdopt */
+ { 1, fsync }, /* 95 = fsync */
+ { 3, setpriority }, /* 96 = setpriority */
+ { 3, socket }, /* 97 = socket */
+ { 3, connect }, /* 98 = connect */
+ { compat(3,accept) }, /* 99 = old accept */
+ { 2, getpriority }, /* 100 = getpriority */
+ { compat(4,send) }, /* 101 = old send */
+ { compat(4,recv) }, /* 102 = old recv */
+ { 1, sigreturn }, /* 103 = sigreturn */
+ { 3, bind }, /* 104 = bind */
+ { 5, setsockopt }, /* 105 = setsockopt */
+ { 2, listen }, /* 106 = listen */
+ { 0, nosys }, /* 107 = obsolete vtimes */
+ { compat(3,sigvec) }, /* 108 = old sigvec */
+ { compat(1,sigblock) }, /* 109 = old sigblock */
+ { compat(1,sigsetmask) }, /* 110 = old sigsetmask */
+ { 1, sigsuspend }, /* 111 = sigsuspend */
+ { compat(2,sigstack) }, /* 112 = old sigstack */
+ { compat(3,recvmsg) }, /* 113 = old recvmsg */
+ { compat(3,sendmsg) }, /* 114 = old sendmsg */
+#ifdef TRACE
+ { 2, vtrace }, /* 115 = vtrace */
+#else
+ { 0, nosys }, /* 115 = obsolete vtrace */
+#endif
+ { 2, gettimeofday }, /* 116 = gettimeofday */
+ { 2, getrusage }, /* 117 = getrusage */
+ { 5, getsockopt }, /* 118 = getsockopt */
+#ifdef vax
+ { 1, resuba }, /* 119 = resuba */
+#else
+ { 0, nosys }, /* 119 = nosys */
+#endif
+ { 3, readv }, /* 120 = readv */
+ { 3, writev }, /* 121 = writev */
+ { 2, settimeofday }, /* 122 = settimeofday */
+ { 3, fchown }, /* 123 = fchown */
+ { 2, fchmod }, /* 124 = fchmod */
+ { compat(6,recvfrom) }, /* 125 = old recvfrom */
+ { compat(2,setreuid) }, /* 126 = old setreuid */
+ { compat(2,setregid) }, /* 127 = old setregid */
+ { 2, rename }, /* 128 = rename */
+ { compat(2,truncate) }, /* 129 = old truncate */
+ { compat(2,ftruncate) }, /* 130 = old ftruncate */
+ { 2, flock }, /* 131 = flock */
+ { 2, mkfifo }, /* 132 = mkfifo */
+ { 6, sendto }, /* 133 = sendto */
+ { 2, shutdown }, /* 134 = shutdown */
+ { 5, socketpair }, /* 135 = socketpair */
+ { 2, mkdir }, /* 136 = mkdir */
+ { 1, rmdir }, /* 137 = rmdir */
+ { 2, utimes }, /* 138 = utimes */
+ { 0, nosys }, /* 139 = obsolete 4.2 sigreturn */
+ { 2, adjtime }, /* 140 = adjtime */
+ { compat(3,getpeername) }, /* 141 = old getpeername */
+ { compat(0,gethostid) }, /* 142 = old gethostid */
+ { compat(1,sethostid) }, /* 143 = old sethostid */
+ { compat(2,getrlimit) }, /* 144 = old getrlimit */
+ { compat(2,setrlimit) }, /* 145 = old setrlimit */
+ { compat(2,killpg) }, /* 146 = old killpg */
+ { 0, setsid }, /* 147 = setsid */
+ { 4, quotactl }, /* 148 = quotactl */
+ { compat(4,quota) }, /* 149 = old quota */
+ { compat(3,getsockname) }, /* 150 = old getsockname */
+ { 0, nosys }, /* 151 = nosys */
+ { 0, nosys }, /* 152 = nosys */
+ { 0, nosys }, /* 153 = nosys */
+ { 0, nosys }, /* 154 = nosys */
+#ifdef NFS
+ { 2, nfssvc }, /* 155 = nfssvc */
+#else
+ { 0, nosys }, /* 155 = nosys */
+#endif
+ { compat(4,getdirentries) }, /* 156 = old getdirentries */
+ { 2, statfs }, /* 157 = statfs */
+ { 2, fstatfs }, /* 158 = fstatfs */
+ { 0, nosys }, /* 159 = nosys */
+ { 0, nosys }, /* 160 = nosys */
+#ifdef NFS
+ { 2, getfh }, /* 161 = getfh */
+#else
+ { 0, nosys }, /* 161 = nosys */
+#endif
+ { 0, nosys }, /* 162 = nosys */
+ { 0, nosys }, /* 163 = nosys */
+ { 0, nosys }, /* 164 = nosys */
+ { 0, nosys }, /* 165 = nosys */
+ { 0, nosys }, /* 166 = nosys */
+ { 0, nosys }, /* 167 = nosys */
+ { 0, nosys }, /* 168 = nosys */
+ { 0, nosys }, /* 169 = nosys */
+ { 0, nosys }, /* 170 = nosys */
+#ifdef SYSVSHM
+ { 4, shmsys }, /* 171 = shmsys */
+#else
+ { 0, nosys }, /* 171 = nosys */
+#endif
+ { 0, nosys }, /* 172 = nosys */
+ { 0, nosys }, /* 173 = nosys */
+ { 0, nosys }, /* 174 = nosys */
+ { 0, nosys }, /* 175 = nosys */
+ { 0, nosys }, /* 176 = nosys */
+ { 0, nosys }, /* 177 = nosys */
+ { 0, nosys }, /* 178 = nosys */
+ { 0, nosys }, /* 179 = nosys */
+ { 0, nosys }, /* 180 = nosys */
+ { 1, setgid }, /* 181 = setgid */
+ { 1, setegid }, /* 182 = setegid */
+ { 1, seteuid }, /* 183 = seteuid */
+#ifdef LFS
+ { 3, lfs_bmapv }, /* 184 = lfs_bmapv */
+ { 3, lfs_markv }, /* 185 = lfs_markv */
+ { 2, lfs_segclean }, /* 186 = lfs_segclean */
+ { 2, lfs_segwait }, /* 187 = lfs_segwait */
+#else
+ { 0, nosys }, /* 184 = nosys */
+ { 0, nosys }, /* 185 = nosys */
+ { 0, nosys }, /* 186 = nosys */
+ { 0, nosys }, /* 187 = nosys */
+#endif
+ { 2, stat }, /* 188 = stat */
+ { 2, fstat }, /* 189 = fstat */
+ { 2, lstat }, /* 190 = lstat */
+ { 2, pathconf }, /* 191 = pathconf */
+ { 2, fpathconf }, /* 192 = fpathconf */
+ { 0, nosys }, /* 193 = nosys */
+ { 2, getrlimit }, /* 194 = getrlimit */
+ { 2, setrlimit }, /* 195 = setrlimit */
+ { 4, getdirentries }, /* 196 = getdirentries */
+ { 8, mmap }, /* 197 = mmap */
+ { 0, nosys }, /* 198 = __syscall */
+ { 5, lseek }, /* 199 = lseek */
+ { 4, truncate }, /* 200 = truncate */
+ { 4, ftruncate }, /* 201 = ftruncate */
+ { 6, __sysctl }, /* 202 = __sysctl */
+ { 2, mlock }, /* 203 = mlock */
+ { 2, munlock }, /* 204 = munlock */
+ { 0, nosys }, /* 205 = nosys */
+ { 0, nosys }, /* 206 = nosys */
+ { 0, nosys }, /* 207 = nosys */
+ { 0, nosys }, /* 208 = nosys */
+ { 0, nosys }, /* 209 = nosys */
+ { 0, nosys }, /* 210 = nosys */
+};
+
+int nsysent = sizeof(sysent) / sizeof(sysent[0]);
diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c
new file mode 100644
index 0000000..b752279
--- /dev/null
+++ b/sys/kern/kern_acct.c
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)kern_acct.c 8.1 (Berkeley) 6/14/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+
+struct acct_args {
+ char *fname;
+};
+acct(a1, a2, a3)
+ struct proc *a1;
+ struct acct_args *a2;
+ int *a3;
+{
+ /*
+ * Body deleted.
+ */
+ return (ENOSYS);
+}
+
+acct_process(a1)
+ struct proc *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+/*
+ * Periodically check the file system to see if accounting
+ * should be turned on or off.
+ */
+
+/*
+ * Values associated with enabling and disabling accounting
+ */
+int acctsuspend = 2; /* stop accounting when < 2% free space left */
+int acctresume = 4; /* resume when free space risen to > 4% */
+int acctchkfreq = 15; /* frequency (in seconds) to check space */
+
+/*
+ * SHOULD REPLACE THIS WITH A DRIVER THAT CAN BE READ TO SIMPLIFY.
+ */
+struct vnode *acctp;
+struct vnode *savacctp;
+
+/* ARGSUSED */
+void
+acctwatch(a)
+ void *a;
+{
+ struct statfs sb;
+
+ if (savacctp) {
+ (void)VFS_STATFS(savacctp->v_mount, &sb, (struct proc *)0);
+ if (sb.f_bavail > acctresume * sb.f_blocks / 100) {
+ acctp = savacctp;
+ savacctp = NULL;
+ log(LOG_NOTICE, "Accounting resumed\n");
+ }
+ } else {
+ if (acctp == NULL)
+ return;
+ (void)VFS_STATFS(acctp->v_mount, &sb, (struct proc *)0);
+ if (sb.f_bavail <= acctsuspend * sb.f_blocks / 100) {
+ savacctp = acctp;
+ acctp = NULL;
+ log(LOG_NOTICE, "Accounting suspended\n");
+ }
+ }
+ timeout(acctwatch, NULL, acctchkfreq * hz);
+}
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
new file mode 100644
index 0000000..f42900c
--- /dev/null
+++ b/sys/kern/kern_clock.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other. The main clock, running hz times per second, is used to keep
+ * track of real time. The second timer handles kernel and user profiling,
+ * and does resource use estimation. If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks. For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock. This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling. This profile clock runs at profhz. We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics. (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ * allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's.
+ */
+#define BUMPTIME(t, usec) { \
+ register volatile struct timeval *tp = (t); \
+ register long us; \
+ \
+ tp->tv_usec = us = tp->tv_usec + (usec); \
+ if (us >= 1000000) { \
+ tp->tv_usec = us - 1000000; \
+ tp->tv_sec++; \
+ } \
+}
+
+int stathz;
+int profhz;
+int profprocs;
+int ticks;
+static int psdiv, pscnt; /* prof => stat divider */
+int psratio; /* ratio: prof / stat */
+
+volatile struct timeval time;
+volatile struct timeval mono_time;
+
+/*
+ * Initialize clock frequencies and start both clocks running.
+ */
+void
+initclocks()
+{
+ register int i;
+
+ /*
+ * Set divisors to 1 (normal case) and let the machine-specific
+ * code do its bit.
+ */
+ psdiv = pscnt = 1;
+ cpu_initclocks();
+
+ /*
+ * Compute profhz/stathz, and fix profhz if needed.
+ */
+ i = stathz ? stathz : hz;
+ if (profhz == 0)
+ profhz = i;
+ psratio = profhz / i;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second.
+ */
+void
+hardclock(frame)
+ register struct clockframe *frame;
+{
+ register struct callout *p1;
+ register struct proc *p;
+ register int delta, needsoft;
+ extern int tickdelta;
+ extern long timedelta;
+
+ /*
+ * Update real-time timeout queue.
+ * At front of queue are some number of events which are ``due''.
+ * The time to these is <= 0 and if negative represents the
+ * number of ticks which have passed since it was supposed to happen.
+ * The rest of the q elements (times > 0) are events yet to happen,
+ * where the time for each is given as a delta from the previous.
+ * Decrementing just the first of these serves to decrement the time
+ * to all events.
+ */
+ needsoft = 0;
+ for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+ if (--p1->c_time > 0)
+ break;
+ needsoft = 1;
+ if (p1->c_time == 0)
+ break;
+ }
+
+ p = curproc;
+ if (p) {
+ register struct pstats *pstats;
+
+ /*
+ * Run current process's virtual and profile time, as needed.
+ */
+ pstats = p->p_stats;
+ if (CLKF_USERMODE(frame) &&
+ timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+ psignal(p, SIGVTALRM);
+ if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+ psignal(p, SIGPROF);
+ }
+
+ /*
+ * If no separate statistics clock is available, run it from here.
+ */
+ if (stathz == 0)
+ statclock(frame);
+
+ /*
+ * Increment the time-of-day. The increment is just ``tick'' unless
+ * we are still adjusting the clock; see adjtime().
+ */
+ ticks++;
+ if (timedelta == 0)
+ delta = tick;
+ else {
+ delta = tick + tickdelta;
+ timedelta -= tickdelta;
+ }
+ BUMPTIME(&time, delta);
+ BUMPTIME(&mono_time, delta);
+
+ /*
+ * Process callouts at a very low cpu priority, so we don't keep the
+ * relatively high clock interrupt priority any longer than necessary.
+ */
+ if (needsoft) {
+ if (CLKF_BASEPRI(frame)) {
+ /*
+ * Save the overhead of a software interrupt;
+ * it will happen as soon as we return, so do it now.
+ */
+ (void)splsoftclock();
+ softclock();
+ } else
+ setsoftclock();
+ }
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run periodic events from timeout queue.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+ register struct callout *c;
+ register void *arg;
+ register void (*func) __P((void *));
+ register int s;
+
+ s = splhigh();
+ while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+ func = c->c_func;
+ arg = c->c_arg;
+ calltodo.c_next = c->c_next;
+ c->c_next = callfree;
+ callfree = c;
+ splx(s);
+ (*func)(arg);
+ (void) splhigh();
+ }
+ splx(s);
+}
+
+/*
+ * timeout --
+ * Execute a function after a specified length of time.
+ *
+ * untimeout --
+ * Cancel previous timeout function call.
+ *
+ * See AT&T BCI Driver Reference Manual for specification. This
+ * implementation differs from that one in that no identification
+ * value is returned from timeout, rather, the original arguments
+ * to timeout are used to identify entries for untimeout.
+ */
+void
+timeout(ftn, arg, ticks)
+ void (*ftn) __P((void *));
+ void *arg;
+ register int ticks;
+{
+ register struct callout *new, *p, *t;
+ register int s;
+
+ if (ticks <= 0)
+ ticks = 1;
+
+ /* Lock out the clock. */
+ s = splhigh();
+
+ /* Fill in the next free callout structure. */
+ if (callfree == NULL)
+ panic("timeout table full");
+ new = callfree;
+ callfree = new->c_next;
+ new->c_arg = arg;
+ new->c_func = ftn;
+
+ /*
+ * The time for each event is stored as a difference from the time
+ * of the previous event on the queue. Walk the queue, correcting
+ * the ticks argument for queue entries passed. Correct the ticks
+ * value for the queue entry immediately after the insertion point
+ * as well. Watch out for negative c_time values; these represent
+ * overdue events.
+ */
+ for (p = &calltodo;
+ (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+ if (t->c_time > 0)
+ ticks -= t->c_time;
+ new->c_time = ticks;
+ if (t != NULL)
+ t->c_time -= ticks;
+
+ /* Insert the new entry into the queue. */
+ p->c_next = new;
+ new->c_next = t;
+ splx(s);
+}
+
+void
+untimeout(ftn, arg)
+ void (*ftn) __P((void *));
+ void *arg;
+{
+ register struct callout *p, *t;
+ register int s;
+
+ s = splhigh();
+ for (p = &calltodo; (t = p->c_next) != NULL; p = t)
+ if (t->c_func == ftn && t->c_arg == arg) {
+ /* Increment next entry's tick count. */
+ if (t->c_next && t->c_time > 0)
+ t->c_next->c_time += t->c_time;
+
+ /* Move entry from callout queue to callfree queue. */
+ p->c_next = t->c_next;
+ t->c_next = callfree;
+ callfree = t;
+ break;
+ }
+ splx(s);
+}
+
+/*
+ * Compute number of hz until specified time. Used to
+ * compute third argument to timeout() from an absolute time.
+ */
+int
+hzto(tv)
+ struct timeval *tv;
+{
+ register long ticks, sec;
+ int s;
+
+ /*
+ * If number of milliseconds will fit in 32 bit arithmetic,
+ * then compute number of milliseconds to time and scale to
+ * ticks. Otherwise just compute number of hz in time, rounding
+ * times greater than representible to maximum value.
+ *
+ * Delta times less than 25 days can be computed ``exactly''.
+ * Maximum value for any timeout in 10ms ticks is 250 days.
+ */
+ s = splhigh();
+ sec = tv->tv_sec - time.tv_sec;
+ if (sec <= 0x7fffffff / 1000 - 1000)
+ ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+ (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+ else if (sec <= 0x7fffffff / hz)
+ ticks = sec * hz;
+ else
+ ticks = 0x7fffffff;
+ splx(s);
+ return (ticks);
+}
+
+/*
+ * Start profiling on a process.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+ register struct proc *p;
+{
+ int s;
+
+ if ((p->p_flag & P_PROFIL) == 0) {
+ p->p_flag |= P_PROFIL;
+ if (++profprocs == 1 && stathz != 0) {
+ s = splstatclock();
+ psdiv = pscnt = psratio;
+ setstatclockrate(profhz);
+ splx(s);
+ }
+ }
+}
+
+/*
+ * Stop profiling on a process.
+ */
+void
+stopprofclock(p)
+ register struct proc *p;
+{
+ int s;
+
+ if (p->p_flag & P_PROFIL) {
+ p->p_flag &= ~P_PROFIL;
+ if (--profprocs == 0 && stathz != 0) {
+ s = splstatclock();
+ psdiv = pscnt = 1;
+ setstatclockrate(stathz);
+ splx(s);
+ }
+ }
+}
+
+int dk_ndrive = DK_NDRIVE;
+
+/*
+ * Statistics clock. Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ */
+void
+statclock(frame)
+ register struct clockframe *frame;
+{
+#ifdef GPROF
+ register struct gmonparam *g;
+#endif
+ register struct proc *p;
+ register int i;
+
+ if (CLKF_USERMODE(frame)) {
+ p = curproc;
+ if (p->p_flag & P_PROFIL)
+ addupc_intr(p, CLKF_PC(frame), 1);
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from user mode; CPU was in user state.
+ * If this process is being profiled record the tick.
+ */
+ p->p_uticks++;
+ if (p->p_nice > NZERO)
+ cp_time[CP_NICE]++;
+ else
+ cp_time[CP_USER]++;
+ } else {
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addupc_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = CLKF_PC(frame) - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from kernel mode, so we were:
+ * - handling an interrupt,
+ * - doing syscall or trap work on behalf of the current
+ * user process, or
+ * - spinning in the idle loop.
+ * Whichever it is, charge the time as appropriate.
+ * Note that we charge interrupts to the current process,
+ * regardless of whether they are ``for'' that process,
+ * so that we know how much of its real time was spent
+ * in ``non-process'' (i.e., interrupt) work.
+ */
+ p = curproc;
+ if (CLKF_INTR(frame)) {
+ if (p != NULL)
+ p->p_iticks++;
+ cp_time[CP_INTR]++;
+ } else if (p != NULL) {
+ p->p_sticks++;
+ cp_time[CP_SYS]++;
+ } else
+ cp_time[CP_IDLE]++;
+ }
+ pscnt = psdiv;
+
+ /*
+ * We maintain statistics shown by user-level statistics
+ * programs: the amount of time in each cpu state, and
+ * the amount of time each of DK_NDRIVE ``drives'' is busy.
+ *
+ * XXX should either run linked list of drives, or (better)
+ * grab timestamps in the start & done code.
+ */
+ for (i = 0; i < DK_NDRIVE; i++)
+ if (dk_busy & (1 << i))
+ dk_time[i]++;
+
+ /*
+ * We adjust the priority of the current process. The priority of
+ * a process gets worse as it accumulates CPU time. The cpu usage
+ * estimator (p_estcpu) is increased here. The formula for computing
+ * priorities (in kern_synch.c) will compute a different value each
+ * time p_estcpu increases by 4. The cpu usage estimator ramps up
+ * quite quickly when the process is running (linearly), and decays
+ * away exponentially, at a rate which is proportionally slower when
+ * the system is busy. The basic principal is that the system will
+ * 90% forget that the process used a lot of CPU time in 5 * loadav
+ * seconds. This causes the system to favor processes which haven't
+ * run much recently, and to round-robin among other processes.
+ */
+ if (p != NULL) {
+ p->p_cpticks++;
+ if (++p->p_estcpu == 0)
+ p->p_estcpu--;
+ if ((p->p_estcpu & 3) == 0) {
+ resetpriority(p);
+ if (p->p_priority >= PUSER)
+ p->p_priority = p->p_usrpri;
+ }
+ }
+}
+
+/*
+ * Return information about system clocks.
+ */
+sysctl_clockrate(where, sizep)
+ register char *where;
+ size_t *sizep;
+{
+ struct clockinfo clkinfo;
+
+ /*
+ * Construct clockinfo structure.
+ */
+ clkinfo.hz = hz;
+ clkinfo.tick = tick;
+ clkinfo.profhz = profhz;
+ clkinfo.stathz = stathz ? stathz : hz;
+ return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
+}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
new file mode 100644
index 0000000..543946d
--- /dev/null
+++ b/sys/kern/kern_descrip.c
@@ -0,0 +1,914 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/syslog.h>
+#include <sys/unistd.h>
+#include <sys/resourcevar.h>
+
+/*
+ * Descriptor management.
+ */
+struct file *filehead; /* head of list of open files */
+int nfiles; /* actual number of open files */
+
+/*
+ * System calls on descriptors.
+ */
+struct getdtablesize_args {
+ int dummy;
+};
+/* ARGSUSED */
+getdtablesize(p, uap, retval)
+ struct proc *p;
+ struct getdtablesize_args *uap;
+ int *retval;
+{
+
+ *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+ return (0);
+}
+
+/*
+ * Duplicate a file descriptor.
+ */
+struct dup_args {
+ u_int fd;
+};
+/* ARGSUSED */
+dup(p, uap, retval)
+ struct proc *p;
+ struct dup_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp;
+ u_int old;
+ int new, error;
+
+ old = uap->fd;
+ /*
+ * XXX Compatibility
+ */
+ if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, retval)); }
+
+ fdp = p->p_fd;
+ if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
+ return (EBADF);
+ if (error = fdalloc(p, 0, &new))
+ return (error);
+ return (finishdup(fdp, (int)old, new, retval));
+}
+
+/*
+ * Duplicate a file descriptor to a particular value.
+ */
+struct dup2_args {
+ u_int from;
+ u_int to;
+};
+/* ARGSUSED */
+dup2(p, uap, retval)
+ struct proc *p;
+ struct dup2_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register u_int old = uap->from, new = uap->to;
+ int i, error;
+
+ if (old >= fdp->fd_nfiles ||
+ fdp->fd_ofiles[old] == NULL ||
+ new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
+ new >= maxfiles)
+ return (EBADF);
+ if (old == new) {
+ *retval = new;
+ return (0);
+ }
+ if (new >= fdp->fd_nfiles) {
+ if (error = fdalloc(p, new, &i))
+ return (error);
+ if (new != i)
+ panic("dup2: fdalloc");
+ } else if (fdp->fd_ofiles[new]) {
+ if (fdp->fd_ofileflags[new] & UF_MAPPED)
+ (void) munmapfd(p, new);
+ /*
+ * dup2() must succeed even if the close has an error.
+ */
+ (void) closef(fdp->fd_ofiles[new], p);
+ }
+ return (finishdup(fdp, (int)old, (int)new, retval));
+}
+
+/*
+ * The file control system call.
+ */
+struct fcntl_args {
+ int fd;
+ int cmd;
+ int arg;
+};
+/* ARGSUSED */
+fcntl(p, uap, retval)
+ struct proc *p;
+ register struct fcntl_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ register char *pop;
+ struct vnode *vp;
+ int i, tmp, error, flg = F_POSIX;
+ struct flock fl;
+ u_int newmin;
+
+ if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ pop = &fdp->fd_ofileflags[uap->fd];
+ switch (uap->cmd) {
+
+ case F_DUPFD:
+ newmin = uap->arg;
+ if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
+ newmin >= maxfiles)
+ return (EINVAL);
+ if (error = fdalloc(p, newmin, &i))
+ return (error);
+ return (finishdup(fdp, uap->fd, i, retval));
+
+ case F_GETFD:
+ *retval = *pop & 1;
+ return (0);
+
+ case F_SETFD:
+ *pop = (*pop &~ 1) | (uap->arg & 1);
+ return (0);
+
+ case F_GETFL:
+ *retval = OFLAGS(fp->f_flag);
+ return (0);
+
+ case F_SETFL:
+ fp->f_flag &= ~FCNTLFLAGS;
+ fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS;
+ tmp = fp->f_flag & FNONBLOCK;
+ error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+ if (error)
+ return (error);
+ tmp = fp->f_flag & FASYNC;
+ error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
+ if (!error)
+ return (0);
+ fp->f_flag &= ~FNONBLOCK;
+ tmp = 0;
+ (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+ return (error);
+
+ case F_GETOWN:
+ if (fp->f_type == DTYPE_SOCKET) {
+ *retval = ((struct socket *)fp->f_data)->so_pgid;
+ return (0);
+ }
+ error = (*fp->f_ops->fo_ioctl)
+ (fp, (int)TIOCGPGRP, (caddr_t)retval, p);
+ *retval = -*retval;
+ return (error);
+
+ case F_SETOWN:
+ if (fp->f_type == DTYPE_SOCKET) {
+ ((struct socket *)fp->f_data)->so_pgid = uap->arg;
+ return (0);
+ }
+ if (uap->arg <= 0) {
+ uap->arg = -uap->arg;
+ } else {
+ struct proc *p1 = pfind(uap->arg);
+ if (p1 == 0)
+ return (ESRCH);
+ uap->arg = p1->p_pgrp->pg_id;
+ }
+ return ((*fp->f_ops->fo_ioctl)
+ (fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p));
+
+ case F_SETLKW:
+ flg |= F_WAIT;
+ /* Fall into F_SETLK */
+
+ case F_SETLK:
+ if (fp->f_type != DTYPE_VNODE)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+ /* Copy in the lock structure */
+ error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+ if (error)
+ return (error);
+ if (fl.l_whence == SEEK_CUR)
+ fl.l_start += fp->f_offset;
+ switch (fl.l_type) {
+
+ case F_RDLCK:
+ if ((fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ p->p_flag |= P_ADVLOCK;
+ return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+ case F_WRLCK:
+ if ((fp->f_flag & FWRITE) == 0)
+ return (EBADF);
+ p->p_flag |= P_ADVLOCK;
+ return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+ case F_UNLCK:
+ return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
+ F_POSIX));
+
+ default:
+ return (EINVAL);
+ }
+
+ case F_GETLK:
+ if (fp->f_type != DTYPE_VNODE)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+ /* Copy in the lock structure */
+ error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+ if (error)
+ return (error);
+ if (fl.l_whence == SEEK_CUR)
+ fl.l_start += fp->f_offset;
+ if (error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX))
+ return (error);
+ return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
+
+ default:
+ return (EINVAL);
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Common code for dup, dup2, and fcntl(F_DUPFD).
+ */
+int
+finishdup(fdp, old, new, retval)
+ register struct filedesc *fdp;
+ register int old, new, *retval;
+{
+ register struct file *fp;
+
+ fp = fdp->fd_ofiles[old];
+ fdp->fd_ofiles[new] = fp;
+ fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
+ fp->f_count++;
+ if (new > fdp->fd_lastfile)
+ fdp->fd_lastfile = new;
+ *retval = new;
+ return (0);
+}
+
+/*
+ * Close a file descriptor.
+ */
+struct close_args {
+ int fd;
+};
+/* ARGSUSED */
+close(p, uap, retval)
+ struct proc *p;
+ struct close_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ register int fd = uap->fd;
+ register u_char *pf;
+
+ if ((unsigned)fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[fd]) == NULL)
+ return (EBADF);
+ pf = (u_char *)&fdp->fd_ofileflags[fd];
+ if (*pf & UF_MAPPED)
+ (void) munmapfd(p, fd);
+ fdp->fd_ofiles[fd] = NULL;
+ while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+ fdp->fd_lastfile--;
+ if (fd < fdp->fd_freefile)
+ fdp->fd_freefile = fd;
+ *pf = 0;
+ return (closef(fp, p));
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Return status information about a file descriptor.
+ */
+struct ofstat_args {
+ int fd;
+ struct ostat *sb;
+};
+/* ARGSUSED */
+ofstat(p, uap, retval)
+ struct proc *p;
+ register struct ofstat_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct stat ub;
+ struct ostat oub;
+ int error;
+
+ if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ switch (fp->f_type) {
+
+ case DTYPE_VNODE:
+ error = vn_stat((struct vnode *)fp->f_data, &ub, p);
+ break;
+
+ case DTYPE_SOCKET:
+ error = soo_stat((struct socket *)fp->f_data, &ub);
+ break;
+
+ default:
+ panic("ofstat");
+ /*NOTREACHED*/
+ }
+ cvtstat(&ub, &oub);
+ if (error == 0)
+ error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
+ return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Return status information about a file descriptor.
+ */
+struct fstat_args {
+ int fd;
+ struct stat *sb;
+};
+/* ARGSUSED */
+fstat(p, uap, retval)
+ struct proc *p;
+ register struct fstat_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct stat ub;
+ int error;
+
+ if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ switch (fp->f_type) {
+
+ case DTYPE_VNODE:
+ error = vn_stat((struct vnode *)fp->f_data, &ub, p);
+ break;
+
+ case DTYPE_SOCKET:
+ error = soo_stat((struct socket *)fp->f_data, &ub);
+ break;
+
+ default:
+ panic("fstat");
+ /*NOTREACHED*/
+ }
+ if (error == 0)
+ error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
+ return (error);
+}
+
+/*
+ * Return pathconf information about a file descriptor.
+ */
+struct fpathconf_args {
+ int fd;
+ int name;
+};
+/* ARGSUSED */
+fpathconf(p, uap, retval)
+ struct proc *p;
+ register struct fpathconf_args *uap;
+ int *retval;
+{
+ struct filedesc *fdp = p->p_fd;
+ struct file *fp;
+ struct vnode *vp;
+
+ if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ switch (fp->f_type) {
+
+ case DTYPE_SOCKET:
+ if (uap->name != _PC_PIPE_BUF)
+ return (EINVAL);
+ *retval = PIPE_BUF;
+ return (0);
+
+ case DTYPE_VNODE:
+ vp = (struct vnode *)fp->f_data;
+ return (VOP_PATHCONF(vp, uap->name, retval));
+
+ default:
+ panic("fpathconf");
+ }
+ /*NOTREACHED*/
+}
+
+/*
+ * Allocate a file descriptor for the process.
+ */
+int fdexpand;
+
+fdalloc(p, want, result)
+ struct proc *p;
+ int want;
+ int *result;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register int i;
+ int lim, last, nfiles;
+ struct file **newofile;
+ char *newofileflags;
+
+ /*
+ * Search for a free descriptor starting at the higher
+ * of want or fd_freefile. If that fails, consider
+ * expanding the ofile array.
+ */
+ lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+ for (;;) {
+ last = min(fdp->fd_nfiles, lim);
+ if ((i = want) < fdp->fd_freefile)
+ i = fdp->fd_freefile;
+ for (; i < last; i++) {
+ if (fdp->fd_ofiles[i] == NULL) {
+ fdp->fd_ofileflags[i] = 0;
+ if (i > fdp->fd_lastfile)
+ fdp->fd_lastfile = i;
+ if (want <= fdp->fd_freefile)
+ fdp->fd_freefile = i;
+ *result = i;
+ return (0);
+ }
+ }
+
+ /*
+ * No space in current array. Expand?
+ */
+ if (fdp->fd_nfiles >= lim)
+ return (EMFILE);
+ if (fdp->fd_nfiles < NDEXTENT)
+ nfiles = NDEXTENT;
+ else
+ nfiles = 2 * fdp->fd_nfiles;
+ MALLOC(newofile, struct file **, nfiles * OFILESIZE,
+ M_FILEDESC, M_WAITOK);
+ newofileflags = (char *) &newofile[nfiles];
+ /*
+ * Copy the existing ofile and ofileflags arrays
+ * and zero the new portion of each array.
+ */
+ bcopy(fdp->fd_ofiles, newofile,
+ (i = sizeof(struct file *) * fdp->fd_nfiles));
+ bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
+ bcopy(fdp->fd_ofileflags, newofileflags,
+ (i = sizeof(char) * fdp->fd_nfiles));
+ bzero(newofileflags + i, nfiles * sizeof(char) - i);
+ if (fdp->fd_nfiles > NDFILE)
+ FREE(fdp->fd_ofiles, M_FILEDESC);
+ fdp->fd_ofiles = newofile;
+ fdp->fd_ofileflags = newofileflags;
+ fdp->fd_nfiles = nfiles;
+ fdexpand++;
+ }
+}
+
+/*
+ * Check to see whether n user file descriptors
+ * are available to the process p.
+ */
+fdavail(p, n)
+ struct proc *p;
+ register int n;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file **fpp;
+ register int i, lim;
+
+ lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+ if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
+ return (1);
+ fpp = &fdp->fd_ofiles[fdp->fd_freefile];
+ for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++)
+ if (*fpp == NULL && --n <= 0)
+ return (1);
+ return (0);
+}
+
+/*
+ * Create a new open file structure and allocate
+ * a file decriptor for the process that refers to it.
+ */
+falloc(p, resultfp, resultfd)
+ register struct proc *p;
+ struct file **resultfp;
+ int *resultfd;
+{
+ register struct file *fp, *fq, **fpp;
+ int error, i;
+
+ if (error = fdalloc(p, 0, &i))
+ return (error);
+ if (nfiles >= maxfiles) {
+ tablefull("file");
+ return (ENFILE);
+ }
+ /*
+ * Allocate a new file descriptor.
+ * If the process has file descriptor zero open, add to the list
+ * of open files at that point, otherwise put it at the front of
+ * the list of open files.
+ */
+ nfiles++;
+ MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
+ bzero(fp, sizeof(struct file));
+ if (fq = p->p_fd->fd_ofiles[0])
+ fpp = &fq->f_filef;
+ else
+ fpp = &filehead;
+ p->p_fd->fd_ofiles[i] = fp;
+ if (fq = *fpp)
+ fq->f_fileb = &fp->f_filef;
+ fp->f_filef = fq;
+ fp->f_fileb = fpp;
+ *fpp = fp;
+ fp->f_count = 1;
+ fp->f_cred = p->p_ucred;
+ crhold(fp->f_cred);
+ if (resultfp)
+ *resultfp = fp;
+ if (resultfd)
+ *resultfd = i;
+ return (0);
+}
+
+/*
+ * Free a file descriptor.
+ */
+ffree(fp)
+ register struct file *fp;
+{
+ register struct file *fq;
+
+ if (fq = fp->f_filef)
+ fq->f_fileb = fp->f_fileb;
+ *fp->f_fileb = fq;
+ crfree(fp->f_cred);
+#ifdef DIAGNOSTIC
+ fp->f_filef = NULL;
+ fp->f_fileb = NULL;
+ fp->f_count = 0;
+#endif
+ nfiles--;
+ FREE(fp, M_FILE);
+}
+
+/*
+ * Copy a filedesc structure.
+ */
+struct filedesc *
+fdcopy(p)
+ struct proc *p;
+{
+ register struct filedesc *newfdp, *fdp = p->p_fd;
+ register struct file **fpp;
+ register int i;
+
+ MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
+ M_FILEDESC, M_WAITOK);
+ bcopy(fdp, newfdp, sizeof(struct filedesc));
+ VREF(newfdp->fd_cdir);
+ if (newfdp->fd_rdir)
+ VREF(newfdp->fd_rdir);
+ newfdp->fd_refcnt = 1;
+
+ /*
+ * If the number of open files fits in the internal arrays
+ * of the open file structure, use them, otherwise allocate
+ * additional memory for the number of descriptors currently
+ * in use.
+ */
+ if (newfdp->fd_lastfile < NDFILE) {
+ newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
+ newfdp->fd_ofileflags =
+ ((struct filedesc0 *) newfdp)->fd_dfileflags;
+ i = NDFILE;
+ } else {
+ /*
+ * Compute the smallest multiple of NDEXTENT needed
+ * for the file descriptors currently in use,
+ * allowing the table to shrink.
+ */
+ i = newfdp->fd_nfiles;
+ while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
+ i /= 2;
+ MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
+ M_FILEDESC, M_WAITOK);
+ newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
+ }
+ newfdp->fd_nfiles = i;
+ bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
+ bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
+ fpp = newfdp->fd_ofiles;
+ for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
+ if (*fpp != NULL)
+ (*fpp)->f_count++;
+ return (newfdp);
+}
+
+/*
+ * Release a filedesc structure.
+ */
+void
+fdfree(p)
+ struct proc *p;
+{
+ register struct filedesc *fdp = p->p_fd;
+ struct file **fpp;
+ register int i;
+
+ if (--fdp->fd_refcnt > 0)
+ return;
+ fpp = fdp->fd_ofiles;
+ for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
+ if (*fpp)
+ (void) closef(*fpp, p);
+ if (fdp->fd_nfiles > NDFILE)
+ FREE(fdp->fd_ofiles, M_FILEDESC);
+ vrele(fdp->fd_cdir);
+ if (fdp->fd_rdir)
+ vrele(fdp->fd_rdir);
+ FREE(fdp, M_FILEDESC);
+}
+
+/*
+ * Internal form of close.
+ * Decrement reference count on file structure.
+ * Note: p may be NULL when closing a file
+ * that was being passed in a message.
+ */
+closef(fp, p)
+ register struct file *fp;
+ register struct proc *p;
+{
+ struct vnode *vp;
+ struct flock lf;
+ int error;
+
+ if (fp == NULL)
+ return (0);
+ /*
+ * POSIX record locking dictates that any close releases ALL
+ * locks owned by this process. This is handled by setting
+ * a flag in the unlock to free ONLY locks obeying POSIX
+ * semantics, and not to free BSD-style file locks.
+ * If the descriptor was in a message, POSIX-style locks
+ * aren't passed with the descriptor.
+ */
+ if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ vp = (struct vnode *)fp->f_data;
+ (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
+ }
+ if (--fp->f_count > 0)
+ return (0);
+ if (fp->f_count < 0)
+ panic("closef: count < 0");
+ if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ vp = (struct vnode *)fp->f_data;
+ (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
+ }
+ if (fp->f_ops)
+ error = (*fp->f_ops->fo_close)(fp, p);
+ else
+ error = 0;
+ ffree(fp);
+ return (error);
+}
+
+/*
+ * Apply an advisory lock on a file descriptor.
+ *
+ * Just attempt to get a record lock of the requested type on
+ * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
+ */
+struct flock_args {
+ int fd;
+ int how;
+};
+/* ARGSUSED */
+flock(p, uap, retval)
+ struct proc *p;
+ register struct flock_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct vnode *vp;
+ struct flock lf;
+
+ if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_VNODE)
+ return (EOPNOTSUPP);
+ vp = (struct vnode *)fp->f_data;
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (uap->how & LOCK_UN) {
+ lf.l_type = F_UNLCK;
+ fp->f_flag &= ~FHASLOCK;
+ return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
+ }
+ if (uap->how & LOCK_EX)
+ lf.l_type = F_WRLCK;
+ else if (uap->how & LOCK_SH)
+ lf.l_type = F_RDLCK;
+ else
+ return (EBADF);
+ fp->f_flag |= FHASLOCK;
+ if (uap->how & LOCK_NB)
+ return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
+ return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
+}
+
+/*
+ * File Descriptor pseudo-device driver (/dev/fd/).
+ *
+ * Opening minor device N dup()s the file (if any) connected to file
+ * descriptor N belonging to the calling process. Note that this driver
+ * consists of only the ``open()'' routine, because all subsequent
+ * references to this file will be direct to the other driver.
+ */
+/* ARGSUSED */
+fdopen(dev, mode, type, p)
+ dev_t dev;
+ int mode, type;
+ struct proc *p;
+{
+
+ /*
+ * XXX Kludge: set curproc->p_dupfd to contain the value of the
+ * the file descriptor being sought for duplication. The error
+ * return ensures that the vnode for this device will be released
+ * by vn_open. Open will detect this special error and take the
+ * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
+ * will simply report the error.
+ */
+ p->p_dupfd = minor(dev);
+ return (ENODEV);
+}
+
+/*
+ * Duplicate the specified descriptor to a free descriptor.
+ */
+dupfdopen(fdp, indx, dfd, mode, error)
+ register struct filedesc *fdp;
+ register int indx, dfd;
+ int mode;
+ int error;
+{
+ register struct file *wfp;
+ struct file *fp;
+
+ /*
+ * If the to-be-dup'd fd number is greater than the allowed number
+ * of file descriptors, or the fd to be dup'd has already been
+ * closed, reject. Note, check for new == old is necessary as
+ * falloc could allocate an already closed to-be-dup'd descriptor
+ * as the new descriptor.
+ */
+ fp = fdp->fd_ofiles[indx];
+ if ((u_int)dfd >= fdp->fd_nfiles ||
+ (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
+ return (EBADF);
+
+ /*
+ * There are two cases of interest here.
+ *
+ * For ENODEV simply dup (dfd) to file descriptor
+ * (indx) and return.
+ *
+ * For ENXIO steal away the file structure from (dfd) and
+ * store it in (indx). (dfd) is effectively closed by
+ * this operation.
+ *
+ * Any other error code is just returned.
+ */
+ switch (error) {
+ case ENODEV:
+ /*
+ * Check that the mode the file is being opened for is a
+ * subset of the mode of the existing descriptor.
+ */
+ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
+ return (EACCES);
+ fdp->fd_ofiles[indx] = wfp;
+ fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+ wfp->f_count++;
+ if (indx > fdp->fd_lastfile)
+ fdp->fd_lastfile = indx;
+ return (0);
+
+ case ENXIO:
+ /*
+ * Steal away the file pointer from dfd, and stuff it into indx.
+ */
+ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+ fdp->fd_ofiles[dfd] = NULL;
+ fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+ fdp->fd_ofileflags[dfd] = 0;
+ /*
+ * Complete the clean up of the filedesc structure by
+ * recomputing the various hints.
+ */
+ if (indx > fdp->fd_lastfile)
+ fdp->fd_lastfile = indx;
+ else
+ while (fdp->fd_lastfile > 0 &&
+ fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+ fdp->fd_lastfile--;
+ if (dfd < fdp->fd_freefile)
+ fdp->fd_freefile = dfd;
+ return (0);
+
+ default:
+ return (error);
+ }
+ /* NOTREACHED */
+}
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
new file mode 100644
index 0000000..fbb4444
--- /dev/null
+++ b/sys/kern/kern_exec.c
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+
+/*
+ * exec system call
+ */
+struct execve_args {
+ char *fname;
+ char **argp;
+ char **envp;
+};
+/* ARGSUSED */
+execve(a1, a2, a3)
+ struct proc *a1;
+ struct execve_args *a2;
+ int *a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (ENOSYS);
+}
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
new file mode 100644
index 0000000..03353c7
--- /dev/null
+++ b/sys/kern/kern_exit.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/map.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/wait.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/syslog.h>
+#include <sys/malloc.h>
+#include <sys/resourcevar.h>
+#include <sys/ptrace.h>
+
+#include <machine/cpu.h>
+#ifdef COMPAT_43
+#include <machine/reg.h>
+#include <machine/psl.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+
+__dead void cpu_exit __P((struct proc *));
+__dead void exit1 __P((struct proc *, int));
+
+/*
+ * exit --
+ * Death of process.
+ */
+struct rexit_args {
+ int rval;
+};
+__dead void
+exit(p, uap, retval)
+ struct proc *p;
+ struct rexit_args *uap;
+ int *retval;
+{
+
+ exit1(p, W_EXITCODE(uap->rval, 0));
+ /* NOTREACHED */
+}
+
+/*
+ * Exit: deallocate address space and other resources, change proc state
+ * to zombie, and unlink proc from allproc and parent's lists. Save exit
+ * status and rusage for wait(). Check for child processes and orphan them.
+ */
+__dead void
+exit1(p, rv)
+ register struct proc *p;
+ int rv;
+{
+ register struct proc *q, *nq;
+ register struct proc **pp;
+ register struct vmspace *vm;
+
+ if (p->p_pid == 1)
+ panic("init died (signal %d, exit %d)",
+ WTERMSIG(rv), WEXITSTATUS(rv));
+#ifdef PGINPROF
+ vmsizmon();
+#endif
+ if (p->p_flag & P_PROFIL)
+ stopprofclock(p);
+ MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
+ M_ZOMBIE, M_WAITOK);
+ /*
+ * If parent is waiting for us to exit or exec,
+ * P_PPWAIT is set; we will wakeup the parent below.
+ */
+ p->p_flag &= ~(P_TRACED | P_PPWAIT);
+ p->p_flag |= P_WEXIT;
+ p->p_sigignore = ~0;
+ p->p_siglist = 0;
+ untimeout(realitexpire, (caddr_t)p);
+
+ /*
+ * Close open files and release open-file table.
+ * This may block!
+ */
+ fdfree(p);
+
+ /* The next two chunks should probably be moved to vmspace_exit. */
+ vm = p->p_vmspace;
+#ifdef SYSVSHM
+ if (vm->vm_shm)
+ shmexit(p);
+#endif
+ /*
+ * Release user portion of address space.
+ * This releases references to vnodes,
+ * which could cause I/O if the file has been unlinked.
+ * Need to do this early enough that we can still sleep.
+ * Can't free the entire vmspace as the kernel stack
+ * may be mapped within that space also.
+ */
+ if (vm->vm_refcnt == 1)
+ (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
+ VM_MAXUSER_ADDRESS);
+
+ if (SESS_LEADER(p)) {
+ register struct session *sp = p->p_session;
+
+ if (sp->s_ttyvp) {
+ /*
+ * Controlling process.
+ * Signal foreground pgrp,
+ * drain controlling terminal
+ * and revoke access to controlling terminal.
+ */
+ if (sp->s_ttyp->t_session == sp) {
+ if (sp->s_ttyp->t_pgrp)
+ pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
+ (void) ttywait(sp->s_ttyp);
+ /*
+ * The tty could have been revoked
+ * if we blocked.
+ */
+ if (sp->s_ttyvp)
+ vgoneall(sp->s_ttyvp);
+ }
+ if (sp->s_ttyvp)
+ vrele(sp->s_ttyvp);
+ sp->s_ttyvp = NULL;
+ /*
+ * s_ttyp is not zero'd; we use this to indicate
+ * that the session once had a controlling terminal.
+ * (for logging and informational purposes)
+ */
+ }
+ sp->s_leader = NULL;
+ }
+ fixjobc(p, p->p_pgrp, 0);
+ p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+ (void)acct_process(p);
+#ifdef KTRACE
+ /*
+ * release trace file
+ */
+ p->p_traceflag = 0; /* don't trace the vrele() */
+ if (p->p_tracep)
+ vrele(p->p_tracep);
+#endif
+ /*
+ * Remove proc from allproc queue and pidhash chain.
+ * Place onto zombproc. Unlink from parent's child list.
+ */
+ if (*p->p_prev = p->p_next)
+ p->p_next->p_prev = p->p_prev;
+ if (p->p_next = zombproc)
+ p->p_next->p_prev = &p->p_next;
+ p->p_prev = &zombproc;
+ zombproc = p;
+ p->p_stat = SZOMB;
+
+ for (pp = &pidhash[PIDHASH(p->p_pid)]; *pp; pp = &(*pp)->p_hash)
+ if (*pp == p) {
+ *pp = p->p_hash;
+ goto done;
+ }
+ panic("exit");
+done:
+
+ if (p->p_cptr) /* only need this if any child is S_ZOMB */
+ wakeup((caddr_t) initproc);
+ for (q = p->p_cptr; q != NULL; q = nq) {
+ nq = q->p_osptr;
+ if (nq != NULL)
+ nq->p_ysptr = NULL;
+ if (initproc->p_cptr)
+ initproc->p_cptr->p_ysptr = q;
+ q->p_osptr = initproc->p_cptr;
+ q->p_ysptr = NULL;
+ initproc->p_cptr = q;
+
+ q->p_pptr = initproc;
+ /*
+ * Traced processes are killed
+ * since their existence means someone is screwing up.
+ */
+ if (q->p_flag & P_TRACED) {
+ q->p_flag &= ~P_TRACED;
+ psignal(q, SIGKILL);
+ }
+ }
+ p->p_cptr = NULL;
+
+ /*
+ * Save exit status and final rusage info, adding in child rusage
+ * info and self times.
+ */
+ p->p_xstat = rv;
+ *p->p_ru = p->p_stats->p_ru;
+ calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
+ ruadd(p->p_ru, &p->p_stats->p_cru);
+
+ /*
+ * Notify parent that we're gone.
+ */
+ psignal(p->p_pptr, SIGCHLD);
+ wakeup((caddr_t)p->p_pptr);
+#if defined(tahoe)
+ /* move this to cpu_exit */
+ p->p_addr->u_pcb.pcb_savacc.faddr = (float *)NULL;
+#endif
+ /*
+ * Clear curproc after we've done all operations
+ * that could block, and before tearing down the rest
+ * of the process state that might be used from clock, etc.
+ * Also, can't clear curproc while we're still runnable,
+ * as we're not on a run queue (we are current, just not
+ * a proper proc any longer!).
+ *
+ * Other substructures are freed from wait().
+ */
+ curproc = NULL;
+ if (--p->p_limit->p_refcnt == 0)
+ FREE(p->p_limit, M_SUBPROC);
+
+ /*
+ * Finally, call machine-dependent code to release the remaining
+ * resources including address space, the kernel stack and pcb.
+ * The address space is released by "vmspace_free(p->p_vmspace)";
+ * This is machine-dependent, as we may have to change stacks
+ * or ensure that the current one isn't reallocated before we
+ * finish. cpu_exit will end with a call to cpu_swtch(), finishing
+ * our execution (pun intended).
+ */
+ cpu_exit(p);
+}
+
+struct wait_args {
+ int pid;
+ int *status;
+ int options;
+ struct rusage *rusage;
+#ifdef COMPAT_43
+ int compat; /* pseudo */
+#endif
+};
+
+#ifdef COMPAT_43
+#if defined(hp300) || defined(luna68k)
+#include <machine/frame.h>
+#define GETPS(rp) ((struct frame *)(rp))->f_sr
+#else
+#define GETPS(rp) (rp)[PS]
+#endif
+
+owait(p, uap, retval)
+ struct proc *p;
+ register struct wait_args *uap;
+ int *retval;
+{
+
+#ifdef PSL_ALLCC
+ if ((GETPS(p->p_md.md_regs) & PSL_ALLCC) != PSL_ALLCC) {
+ uap->options = 0;
+ uap->rusage = NULL;
+ } else {
+ uap->options = p->p_md.md_regs[R0];
+ uap->rusage = (struct rusage *)p->p_md.md_regs[R1];
+ }
+#else
+ uap->options = 0;
+ uap->rusage = NULL;
+#endif
+ uap->pid = WAIT_ANY;
+ uap->status = NULL;
+ uap->compat = 1;
+ return (wait1(p, uap, retval));
+}
+
+wait4(p, uap, retval)
+ struct proc *p;
+ struct wait_args *uap;
+ int *retval;
+{
+
+ uap->compat = 0;
+ return (wait1(p, uap, retval));
+}
+#else
+#define wait1 wait4
+#endif
+
+int
+wait1(q, uap, retval)
+ register struct proc *q;
+ register struct wait_args *uap;
+ int retval[];
+{
+ register int nfound;
+ register struct proc *p, *t;
+ int status, error;
+
+ if (uap->pid == 0)
+ uap->pid = -q->p_pgid;
+#ifdef notyet
+ if (uap->options &~ (WUNTRACED|WNOHANG))
+ return (EINVAL);
+#endif
+loop:
+ nfound = 0;
+ for (p = q->p_cptr; p; p = p->p_osptr) {
+ if (uap->pid != WAIT_ANY &&
+ p->p_pid != uap->pid && p->p_pgid != -uap->pid)
+ continue;
+ nfound++;
+ if (p->p_stat == SZOMB) {
+ retval[0] = p->p_pid;
+#ifdef COMPAT_43
+ if (uap->compat)
+ retval[1] = p->p_xstat;
+ else
+#endif
+ if (uap->status) {
+ status = p->p_xstat; /* convert to int */
+ if (error = copyout((caddr_t)&status,
+ (caddr_t)uap->status, sizeof(status)))
+ return (error);
+ }
+ if (uap->rusage && (error = copyout((caddr_t)p->p_ru,
+ (caddr_t)uap->rusage, sizeof (struct rusage))))
+ return (error);
+ /*
+ * If we got the child via a ptrace 'attach',
+ * we need to give it back to the old parent.
+ */
+ if (p->p_oppid && (t = pfind(p->p_oppid))) {
+ p->p_oppid = 0;
+ proc_reparent(p, t);
+ psignal(t, SIGCHLD);
+ wakeup((caddr_t)t);
+ return (0);
+ }
+ p->p_xstat = 0;
+ ruadd(&q->p_stats->p_cru, p->p_ru);
+ FREE(p->p_ru, M_ZOMBIE);
+
+ /*
+ * Decrement the count of procs running with this uid.
+ */
+ (void)chgproccnt(p->p_cred->p_ruid, -1);
+
+ /*
+ * Free up credentials.
+ */
+ if (--p->p_cred->p_refcnt == 0) {
+ crfree(p->p_cred->pc_ucred);
+ FREE(p->p_cred, M_SUBPROC);
+ }
+
+ /*
+ * Release reference to text vnode
+ */
+ if (p->p_textvp)
+ vrele(p->p_textvp);
+
+ /*
+ * Finally finished with old proc entry.
+ * Unlink it from its process group and free it.
+ */
+ leavepgrp(p);
+ if (*p->p_prev = p->p_next) /* off zombproc */
+ p->p_next->p_prev = p->p_prev;
+ if (q = p->p_ysptr)
+ q->p_osptr = p->p_osptr;
+ if (q = p->p_osptr)
+ q->p_ysptr = p->p_ysptr;
+ if ((q = p->p_pptr)->p_cptr == p)
+ q->p_cptr = p->p_osptr;
+
+ /*
+ * Give machine-dependent layer a chance
+ * to free anything that cpu_exit couldn't
+ * release while still running in process context.
+ */
+ cpu_wait(p);
+ FREE(p, M_PROC);
+ nprocs--;
+ return (0);
+ }
+ if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 &&
+ (p->p_flag & P_TRACED || uap->options & WUNTRACED)) {
+ p->p_flag |= P_WAITED;
+ retval[0] = p->p_pid;
+#ifdef COMPAT_43
+ if (uap->compat) {
+ retval[1] = W_STOPCODE(p->p_xstat);
+ error = 0;
+ } else
+#endif
+ if (uap->status) {
+ status = W_STOPCODE(p->p_xstat);
+ error = copyout((caddr_t)&status,
+ (caddr_t)uap->status, sizeof(status));
+ } else
+ error = 0;
+ return (error);
+ }
+ }
+ if (nfound == 0)
+ return (ECHILD);
+ if (uap->options & WNOHANG) {
+ retval[0] = 0;
+ return (0);
+ }
+ if (error = tsleep((caddr_t)q, PWAIT | PCATCH, "wait", 0))
+ return (error);
+ goto loop;
+}
+
+/*
+ * make process 'parent' the new parent of process 'child'.
+ */
+void
+proc_reparent(child, parent)
+ register struct proc *child;
+ register struct proc *parent;
+{
+ register struct proc *o;
+ register struct proc *y;
+
+ if (child->p_pptr == parent)
+ return;
+
+ /* fix up the child linkage for the old parent */
+ o = child->p_osptr;
+ y = child->p_ysptr;
+ if (y)
+ y->p_osptr = o;
+ if (o)
+ o->p_ysptr = y;
+ if (child->p_pptr->p_cptr == child)
+ child->p_pptr->p_cptr = o;
+
+ /* fix up child linkage for new parent */
+ o = parent->p_cptr;
+ if (o)
+ o->p_ysptr = child;
+ child->p_osptr = o;
+ child->p_ysptr = NULL;
+ parent->p_cptr = child;
+ child->p_pptr = parent;
+}
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
new file mode 100644
index 0000000..8bec2fa
--- /dev/null
+++ b/sys/kern/kern_fork.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/map.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/acct.h>
+#include <sys/ktrace.h>
+
+struct fork_args {
+ int dummy;
+};
+/* ARGSUSED */
+fork(p, uap, retval)
+ struct proc *p;
+ struct fork_args *uap;
+ int retval[];
+{
+
+ return (fork1(p, 0, retval));
+}
+
+/* ARGSUSED */
+vfork(p, uap, retval)
+ struct proc *p;
+ struct fork_args *uap;
+ int retval[];
+{
+
+ return (fork1(p, 1, retval));
+}
+
+int nprocs = 1; /* process 0 */
+
+fork1(p1, isvfork, retval)
+ register struct proc *p1;
+ int isvfork, retval[];
+{
+ register struct proc *p2;
+ register uid_t uid;
+ struct proc *newproc;
+ struct proc **hash;
+ int count;
+ static int nextpid, pidchecked = 0;
+
+ /*
+ * Although process entries are dynamically created, we still keep
+ * a global limit on the maximum number we will create. Don't allow
+ * a nonprivileged user to use the last process; don't let root
+ * exceed the limit. The variable nprocs is the current number of
+ * processes, maxproc is the limit.
+ */
+ uid = p1->p_cred->p_ruid;
+ if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
+ tablefull("proc");
+ return (EAGAIN);
+ }
+ /*
+ * Increment the count of procs running with this uid. Don't allow
+ * a nonprivileged user to exceed their current limit.
+ */
+ count = chgproccnt(uid, 1);
+ if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
+ (void)chgproccnt(uid, -1);
+ return (EAGAIN);
+ }
+
+ /* Allocate new proc. */
+ MALLOC(newproc, struct proc *, sizeof(struct proc), M_PROC, M_WAITOK);
+
+ /*
+ * Find an unused process ID. We remember a range of unused IDs
+ * ready to use (from nextpid+1 through pidchecked-1).
+ */
+ nextpid++;
+retry:
+ /*
+ * If the process ID prototype has wrapped around,
+ * restart somewhat above 0, as the low-numbered procs
+ * tend to include daemons that don't exit.
+ */
+ if (nextpid >= PID_MAX) {
+ nextpid = 100;
+ pidchecked = 0;
+ }
+ if (nextpid >= pidchecked) {
+ int doingzomb = 0;
+
+ pidchecked = PID_MAX;
+ /*
+ * Scan the active and zombie procs to check whether this pid
+ * is in use. Remember the lowest pid that's greater
+ * than nextpid, so we can avoid checking for a while.
+ */
+ p2 = (struct proc *)allproc;
+again:
+ for (; p2 != NULL; p2 = p2->p_next) {
+ while (p2->p_pid == nextpid ||
+ p2->p_pgrp->pg_id == nextpid) {
+ nextpid++;
+ if (nextpid >= pidchecked)
+ goto retry;
+ }
+ if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
+ pidchecked = p2->p_pid;
+ if (p2->p_pgrp->pg_id > nextpid &&
+ pidchecked > p2->p_pgrp->pg_id)
+ pidchecked = p2->p_pgrp->pg_id;
+ }
+ if (!doingzomb) {
+ doingzomb = 1;
+ p2 = zombproc;
+ goto again;
+ }
+ }
+
+
+ /*
+ * Link onto allproc (this should probably be delayed).
+ * Heavy use of volatile here to prevent the compiler from
+ * rearranging code. Yes, it *is* terribly ugly, but at least
+ * it works.
+ */
+ nprocs++;
+ p2 = newproc;
+#define Vp2 ((volatile struct proc *)p2)
+ Vp2->p_stat = SIDL; /* protect against others */
+ Vp2->p_pid = nextpid;
+ /*
+ * This is really:
+ * p2->p_next = allproc;
+ * allproc->p_prev = &p2->p_next;
+ * p2->p_prev = &allproc;
+ * allproc = p2;
+ * The assignment via allproc is legal since it is never NULL.
+ */
+ *(volatile struct proc **)&Vp2->p_next = allproc;
+ *(volatile struct proc ***)&allproc->p_prev =
+ (volatile struct proc **)&Vp2->p_next;
+ *(volatile struct proc ***)&Vp2->p_prev = &allproc;
+ allproc = Vp2;
+#undef Vp2
+ p2->p_forw = p2->p_back = NULL; /* shouldn't be necessary */
+
+ /* Insert on the hash chain. */
+ hash = &pidhash[PIDHASH(p2->p_pid)];
+ p2->p_hash = *hash;
+ *hash = p2;
+
+ /*
+ * Make a proc table entry for the new process.
+ * Start by zeroing the section of proc that is zero-initialized,
+ * then copy the section that is copied directly from the parent.
+ */
+ bzero(&p2->p_startzero,
+ (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
+ bcopy(&p1->p_startcopy, &p2->p_startcopy,
+ (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
+
+ /*
+ * Duplicate sub-structures as needed.
+ * Increase reference counts on shared objects.
+ * The p_stats and p_sigacts substructs are set in vm_fork.
+ */
+ p2->p_flag = P_INMEM;
+ if (p1->p_flag & P_PROFIL)
+ startprofclock(p2);
+ MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred),
+ M_SUBPROC, M_WAITOK);
+ bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
+ p2->p_cred->p_refcnt = 1;
+ crhold(p1->p_ucred);
+
+ /* bump references to the text vnode (for procfs) */
+ p2->p_textvp = p1->p_textvp;
+ if (p2->p_textvp)
+ VREF(p2->p_textvp);
+
+ p2->p_fd = fdcopy(p1);
+ /*
+ * If p_limit is still copy-on-write, bump refcnt,
+ * otherwise get a copy that won't be modified.
+ * (If PL_SHAREMOD is clear, the structure is shared
+ * copy-on-write.)
+ */
+ if (p1->p_limit->p_lflags & PL_SHAREMOD)
+ p2->p_limit = limcopy(p1->p_limit);
+ else {
+ p2->p_limit = p1->p_limit;
+ p2->p_limit->p_refcnt++;
+ }
+
+ if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
+ p2->p_flag |= P_CONTROLT;
+ if (isvfork)
+ p2->p_flag |= P_PPWAIT;
+ p2->p_pgrpnxt = p1->p_pgrpnxt;
+ p1->p_pgrpnxt = p2;
+ p2->p_pptr = p1;
+ p2->p_osptr = p1->p_cptr;
+ if (p1->p_cptr)
+ p1->p_cptr->p_ysptr = p2;
+ p1->p_cptr = p2;
+#ifdef KTRACE
+ /*
+ * Copy traceflag and tracefile if enabled.
+ * If not inherited, these were zeroed above.
+ */
+ if (p1->p_traceflag&KTRFAC_INHERIT) {
+ p2->p_traceflag = p1->p_traceflag;
+ if ((p2->p_tracep = p1->p_tracep) != NULL)
+ VREF(p2->p_tracep);
+ }
+#endif
+
+ /*
+ * This begins the section where we must prevent the parent
+ * from being swapped.
+ */
+ p1->p_flag |= P_NOSWAP;
+ /*
+ * Set return values for child before vm_fork,
+ * so they can be copied to child stack.
+ * We return parent pid, and mark as child in retval[1].
+ * NOTE: the kernel stack may be at a different location in the child
+ * process, and thus addresses of automatic variables (including retval)
+ * may be invalid after vm_fork returns in the child process.
+ */
+ retval[0] = p1->p_pid;
+ retval[1] = 1;
+ if (vm_fork(p1, p2, isvfork)) {
+ /*
+ * Child process. Set start time and get to work.
+ */
+ (void) splclock();
+ p2->p_stats->p_start = time;
+ (void) spl0();
+ p2->p_acflag = AFORK;
+ return (0);
+ }
+
+ /*
+ * Make child runnable and add to run queue.
+ */
+ (void) splhigh();
+ p2->p_stat = SRUN;
+ setrunqueue(p2);
+ (void) spl0();
+
+ /*
+ * Now can be swapped.
+ */
+ p1->p_flag &= ~P_NOSWAP;
+
+ /*
+ * Preserve synchronization semantics of vfork. If waiting for
+ * child to exec or exit, set P_PPWAIT on child, and sleep on our
+ * proc (in case of exit).
+ */
+ if (isvfork)
+ while (p2->p_flag & P_PPWAIT)
+ tsleep(p1, PWAIT, "ppwait", 0);
+
+ /*
+ * Return child pid to parent process,
+ * marking us as parent via retval[1].
+ */
+ retval[0] = p2->p_pid;
+ retval[1] = 0;
+ return (0);
+}
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
new file mode 100644
index 0000000..763cfb2
--- /dev/null
+++ b/sys/kern/kern_ktrace.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93
+ */
+
+#ifdef KTRACE
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/ktrace.h>
+#include <sys/malloc.h>
+#include <sys/syslog.h>
+
+struct ktr_header *
+ktrgetheader(type)
+ int type;
+{
+ register struct ktr_header *kth;
+ struct proc *p = curproc; /* XXX */
+
+ MALLOC(kth, struct ktr_header *, sizeof (struct ktr_header),
+ M_TEMP, M_WAITOK);
+ kth->ktr_type = type;
+ microtime(&kth->ktr_time);
+ kth->ktr_pid = p->p_pid;
+ bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN);
+ return (kth);
+}
+
+ktrsyscall(vp, code, narg, args)
+ struct vnode *vp;
+ int code, narg, args[];
+{
+ struct ktr_header *kth;
+ struct ktr_syscall *ktp;
+ register len = sizeof(struct ktr_syscall) + (narg * sizeof(int));
+ struct proc *p = curproc; /* XXX */
+ int *argp, i;
+
+ p->p_traceflag |= KTRFAC_ACTIVE;
+ kth = ktrgetheader(KTR_SYSCALL);
+ MALLOC(ktp, struct ktr_syscall *, len, M_TEMP, M_WAITOK);
+ ktp->ktr_code = code;
+ ktp->ktr_narg = narg;
+ argp = (int *)((char *)ktp + sizeof(struct ktr_syscall));
+ for (i = 0; i < narg; i++)
+ *argp++ = args[i];
+ kth->ktr_buf = (caddr_t)ktp;
+ kth->ktr_len = len;
+ ktrwrite(vp, kth);
+ FREE(ktp, M_TEMP);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrsysret(vp, code, error, retval)
+ struct vnode *vp;
+ int code, error, retval;
+{
+ struct ktr_header *kth;
+ struct ktr_sysret ktp;
+ struct proc *p = curproc; /* XXX */
+
+ p->p_traceflag |= KTRFAC_ACTIVE;
+ kth = ktrgetheader(KTR_SYSRET);
+ ktp.ktr_code = code;
+ ktp.ktr_error = error;
+ ktp.ktr_retval = retval; /* what about val2 ? */
+
+ kth->ktr_buf = (caddr_t)&ktp;
+ kth->ktr_len = sizeof(struct ktr_sysret);
+
+ ktrwrite(vp, kth);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrnamei(vp, path)
+ struct vnode *vp;
+ char *path;
+{
+ struct ktr_header *kth;
+ struct proc *p = curproc; /* XXX */
+
+ p->p_traceflag |= KTRFAC_ACTIVE;
+ kth = ktrgetheader(KTR_NAMEI);
+ kth->ktr_len = strlen(path);
+ kth->ktr_buf = path;
+
+ ktrwrite(vp, kth);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrgenio(vp, fd, rw, iov, len, error)
+ struct vnode *vp;
+ int fd;
+ enum uio_rw rw;
+ register struct iovec *iov;
+ int len, error;
+{
+ struct ktr_header *kth;
+ register struct ktr_genio *ktp;
+ register caddr_t cp;
+ register int resid = len, cnt;
+ struct proc *p = curproc; /* XXX */
+
+ if (error)
+ return;
+ p->p_traceflag |= KTRFAC_ACTIVE;
+ kth = ktrgetheader(KTR_GENIO);
+ MALLOC(ktp, struct ktr_genio *, sizeof(struct ktr_genio) + len,
+ M_TEMP, M_WAITOK);
+ ktp->ktr_fd = fd;
+ ktp->ktr_rw = rw;
+ cp = (caddr_t)((char *)ktp + sizeof (struct ktr_genio));
+ while (resid > 0) {
+ if ((cnt = iov->iov_len) > resid)
+ cnt = resid;
+ if (copyin(iov->iov_base, cp, (unsigned)cnt))
+ goto done;
+ cp += cnt;
+ resid -= cnt;
+ iov++;
+ }
+ kth->ktr_buf = (caddr_t)ktp;
+ kth->ktr_len = sizeof (struct ktr_genio) + len;
+
+ ktrwrite(vp, kth);
+done:
+ FREE(kth, M_TEMP);
+ FREE(ktp, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrpsig(vp, sig, action, mask, code)
+ struct vnode *vp;
+ int sig;
+ sig_t action;
+ int mask, code;
+{
+ struct ktr_header *kth;
+ struct ktr_psig kp;
+ struct proc *p = curproc; /* XXX */
+
+ p->p_traceflag |= KTRFAC_ACTIVE;
+ kth = ktrgetheader(KTR_PSIG);
+ kp.signo = (char)sig;
+ kp.action = action;
+ kp.mask = mask;
+ kp.code = code;
+ kth->ktr_buf = (caddr_t)&kp;
+ kth->ktr_len = sizeof (struct ktr_psig);
+
+ ktrwrite(vp, kth);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrcsw(vp, out, user)
+ struct vnode *vp;
+ int out, user;
+{
+ struct ktr_header *kth;
+ struct ktr_csw kc;
+ struct proc *p = curproc; /* XXX */
+
+ p->p_traceflag |= KTRFAC_ACTIVE;
+ kth = ktrgetheader(KTR_CSW);
+ kc.out = out;
+ kc.user = user;
+ kth->ktr_buf = (caddr_t)&kc;
+ kth->ktr_len = sizeof (struct ktr_csw);
+
+ ktrwrite(vp, kth);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+/* Interface and common routines */
+
+/*
+ * ktrace system call
+ */
+struct ktrace_args {
+ char *fname;
+ int ops;
+ int facs;
+ int pid;
+};
+/* ARGSUSED */
+ktrace(curp, uap, retval)
+ struct proc *curp;
+ register struct ktrace_args *uap;
+ int *retval;
+{
+ register struct vnode *vp = NULL;
+ register struct proc *p;
+ struct pgrp *pg;
+ int facs = uap->facs & ~KTRFAC_ROOT;
+ int ops = KTROP(uap->ops);
+ int descend = uap->ops & KTRFLAG_DESCEND;
+ int ret = 0;
+ int error = 0;
+ struct nameidata nd;
+
+ curp->p_traceflag |= KTRFAC_ACTIVE;
+ if (ops != KTROP_CLEAR) {
+ /*
+ * an operation which requires a file argument.
+ */
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->fname, curp);
+ if (error = vn_open(&nd, FREAD|FWRITE, 0)) {
+ curp->p_traceflag &= ~KTRFAC_ACTIVE;
+ return (error);
+ }
+ vp = nd.ni_vp;
+ VOP_UNLOCK(vp);
+ if (vp->v_type != VREG) {
+ (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
+ curp->p_traceflag &= ~KTRFAC_ACTIVE;
+ return (EACCES);
+ }
+ }
+ /*
+ * Clear all uses of the tracefile
+ */
+ if (ops == KTROP_CLEARFILE) {
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ if (p->p_tracep == vp) {
+ if (ktrcanset(curp, p)) {
+ p->p_tracep = NULL;
+ p->p_traceflag = 0;
+ (void) vn_close(vp, FREAD|FWRITE,
+ p->p_ucred, p);
+ } else
+ error = EPERM;
+ }
+ }
+ goto done;
+ }
+ /*
+ * need something to (un)trace (XXX - why is this here?)
+ */
+ if (!facs) {
+ error = EINVAL;
+ goto done;
+ }
+ /*
+ * do it
+ */
+ if (uap->pid < 0) {
+ /*
+ * by process group
+ */
+ pg = pgfind(-uap->pid);
+ if (pg == NULL) {
+ error = ESRCH;
+ goto done;
+ }
+ for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt)
+ if (descend)
+ ret |= ktrsetchildren(curp, p, ops, facs, vp);
+ else
+ ret |= ktrops(curp, p, ops, facs, vp);
+
+ } else {
+ /*
+ * by pid
+ */
+ p = pfind(uap->pid);
+ if (p == NULL) {
+ error = ESRCH;
+ goto done;
+ }
+ if (descend)
+ ret |= ktrsetchildren(curp, p, ops, facs, vp);
+ else
+ ret |= ktrops(curp, p, ops, facs, vp);
+ }
+ if (!ret)
+ error = EPERM;
+done:
+ if (vp != NULL)
+ (void) vn_close(vp, FWRITE, curp->p_ucred, curp);
+ curp->p_traceflag &= ~KTRFAC_ACTIVE;
+ return (error);
+}
+
+int
+ktrops(curp, p, ops, facs, vp)
+ struct proc *p, *curp;
+ int ops, facs;
+ struct vnode *vp;
+{
+
+ if (!ktrcanset(curp, p))
+ return (0);
+ if (ops == KTROP_SET) {
+ if (p->p_tracep != vp) {
+ /*
+ * if trace file already in use, relinquish
+ */
+ if (p->p_tracep != NULL)
+ vrele(p->p_tracep);
+ VREF(vp);
+ p->p_tracep = vp;
+ }
+ p->p_traceflag |= facs;
+ if (curp->p_ucred->cr_uid == 0)
+ p->p_traceflag |= KTRFAC_ROOT;
+ } else {
+ /* KTROP_CLEAR */
+ if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
+ /* no more tracing */
+ p->p_traceflag = 0;
+ if (p->p_tracep != NULL) {
+ vrele(p->p_tracep);
+ p->p_tracep = NULL;
+ }
+ }
+ }
+
+ return (1);
+}
+
+ktrsetchildren(curp, top, ops, facs, vp)
+ struct proc *curp, *top;
+ int ops, facs;
+ struct vnode *vp;
+{
+ register struct proc *p;
+ register int ret = 0;
+
+ p = top;
+ for (;;) {
+ ret |= ktrops(curp, p, ops, facs, vp);
+ /*
+ * If this process has children, descend to them next,
+ * otherwise do any siblings, and if done with this level,
+ * follow back up the tree (but not past top).
+ */
+ if (p->p_cptr)
+ p = p->p_cptr;
+ else if (p == top)
+ return (ret);
+ else if (p->p_osptr)
+ p = p->p_osptr;
+ else for (;;) {
+ p = p->p_pptr;
+ if (p == top)
+ return (ret);
+ if (p->p_osptr) {
+ p = p->p_osptr;
+ break;
+ }
+ }
+ }
+ /*NOTREACHED*/
+}
+
+ktrwrite(vp, kth)
+ struct vnode *vp;
+ register struct ktr_header *kth;
+{
+ struct uio auio;
+ struct iovec aiov[2];
+ register struct proc *p = curproc; /* XXX */
+ int error;
+
+ if (vp == NULL)
+ return;
+ auio.uio_iov = &aiov[0];
+ auio.uio_offset = 0;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_WRITE;
+ aiov[0].iov_base = (caddr_t)kth;
+ aiov[0].iov_len = sizeof(struct ktr_header);
+ auio.uio_resid = sizeof(struct ktr_header);
+ auio.uio_iovcnt = 1;
+ auio.uio_procp = (struct proc *)0;
+ if (kth->ktr_len > 0) {
+ auio.uio_iovcnt++;
+ aiov[1].iov_base = kth->ktr_buf;
+ aiov[1].iov_len = kth->ktr_len;
+ auio.uio_resid += kth->ktr_len;
+ }
+ VOP_LOCK(vp);
+ error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred);
+ VOP_UNLOCK(vp);
+ if (!error)
+ return;
+ /*
+ * If error encountered, give up tracing on this vnode.
+ */
+ log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
+ error);
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ if (p->p_tracep == vp) {
+ p->p_tracep = NULL;
+ p->p_traceflag = 0;
+ vrele(vp);
+ }
+ }
+}
+
+/*
+ * Return true if caller has permission to set the ktracing state
+ * of target. Essentially, the target can't possess any
+ * more permissions than the caller. KTRFAC_ROOT signifies that
+ * root previously set the tracing status on the target process, and
+ * so, only root may further change it.
+ *
+ * TODO: check groups. use caller effective gid.
+ */
+ktrcanset(callp, targetp)
+ struct proc *callp, *targetp;
+{
+ register struct pcred *caller = callp->p_cred;
+ register struct pcred *target = targetp->p_cred;
+
+ if ((caller->pc_ucred->cr_uid == target->p_ruid &&
+ target->p_ruid == target->p_svuid &&
+ caller->p_rgid == target->p_rgid && /* XXX */
+ target->p_rgid == target->p_svgid &&
+ (targetp->p_traceflag & KTRFAC_ROOT) == 0) ||
+ caller->pc_ucred->cr_uid == 0)
+ return (1);
+
+ return (0);
+}
+
+#endif
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
new file mode 100644
index 0000000..c6276bc
--- /dev/null
+++ b/sys/kern/kern_malloc.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/map.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+
+struct kmembuckets bucket[MINBUCKET + 16];
+struct kmemstats kmemstats[M_LAST];
+struct kmemusage *kmemusage;
+char *kmembase, *kmemlimit;
+char *memname[] = INITKMEMNAMES;
+
+#ifdef DIAGNOSTIC
+/*
+ * This structure provides a set of masks to catch unaligned frees.
+ */
+long addrmask[] = { 0,
+ 0x00000001, 0x00000003, 0x00000007, 0x0000000f,
+ 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
+ 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
+ 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
+};
+
+/*
+ * The WEIRD_ADDR is used as known text to copy into free objects so
+ * that modifications after frees can be detected.
+ */
+#define WEIRD_ADDR 0xdeadbeef
+#define MAX_COPY 32
+
+/*
+ * Normally the first word of the structure is used to hold the list
+ * pointer for free objects. However, when running with diagnostics,
+ * we use the third and fourth fields, so as to catch modifications
+ * in the most commonly trashed first two words.
+ */
+struct freelist {
+ long spare0;
+ short type;
+ long spare1;
+ caddr_t next;
+};
+#else /* !DIAGNOSTIC */
+struct freelist {
+ caddr_t next;
+};
+#endif /* DIAGNOSTIC */
+
+/*
+ * Allocate a block of memory
+ */
+void *
+malloc(size, type, flags)
+ unsigned long size;
+ int type, flags;
+{
+ register struct kmembuckets *kbp;
+ register struct kmemusage *kup;
+ register struct freelist *freep;
+ long indx, npg, allocsize;
+ int s;
+ caddr_t va, cp, savedlist;
+#ifdef DIAGNOSTIC
+ long *end, *lp;
+ int copysize;
+ char *savedtype;
+#endif
+#ifdef KMEMSTATS
+ register struct kmemstats *ksp = &kmemstats[type];
+
+ if (((unsigned long)type) > M_LAST)
+ panic("malloc - bogus type");
+#endif
+ indx = BUCKETINDX(size);
+ kbp = &bucket[indx];
+ s = splimp();
+#ifdef KMEMSTATS
+ while (ksp->ks_memuse >= ksp->ks_limit) {
+ if (flags & M_NOWAIT) {
+ splx(s);
+ return ((void *) NULL);
+ }
+ if (ksp->ks_limblocks < 65535)
+ ksp->ks_limblocks++;
+ tsleep((caddr_t)ksp, PSWP+2, memname[type], 0);
+ }
+ ksp->ks_size |= 1 << indx;
+#endif
+#ifdef DIAGNOSTIC
+ copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY;
+#endif
+ if (kbp->kb_next == NULL) {
+ kbp->kb_last = NULL;
+ if (size > MAXALLOCSAVE)
+ allocsize = roundup(size, CLBYTES);
+ else
+ allocsize = 1 << indx;
+ npg = clrnd(btoc(allocsize));
+ va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg),
+ !(flags & M_NOWAIT));
+ if (va == NULL) {
+ splx(s);
+ return ((void *) NULL);
+ }
+#ifdef KMEMSTATS
+ kbp->kb_total += kbp->kb_elmpercl;
+#endif
+ kup = btokup(va);
+ kup->ku_indx = indx;
+ if (allocsize > MAXALLOCSAVE) {
+ if (npg > 65535)
+ panic("malloc: allocation too large");
+ kup->ku_pagecnt = npg;
+#ifdef KMEMSTATS
+ ksp->ks_memuse += allocsize;
+#endif
+ goto out;
+ }
+#ifdef KMEMSTATS
+ kup->ku_freecnt = kbp->kb_elmpercl;
+ kbp->kb_totalfree += kbp->kb_elmpercl;
+#endif
+ /*
+ * Just in case we blocked while allocating memory,
+ * and someone else also allocated memory for this
+ * bucket, don't assume the list is still empty.
+ */
+ savedlist = kbp->kb_next;
+ kbp->kb_next = cp = va + (npg * NBPG) - allocsize;
+ for (;;) {
+ freep = (struct freelist *)cp;
+#ifdef DIAGNOSTIC
+ /*
+ * Copy in known text to detect modification
+ * after freeing.
+ */
+ end = (long *)&cp[copysize];
+ for (lp = (long *)cp; lp < end; lp++)
+ *lp = WEIRD_ADDR;
+ freep->type = M_FREE;
+#endif /* DIAGNOSTIC */
+ if (cp <= va)
+ break;
+ cp -= allocsize;
+ freep->next = cp;
+ }
+ freep->next = savedlist;
+ if (kbp->kb_last == NULL)
+ kbp->kb_last = (caddr_t)freep;
+ }
+ va = kbp->kb_next;
+ kbp->kb_next = ((struct freelist *)va)->next;
+#ifdef DIAGNOSTIC
+ freep = (struct freelist *)va;
+ savedtype = (unsigned)freep->type < M_LAST ?
+ memname[freep->type] : "???";
+ if (kbp->kb_next &&
+ !kernacc(kbp->kb_next, sizeof(struct freelist), 0)) {
+ printf("%s of object 0x%x size %d %s %s (invalid addr 0x%x)\n",
+ "Data modified on freelist: word 2.5", va, size,
+ "previous type", savedtype, kbp->kb_next);
+ kbp->kb_next = NULL;
+ }
+#if BYTE_ORDER == BIG_ENDIAN
+ freep->type = WEIRD_ADDR >> 16;
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+ freep->type = (short)WEIRD_ADDR;
+#endif
+ if (((long)(&freep->next)) & 0x2)
+ freep->next = (caddr_t)((WEIRD_ADDR >> 16)|(WEIRD_ADDR << 16));
+ else
+ freep->next = (caddr_t)WEIRD_ADDR;
+ end = (long *)&va[copysize];
+ for (lp = (long *)va; lp < end; lp++) {
+ if (*lp == WEIRD_ADDR)
+ continue;
+ printf("%s %d of object 0x%x size %d %s %s (0x%x != 0x%x)\n",
+ "Data modified on freelist: word", lp - (long *)va,
+ va, size, "previous type", savedtype, *lp, WEIRD_ADDR);
+ break;
+ }
+ freep->spare0 = 0;
+#endif /* DIAGNOSTIC */
+#ifdef KMEMSTATS
+ kup = btokup(va);
+ if (kup->ku_indx != indx)
+ panic("malloc: wrong bucket");
+ if (kup->ku_freecnt == 0)
+ panic("malloc: lost data");
+ kup->ku_freecnt--;
+ kbp->kb_totalfree--;
+ ksp->ks_memuse += 1 << indx;
+out:
+ kbp->kb_calls++;
+ ksp->ks_inuse++;
+ ksp->ks_calls++;
+ if (ksp->ks_memuse > ksp->ks_maxused)
+ ksp->ks_maxused = ksp->ks_memuse;
+#else
+out:
+#endif
+ splx(s);
+ return ((void *) va);
+}
+
+/*
+ * Free a block of memory allocated by malloc.
+ */
+void
+free(addr, type)
+ void *addr;
+ int type;
+{
+ register struct kmembuckets *kbp;
+ register struct kmemusage *kup;
+ register struct freelist *freep;
+ long size;
+ int s;
+#ifdef DIAGNOSTIC
+ caddr_t cp;
+ long *end, *lp, alloc, copysize;
+#endif
+#ifdef KMEMSTATS
+ register struct kmemstats *ksp = &kmemstats[type];
+#endif
+
+ kup = btokup(addr);
+ size = 1 << kup->ku_indx;
+ kbp = &bucket[kup->ku_indx];
+ s = splimp();
+#ifdef DIAGNOSTIC
+ /*
+ * Check for returns of data that do not point to the
+ * beginning of the allocation.
+ */
+ if (size > NBPG * CLSIZE)
+ alloc = addrmask[BUCKETINDX(NBPG * CLSIZE)];
+ else
+ alloc = addrmask[kup->ku_indx];
+ if (((u_long)addr & alloc) != 0)
+ panic("free: unaligned addr 0x%x, size %d, type %s, mask %d\n",
+ addr, size, memname[type], alloc);
+#endif /* DIAGNOSTIC */
+ if (size > MAXALLOCSAVE) {
+ kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt));
+#ifdef KMEMSTATS
+ size = kup->ku_pagecnt << PGSHIFT;
+ ksp->ks_memuse -= size;
+ kup->ku_indx = 0;
+ kup->ku_pagecnt = 0;
+ if (ksp->ks_memuse + size >= ksp->ks_limit &&
+ ksp->ks_memuse < ksp->ks_limit)
+ wakeup((caddr_t)ksp);
+ ksp->ks_inuse--;
+ kbp->kb_total -= 1;
+#endif
+ splx(s);
+ return;
+ }
+ freep = (struct freelist *)addr;
+#ifdef DIAGNOSTIC
+ /*
+ * Check for multiple frees. Use a quick check to see if
+ * it looks free before laboriously searching the freelist.
+ */
+ if (freep->spare0 == WEIRD_ADDR) {
+ for (cp = kbp->kb_next; cp; cp = *(caddr_t *)cp) {
+ if (addr != cp)
+ continue;
+ printf("multiply freed item 0x%x\n", addr);
+ panic("free: duplicated free");
+ }
+ }
+ /*
+ * Copy in known text to detect modification after freeing
+ * and to make it look free. Also, save the type being freed
+ * so we can list likely culprit if modification is detected
+ * when the object is reallocated.
+ */
+ copysize = size < MAX_COPY ? size : MAX_COPY;
+ end = (long *)&((caddr_t)addr)[copysize];
+ for (lp = (long *)addr; lp < end; lp++)
+ *lp = WEIRD_ADDR;
+ freep->type = type;
+#endif /* DIAGNOSTIC */
+#ifdef KMEMSTATS
+ kup->ku_freecnt++;
+ if (kup->ku_freecnt >= kbp->kb_elmpercl)
+ if (kup->ku_freecnt > kbp->kb_elmpercl)
+ panic("free: multiple frees");
+ else if (kbp->kb_totalfree > kbp->kb_highwat)
+ kbp->kb_couldfree++;
+ kbp->kb_totalfree++;
+ ksp->ks_memuse -= size;
+ if (ksp->ks_memuse + size >= ksp->ks_limit &&
+ ksp->ks_memuse < ksp->ks_limit)
+ wakeup((caddr_t)ksp);
+ ksp->ks_inuse--;
+#endif
+ if (kbp->kb_next == NULL)
+ kbp->kb_next = addr;
+ else
+ ((struct freelist *)kbp->kb_last)->next = addr;
+ freep->next = NULL;
+ kbp->kb_last = addr;
+ splx(s);
+}
+
+/*
+ * Initialize the kernel memory allocator
+ */
+kmeminit()
+{
+ register long indx;
+ int npg;
+
+#if ((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0)
+ ERROR!_kmeminit:_MAXALLOCSAVE_not_power_of_2
+#endif
+#if (MAXALLOCSAVE > MINALLOCSIZE * 32768)
+ ERROR!_kmeminit:_MAXALLOCSAVE_too_big
+#endif
+#if (MAXALLOCSAVE < CLBYTES)
+ ERROR!_kmeminit:_MAXALLOCSAVE_too_small
+#endif
+ npg = VM_KMEM_SIZE/ NBPG;
+ kmemusage = (struct kmemusage *) kmem_alloc(kernel_map,
+ (vm_size_t)(npg * sizeof(struct kmemusage)));
+ kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
+ (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * NBPG), FALSE);
+#ifdef KMEMSTATS
+ for (indx = 0; indx < MINBUCKET + 16; indx++) {
+ if (1 << indx >= CLBYTES)
+ bucket[indx].kb_elmpercl = 1;
+ else
+ bucket[indx].kb_elmpercl = CLBYTES / (1 << indx);
+ bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl;
+ }
+ for (indx = 0; indx < M_LAST; indx++)
+ kmemstats[indx].ks_limit = npg * NBPG * 6 / 10;
+#endif
+}
diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c
new file mode 100644
index 0000000..1eaae35
--- /dev/null
+++ b/sys/kern/kern_physio.c
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)kern_physio.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+
+physio(a1, a2, a3, a4, a5, a6)
+ int (*a1)();
+ struct buf *a2;
+ dev_t a3;
+ int a4;
+ u_int (*a5)();
+ struct uio *a6;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (EIO);
+}
+
+u_int
+minphys(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+/*
+ * Do a read on a device for a user process.
+ */
+rawread(dev, uio)
+ dev_t dev;
+ struct uio *uio;
+{
+ return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
+ dev, B_READ, minphys, uio));
+}
+
+/*
+ * Do a write on a device for a user process.
+ */
+rawwrite(dev, uio)
+ dev_t dev;
+ struct uio *uio;
+{
+ return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
+ dev, B_WRITE, minphys, uio));
+}
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
new file mode 100644
index 0000000..91d9e21
--- /dev/null
+++ b/sys/kern/kern_proc.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_proc.c 8.4 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/map.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/acct.h>
+#include <sys/wait.h>
+#include <sys/file.h>
+#include <ufs/ufs/quota.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+/*
+ * Structure associated with user cacheing.
+ */
+struct uidinfo {
+ struct uidinfo *ui_next;
+ struct uidinfo **ui_prev;
+ uid_t ui_uid;
+ long ui_proccnt;
+} **uihashtbl;
+u_long uihash; /* size of hash table - 1 */
+#define UIHASH(uid) ((uid) & uihash)
+
+/*
+ * Allocate a hash table.
+ */
+usrinfoinit()
+{
+
+ uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash);
+}
+
+/*
+ * Change the count associated with number of processes
+ * a given user is using.
+ */
+int
+chgproccnt(uid, diff)
+ uid_t uid;
+ int diff;
+{
+ register struct uidinfo **uipp, *uip, *uiq;
+
+ uipp = &uihashtbl[UIHASH(uid)];
+ for (uip = *uipp; uip; uip = uip->ui_next)
+ if (uip->ui_uid == uid)
+ break;
+ if (uip) {
+ uip->ui_proccnt += diff;
+ if (uip->ui_proccnt > 0)
+ return (uip->ui_proccnt);
+ if (uip->ui_proccnt < 0)
+ panic("chgproccnt: procs < 0");
+ if (uiq = uip->ui_next)
+ uiq->ui_prev = uip->ui_prev;
+ *uip->ui_prev = uiq;
+ FREE(uip, M_PROC);
+ return (0);
+ }
+ if (diff <= 0) {
+ if (diff == 0)
+ return(0);
+ panic("chgproccnt: lost user");
+ }
+ MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
+ if (uiq = *uipp)
+ uiq->ui_prev = &uip->ui_next;
+ uip->ui_next = uiq;
+ uip->ui_prev = uipp;
+ *uipp = uip;
+ uip->ui_uid = uid;
+ uip->ui_proccnt = diff;
+ return (diff);
+}
+
+/*
+ * Is p an inferior of the current process?
+ */
+inferior(p)
+ register struct proc *p;
+{
+
+ for (; p != curproc; p = p->p_pptr)
+ if (p->p_pid == 0)
+ return (0);
+ return (1);
+}
+
+/*
+ * Locate a process by number
+ */
+struct proc *
+pfind(pid)
+ register pid_t pid;
+{
+ register struct proc *p;
+
+ for (p = pidhash[PIDHASH(pid)]; p != NULL; p = p->p_hash)
+ if (p->p_pid == pid)
+ return (p);
+ return (NULL);
+}
+
+/*
+ * Locate a process group by number
+ */
+struct pgrp *
+pgfind(pgid)
+ register pid_t pgid;
+{
+ register struct pgrp *pgrp;
+
+ for (pgrp = pgrphash[PIDHASH(pgid)];
+ pgrp != NULL; pgrp = pgrp->pg_hforw)
+ if (pgrp->pg_id == pgid)
+ return (pgrp);
+ return (NULL);
+}
+
+/*
+ * Move p to a new or existing process group (and session)
+ */
+enterpgrp(p, pgid, mksess)
+ register struct proc *p;
+ pid_t pgid;
+ int mksess;
+{
+ register struct pgrp *pgrp = pgfind(pgid);
+ register struct proc **pp;
+ int n;
+
+#ifdef DIAGNOSTIC
+ if (pgrp != NULL && mksess) /* firewalls */
+ panic("enterpgrp: setsid into non-empty pgrp");
+ if (SESS_LEADER(p))
+ panic("enterpgrp: session leader attempted setpgrp");
+#endif
+ if (pgrp == NULL) {
+ pid_t savepid = p->p_pid;
+ struct proc *np;
+ /*
+ * new process group
+ */
+#ifdef DIAGNOSTIC
+ if (p->p_pid != pgid)
+ panic("enterpgrp: new pgrp and pid != pgid");
+#endif
+ MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP,
+ M_WAITOK);
+ if ((np = pfind(savepid)) == NULL || np != p)
+ return (ESRCH);
+ if (mksess) {
+ register struct session *sess;
+
+ /*
+ * new session
+ */
+ MALLOC(sess, struct session *, sizeof(struct session),
+ M_SESSION, M_WAITOK);
+ sess->s_leader = p;
+ sess->s_count = 1;
+ sess->s_ttyvp = NULL;
+ sess->s_ttyp = NULL;
+ bcopy(p->p_session->s_login, sess->s_login,
+ sizeof(sess->s_login));
+ p->p_flag &= ~P_CONTROLT;
+ pgrp->pg_session = sess;
+#ifdef DIAGNOSTIC
+ if (p != curproc)
+ panic("enterpgrp: mksession and p != curproc");
+#endif
+ } else {
+ pgrp->pg_session = p->p_session;
+ pgrp->pg_session->s_count++;
+ }
+ pgrp->pg_id = pgid;
+ pgrp->pg_hforw = pgrphash[n = PIDHASH(pgid)];
+ pgrphash[n] = pgrp;
+ pgrp->pg_jobc = 0;
+ pgrp->pg_mem = NULL;
+ } else if (pgrp == p->p_pgrp)
+ return (0);
+
+ /*
+ * Adjust eligibility of affected pgrps to participate in job control.
+ * Increment eligibility counts before decrementing, otherwise we
+ * could reach 0 spuriously during the first call.
+ */
+ fixjobc(p, pgrp, 1);
+ fixjobc(p, p->p_pgrp, 0);
+
+ /*
+ * unlink p from old process group
+ */
+ for (pp = &p->p_pgrp->pg_mem; *pp; pp = &(*pp)->p_pgrpnxt) {
+ if (*pp == p) {
+ *pp = p->p_pgrpnxt;
+ break;
+ }
+ }
+#ifdef DIAGNOSTIC
+ if (pp == NULL)
+ panic("enterpgrp: can't find p on old pgrp");
+#endif
+ /*
+ * delete old if empty
+ */
+ if (p->p_pgrp->pg_mem == 0)
+ pgdelete(p->p_pgrp);
+ /*
+ * link into new one
+ */
+ p->p_pgrp = pgrp;
+ p->p_pgrpnxt = pgrp->pg_mem;
+ pgrp->pg_mem = p;
+ return (0);
+}
+
+/*
+ * remove process from process group
+ */
+leavepgrp(p)
+ register struct proc *p;
+{
+ register struct proc **pp = &p->p_pgrp->pg_mem;
+
+ for (; *pp; pp = &(*pp)->p_pgrpnxt) {
+ if (*pp == p) {
+ *pp = p->p_pgrpnxt;
+ break;
+ }
+ }
+#ifdef DIAGNOSTIC
+ if (pp == NULL)
+ panic("leavepgrp: can't find p in pgrp");
+#endif
+ if (!p->p_pgrp->pg_mem)
+ pgdelete(p->p_pgrp);
+ p->p_pgrp = 0;
+ return (0);
+}
+
+/*
+ * delete a process group
+ */
+pgdelete(pgrp)
+ register struct pgrp *pgrp;
+{
+ register struct pgrp **pgp = &pgrphash[PIDHASH(pgrp->pg_id)];
+
+ if (pgrp->pg_session->s_ttyp != NULL &&
+ pgrp->pg_session->s_ttyp->t_pgrp == pgrp)
+ pgrp->pg_session->s_ttyp->t_pgrp = NULL;
+ for (; *pgp; pgp = &(*pgp)->pg_hforw) {
+ if (*pgp == pgrp) {
+ *pgp = pgrp->pg_hforw;
+ break;
+ }
+ }
+#ifdef DIAGNOSTIC
+ if (pgp == NULL)
+ panic("pgdelete: can't find pgrp on hash chain");
+#endif
+ if (--pgrp->pg_session->s_count == 0)
+ FREE(pgrp->pg_session, M_SESSION);
+ FREE(pgrp, M_PGRP);
+}
+
+static void orphanpg();
+
+/*
+ * Adjust pgrp jobc counters when specified process changes process group.
+ * We count the number of processes in each process group that "qualify"
+ * the group for terminal job control (those with a parent in a different
+ * process group of the same session). If that count reaches zero, the
+ * process group becomes orphaned. Check both the specified process'
+ * process group and that of its children.
+ * entering == 0 => p is leaving specified group.
+ * entering == 1 => p is entering specified group.
+ */
+fixjobc(p, pgrp, entering)
+ register struct proc *p;
+ register struct pgrp *pgrp;
+ int entering;
+{
+ register struct pgrp *hispgrp;
+ register struct session *mysession = pgrp->pg_session;
+
+ /*
+ * Check p's parent to see whether p qualifies its own process
+ * group; if so, adjust count for p's process group.
+ */
+ if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
+ hispgrp->pg_session == mysession)
+ if (entering)
+ pgrp->pg_jobc++;
+ else if (--pgrp->pg_jobc == 0)
+ orphanpg(pgrp);
+
+ /*
+ * Check this process' children to see whether they qualify
+ * their process groups; if so, adjust counts for children's
+ * process groups.
+ */
+ for (p = p->p_cptr; p; p = p->p_osptr)
+ if ((hispgrp = p->p_pgrp) != pgrp &&
+ hispgrp->pg_session == mysession &&
+ p->p_stat != SZOMB)
+ if (entering)
+ hispgrp->pg_jobc++;
+ else if (--hispgrp->pg_jobc == 0)
+ orphanpg(hispgrp);
+}
+
+/*
+ * A process group has become orphaned;
+ * if there are any stopped processes in the group,
+ * hang-up all process in that group.
+ */
+static void
+orphanpg(pg)
+ struct pgrp *pg;
+{
+ register struct proc *p;
+
+ for (p = pg->pg_mem; p; p = p->p_pgrpnxt) {
+ if (p->p_stat == SSTOP) {
+ for (p = pg->pg_mem; p; p = p->p_pgrpnxt) {
+ psignal(p, SIGHUP);
+ psignal(p, SIGCONT);
+ }
+ return;
+ }
+ }
+}
+
+#ifdef debug
+/* DEBUG */
+pgrpdump()
+{
+ register struct pgrp *pgrp;
+ register struct proc *p;
+ register i;
+
+ for (i=0; i<PIDHSZ; i++) {
+ if (pgrphash[i]) {
+ printf("\tindx %d\n", i);
+ for (pgrp=pgrphash[i]; pgrp; pgrp=pgrp->pg_hforw) {
+ printf("\tpgrp %x, pgid %d, sess %x, sesscnt %d, mem %x\n",
+ pgrp, pgrp->pg_id, pgrp->pg_session,
+ pgrp->pg_session->s_count, pgrp->pg_mem);
+ for (p=pgrp->pg_mem; p; p=p->p_pgrpnxt) {
+ printf("\t\tpid %d addr %x pgrp %x\n",
+ p->p_pid, p, p->p_pgrp);
+ }
+ }
+
+ }
+ }
+}
+#endif /* debug */
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
new file mode 100644
index 0000000..ef40077
--- /dev/null
+++ b/sys/kern/kern_prot.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_prot.c 8.6 (Berkeley) 1/21/94
+ */
+
+/*
+ * System calls related to processes and protection
+ */
+
+#include <sys/param.h>
+#include <sys/acct.h>
+#include <sys/systm.h>
+#include <sys/ucred.h>
+#include <sys/proc.h>
+#include <sys/timeb.h>
+#include <sys/times.h>
+#include <sys/malloc.h>
+
+struct args {
+ int dummy;
+};
+
+/* ARGSUSED */
+getpid(p, uap, retval)
+ struct proc *p;
+ struct args *uap;
+ int *retval;
+{
+
+ *retval = p->p_pid;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ retval[1] = p->p_pptr->p_pid;
+#endif
+ return (0);
+}
+
+/* ARGSUSED */
+getppid(p, uap, retval)
+ struct proc *p;
+ struct args *uap;
+ int *retval;
+{
+
+ *retval = p->p_pptr->p_pid;
+ return (0);
+}
+
+/* Get process group ID; note that POSIX getpgrp takes no parameter */
+getpgrp(p, uap, retval)
+ struct proc *p;
+ struct args *uap;
+ int *retval;
+{
+
+ *retval = p->p_pgrp->pg_id;
+ return (0);
+}
+
+/* ARGSUSED */
+getuid(p, uap, retval)
+ struct proc *p;
+ struct args *uap;
+ int *retval;
+{
+
+ *retval = p->p_cred->p_ruid;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ retval[1] = p->p_ucred->cr_uid;
+#endif
+ return (0);
+}
+
+/* ARGSUSED */
+geteuid(p, uap, retval)
+ struct proc *p;
+ struct args *uap;
+ int *retval;
+{
+
+ *retval = p->p_ucred->cr_uid;
+ return (0);
+}
+
+/* ARGSUSED */
+getgid(p, uap, retval)
+ struct proc *p;
+ struct args *uap;
+ int *retval;
+{
+
+ *retval = p->p_cred->p_rgid;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ retval[1] = p->p_ucred->cr_groups[0];
+#endif
+ return (0);
+}
+
+/*
+ * Get effective group ID. The "egid" is groups[0], and could be obtained
+ * via getgroups. This syscall exists because it is somewhat painful to do
+ * correctly in a library function.
+ */
+/* ARGSUSED */
+getegid(p, uap, retval)
+ struct proc *p;
+ struct args *uap;
+ int *retval;
+{
+
+ *retval = p->p_ucred->cr_groups[0];
+ return (0);
+}
+
+struct getgroups_args {
+ u_int gidsetsize;
+ gid_t *gidset;
+};
+getgroups(p, uap, retval)
+ struct proc *p;
+ register struct getgroups_args *uap;
+ int *retval;
+{
+ register struct pcred *pc = p->p_cred;
+ register u_int ngrp;
+ int error;
+
+ if ((ngrp = uap->gidsetsize) == 0) {
+ *retval = pc->pc_ucred->cr_ngroups;
+ return (0);
+ }
+ if (ngrp < pc->pc_ucred->cr_ngroups)
+ return (EINVAL);
+ ngrp = pc->pc_ucred->cr_ngroups;
+ if (error = copyout((caddr_t)pc->pc_ucred->cr_groups,
+ (caddr_t)uap->gidset, ngrp * sizeof(gid_t)))
+ return (error);
+ *retval = ngrp;
+ return (0);
+}
+
+/* ARGSUSED */
+setsid(p, uap, retval)
+ register struct proc *p;
+ struct args *uap;
+ int *retval;
+{
+
+ if (p->p_pgid == p->p_pid || pgfind(p->p_pid)) {
+ return (EPERM);
+ } else {
+ (void)enterpgrp(p, p->p_pid, 1);
+ *retval = p->p_pid;
+ return (0);
+ }
+}
+
+/*
+ * set process group (setpgid/old setpgrp)
+ *
+ * caller does setpgid(targpid, targpgid)
+ *
+ * pid must be caller or child of caller (ESRCH)
+ * if a child
+ * pid must be in same session (EPERM)
+ * pid can't have done an exec (EACCES)
+ * if pgid != pid
+ * there must exist some pid in same session having pgid (EPERM)
+ * pid must not be session leader (EPERM)
+ */
+struct setpgid_args {
+ int pid; /* target process id */
+ int pgid; /* target pgrp id */
+};
+/* ARGSUSED */
+setpgid(curp, uap, retval)
+ struct proc *curp;
+ register struct setpgid_args *uap;
+ int *retval;
+{
+ register struct proc *targp; /* target process */
+ register struct pgrp *pgrp; /* target pgrp */
+
+ if (uap->pid != 0 && uap->pid != curp->p_pid) {
+ if ((targp = pfind(uap->pid)) == 0 || !inferior(targp))
+ return (ESRCH);
+ if (targp->p_session != curp->p_session)
+ return (EPERM);
+ if (targp->p_flag & P_EXEC)
+ return (EACCES);
+ } else
+ targp = curp;
+ if (SESS_LEADER(targp))
+ return (EPERM);
+ if (uap->pgid == 0)
+ uap->pgid = targp->p_pid;
+ else if (uap->pgid != targp->p_pid)
+ if ((pgrp = pgfind(uap->pgid)) == 0 ||
+ pgrp->pg_session != curp->p_session)
+ return (EPERM);
+ return (enterpgrp(targp, uap->pgid, 0));
+}
+
+struct setuid_args {
+ uid_t uid;
+};
+/* ARGSUSED */
+setuid(p, uap, retval)
+ struct proc *p;
+ struct setuid_args *uap;
+ int *retval;
+{
+ register struct pcred *pc = p->p_cred;
+ register uid_t uid;
+ int error;
+
+ uid = uap->uid;
+ if (uid != pc->p_ruid &&
+ (error = suser(pc->pc_ucred, &p->p_acflag)))
+ return (error);
+ /*
+ * Everything's okay, do it.
+ * Transfer proc count to new user.
+ * Copy credentials so other references do not see our changes.
+ */
+ (void)chgproccnt(pc->p_ruid, -1);
+ (void)chgproccnt(uid, 1);
+ pc->pc_ucred = crcopy(pc->pc_ucred);
+ pc->pc_ucred->cr_uid = uid;
+ pc->p_ruid = uid;
+ pc->p_svuid = uid;
+ p->p_flag |= P_SUGID;
+ return (0);
+}
+
+struct seteuid_args {
+ uid_t euid;
+};
+/* ARGSUSED */
+seteuid(p, uap, retval)
+ struct proc *p;
+ struct seteuid_args *uap;
+ int *retval;
+{
+ register struct pcred *pc = p->p_cred;
+ register uid_t euid;
+ int error;
+
+ euid = uap->euid;
+ if (euid != pc->p_ruid && euid != pc->p_svuid &&
+ (error = suser(pc->pc_ucred, &p->p_acflag)))
+ return (error);
+ /*
+ * Everything's okay, do it. Copy credentials so other references do
+ * not see our changes.
+ */
+ pc->pc_ucred = crcopy(pc->pc_ucred);
+ pc->pc_ucred->cr_uid = euid;
+ p->p_flag |= P_SUGID;
+ return (0);
+}
+
+struct setgid_args {
+ gid_t gid;
+};
+/* ARGSUSED */
+setgid(p, uap, retval)
+ struct proc *p;
+ struct setgid_args *uap;
+ int *retval;
+{
+ register struct pcred *pc = p->p_cred;
+ register gid_t gid;
+ int error;
+
+ gid = uap->gid;
+ if (gid != pc->p_rgid && (error = suser(pc->pc_ucred, &p->p_acflag)))
+ return (error);
+ pc->pc_ucred = crcopy(pc->pc_ucred);
+ pc->pc_ucred->cr_groups[0] = gid;
+ pc->p_rgid = gid;
+ pc->p_svgid = gid; /* ??? */
+ p->p_flag |= P_SUGID;
+ return (0);
+}
+
+struct setegid_args {
+ gid_t egid;
+};
+/* ARGSUSED */
+setegid(p, uap, retval)
+ struct proc *p;
+ struct setegid_args *uap;
+ int *retval;
+{
+ register struct pcred *pc = p->p_cred;
+ register gid_t egid;
+ int error;
+
+ egid = uap->egid;
+ if (egid != pc->p_rgid && egid != pc->p_svgid &&
+ (error = suser(pc->pc_ucred, &p->p_acflag)))
+ return (error);
+ pc->pc_ucred = crcopy(pc->pc_ucred);
+ pc->pc_ucred->cr_groups[0] = egid;
+ p->p_flag |= P_SUGID;
+ return (0);
+}
+
+struct setgroups_args {
+ u_int gidsetsize;
+ gid_t *gidset;
+};
+/* ARGSUSED */
+setgroups(p, uap, retval)
+ struct proc *p;
+ struct setgroups_args *uap;
+ int *retval;
+{
+ register struct pcred *pc = p->p_cred;
+ register u_int ngrp;
+ int error;
+
+ if (error = suser(pc->pc_ucred, &p->p_acflag))
+ return (error);
+ if ((ngrp = uap->gidsetsize) > NGROUPS)
+ return (EINVAL);
+ pc->pc_ucred = crcopy(pc->pc_ucred);
+ if (error = copyin((caddr_t)uap->gidset,
+ (caddr_t)pc->pc_ucred->cr_groups, ngrp * sizeof(gid_t)))
+ return (error);
+ pc->pc_ucred->cr_ngroups = ngrp;
+ p->p_flag |= P_SUGID;
+ return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct setreuid_args {
+ int ruid;
+ int euid;
+};
+/* ARGSUSED */
+osetreuid(p, uap, retval)
+ register struct proc *p;
+ struct setreuid_args *uap;
+ int *retval;
+{
+ register struct pcred *pc = p->p_cred;
+ struct seteuid_args args;
+
+ /*
+ * we assume that the intent of setting ruid is to be able to get
+ * back ruid priviledge. So we make sure that we will be able to
+ * do so, but do not actually set the ruid.
+ */
+ if (uap->ruid != (uid_t)-1 && uap->ruid != pc->p_ruid &&
+ uap->ruid != pc->p_svuid)
+ return (EPERM);
+ if (uap->euid == (uid_t)-1)
+ return (0);
+ args.euid = uap->euid;
+ return (seteuid(p, &args, retval));
+}
+
+struct setregid_args {
+ int rgid;
+ int egid;
+};
+/* ARGSUSED */
+osetregid(p, uap, retval)
+ register struct proc *p;
+ struct setregid_args *uap;
+ int *retval;
+{
+ register struct pcred *pc = p->p_cred;
+ struct setegid_args args;
+
+ /*
+ * we assume that the intent of setting rgid is to be able to get
+ * back rgid priviledge. So we make sure that we will be able to
+ * do so, but do not actually set the rgid.
+ */
+ if (uap->rgid != (gid_t)-1 && uap->rgid != pc->p_rgid &&
+ uap->rgid != pc->p_svgid)
+ return (EPERM);
+ if (uap->egid == (gid_t)-1)
+ return (0);
+ args.egid = uap->egid;
+ return (setegid(p, &args, retval));
+}
+#endif /* defined(COMPAT_43) || defined(COMPAT_SUNOS) */
+
+/*
+ * Check if gid is a member of the group set.
+ */
+groupmember(gid, cred)
+ gid_t gid;
+ register struct ucred *cred;
+{
+ register gid_t *gp;
+ gid_t *egp;
+
+ egp = &(cred->cr_groups[cred->cr_ngroups]);
+ for (gp = cred->cr_groups; gp < egp; gp++)
+ if (*gp == gid)
+ return (1);
+ return (0);
+}
+
+/*
+ * Test whether the specified credentials imply "super-user"
+ * privilege; if so, and we have accounting info, set the flag
+ * indicating use of super-powers.
+ * Returns 0 or error.
+ */
+suser(cred, acflag)
+ struct ucred *cred;
+ short *acflag;
+{
+ if (cred->cr_uid == 0) {
+ if (acflag)
+ *acflag |= ASU;
+ return (0);
+ }
+ return (EPERM);
+}
+
+/*
+ * Allocate a zeroed cred structure.
+ */
+struct ucred *
+crget()
+{
+ register struct ucred *cr;
+
+ MALLOC(cr, struct ucred *, sizeof(*cr), M_CRED, M_WAITOK);
+ bzero((caddr_t)cr, sizeof(*cr));
+ cr->cr_ref = 1;
+ return (cr);
+}
+
+/*
+ * Free a cred structure.
+ * Throws away space when ref count gets to 0.
+ */
+crfree(cr)
+ struct ucred *cr;
+{
+ int s;
+
+ s = splimp(); /* ??? */
+ if (--cr->cr_ref == 0)
+ FREE((caddr_t)cr, M_CRED);
+ (void) splx(s);
+}
+
+/*
+ * Copy cred structure to a new one and free the old one.
+ */
+struct ucred *
+crcopy(cr)
+ struct ucred *cr;
+{
+ struct ucred *newcr;
+
+ if (cr->cr_ref == 1)
+ return (cr);
+ newcr = crget();
+ *newcr = *cr;
+ crfree(cr);
+ newcr->cr_ref = 1;
+ return (newcr);
+}
+
+/*
+ * Dup cred struct to a new held one.
+ */
+struct ucred *
+crdup(cr)
+ struct ucred *cr;
+{
+ struct ucred *newcr;
+
+ newcr = crget();
+ *newcr = *cr;
+ newcr->cr_ref = 1;
+ return (newcr);
+}
+
+/*
+ * Get login name, if available.
+ */
+struct getlogin_args {
+ char *namebuf;
+ u_int namelen;
+};
+/* ARGSUSED */
+getlogin(p, uap, retval)
+ struct proc *p;
+ struct getlogin_args *uap;
+ int *retval;
+{
+
+ if (uap->namelen > sizeof (p->p_pgrp->pg_session->s_login))
+ uap->namelen = sizeof (p->p_pgrp->pg_session->s_login);
+ return (copyout((caddr_t) p->p_pgrp->pg_session->s_login,
+ (caddr_t) uap->namebuf, uap->namelen));
+}
+
+/*
+ * Set login name.
+ */
+struct setlogin_args {
+ char *namebuf;
+};
+/* ARGSUSED */
+setlogin(p, uap, retval)
+ struct proc *p;
+ struct setlogin_args *uap;
+ int *retval;
+{
+ int error;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ error = copyinstr((caddr_t) uap->namebuf,
+ (caddr_t) p->p_pgrp->pg_session->s_login,
+ sizeof (p->p_pgrp->pg_session->s_login) - 1, (u_int *)0);
+ if (error == ENAMETOOLONG)
+ error = EINVAL;
+ return (error);
+}
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
new file mode 100644
index 0000000..68e9dfb
--- /dev/null
+++ b/sys/kern/kern_resource.c
@@ -0,0 +1,476 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/resourcevar.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+/*
+ * Resource controls and accounting.
+ */
+
+struct getpriority_args {
+ int which;
+ int who;
+};
+getpriority(curp, uap, retval)
+ struct proc *curp;
+ register struct getpriority_args *uap;
+ int *retval;
+{
+ register struct proc *p;
+ register int low = PRIO_MAX + 1;
+
+ switch (uap->which) {
+
+ case PRIO_PROCESS:
+ if (uap->who == 0)
+ p = curp;
+ else
+ p = pfind(uap->who);
+ if (p == 0)
+ break;
+ low = p->p_nice;
+ break;
+
+ case PRIO_PGRP: {
+ register struct pgrp *pg;
+
+ if (uap->who == 0)
+ pg = curp->p_pgrp;
+ else if ((pg = pgfind(uap->who)) == NULL)
+ break;
+ for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt) {
+ if (p->p_nice < low)
+ low = p->p_nice;
+ }
+ break;
+ }
+
+ case PRIO_USER:
+ if (uap->who == 0)
+ uap->who = curp->p_ucred->cr_uid;
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ if (p->p_ucred->cr_uid == uap->who &&
+ p->p_nice < low)
+ low = p->p_nice;
+ }
+ break;
+
+ default:
+ return (EINVAL);
+ }
+ if (low == PRIO_MAX + 1)
+ return (ESRCH);
+ *retval = low;
+ return (0);
+}
+
+struct setpriority_args {
+ int which;
+ int who;
+ int prio;
+};
+/* ARGSUSED */
+setpriority(curp, uap, retval)
+ struct proc *curp;
+ register struct setpriority_args *uap;
+ int *retval;
+{
+ register struct proc *p;
+ int found = 0, error = 0;
+
+ switch (uap->which) {
+
+ case PRIO_PROCESS:
+ if (uap->who == 0)
+ p = curp;
+ else
+ p = pfind(uap->who);
+ if (p == 0)
+ break;
+ error = donice(curp, p, uap->prio);
+ found++;
+ break;
+
+ case PRIO_PGRP: {
+ register struct pgrp *pg;
+
+ if (uap->who == 0)
+ pg = curp->p_pgrp;
+ else if ((pg = pgfind(uap->who)) == NULL)
+ break;
+ for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt) {
+ error = donice(curp, p, uap->prio);
+ found++;
+ }
+ break;
+ }
+
+ case PRIO_USER:
+ if (uap->who == 0)
+ uap->who = curp->p_ucred->cr_uid;
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next)
+ if (p->p_ucred->cr_uid == uap->who) {
+ error = donice(curp, p, uap->prio);
+ found++;
+ }
+ break;
+
+ default:
+ return (EINVAL);
+ }
+ if (found == 0)
+ return (ESRCH);
+ return (error);
+}
+
+donice(curp, chgp, n)
+ register struct proc *curp, *chgp;
+ register int n;
+{
+ register struct pcred *pcred = curp->p_cred;
+
+ if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
+ pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
+ pcred->p_ruid != chgp->p_ucred->cr_uid)
+ return (EPERM);
+ if (n > PRIO_MAX)
+ n = PRIO_MAX;
+ if (n < PRIO_MIN)
+ n = PRIO_MIN;
+ if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag))
+ return (EACCES);
+ chgp->p_nice = n;
+ (void)resetpriority(chgp);
+ return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct setrlimit_args {
+ u_int which;
+ struct orlimit *lim;
+};
+/* ARGSUSED */
+osetrlimit(p, uap, retval)
+ struct proc *p;
+ register struct setrlimit_args *uap;
+ int *retval;
+{
+ struct orlimit olim;
+ struct rlimit lim;
+ int error;
+
+ if (error =
+ copyin((caddr_t)uap->lim, (caddr_t)&olim, sizeof (struct orlimit)))
+ return (error);
+ lim.rlim_cur = olim.rlim_cur;
+ lim.rlim_max = olim.rlim_max;
+ return (dosetrlimit(p, uap->which, &lim));
+}
+
+struct getrlimit_args {
+ u_int which;
+ struct orlimit *rlp;
+};
+/* ARGSUSED */
+ogetrlimit(p, uap, retval)
+ struct proc *p;
+ register struct getrlimit_args *uap;
+ int *retval;
+{
+ struct orlimit olim;
+
+ if (uap->which >= RLIM_NLIMITS)
+ return (EINVAL);
+ olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
+ if (olim.rlim_cur == -1)
+ olim.rlim_cur = 0x7fffffff;
+ olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
+ if (olim.rlim_max == -1)
+ olim.rlim_max = 0x7fffffff;
+ return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+struct __setrlimit_args {
+ u_int which;
+ struct rlimit *lim;
+};
+/* ARGSUSED */
+setrlimit(p, uap, retval)
+ struct proc *p;
+ register struct __setrlimit_args *uap;
+ int *retval;
+{
+ struct rlimit alim;
+ int error;
+
+ if (error =
+ copyin((caddr_t)uap->lim, (caddr_t)&alim, sizeof (struct rlimit)))
+ return (error);
+ return (dosetrlimit(p, uap->which, &alim));
+}
+
+dosetrlimit(p, which, limp)
+ struct proc *p;
+ u_int which;
+ struct rlimit *limp;
+{
+ register struct rlimit *alimp;
+ extern unsigned maxdmap;
+ int error;
+
+ if (which >= RLIM_NLIMITS)
+ return (EINVAL);
+ alimp = &p->p_rlimit[which];
+ if (limp->rlim_cur > alimp->rlim_max ||
+ limp->rlim_max > alimp->rlim_max)
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ if (limp->rlim_cur > limp->rlim_max)
+ limp->rlim_cur = limp->rlim_max;
+ if (p->p_limit->p_refcnt > 1 &&
+ (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
+ p->p_limit->p_refcnt--;
+ p->p_limit = limcopy(p->p_limit);
+ alimp = &p->p_rlimit[which];
+ }
+
+ switch (which) {
+
+ case RLIMIT_DATA:
+ if (limp->rlim_cur > maxdmap)
+ limp->rlim_cur = maxdmap;
+ if (limp->rlim_max > maxdmap)
+ limp->rlim_max = maxdmap;
+ break;
+
+ case RLIMIT_STACK:
+ if (limp->rlim_cur > maxdmap)
+ limp->rlim_cur = maxdmap;
+ if (limp->rlim_max > maxdmap)
+ limp->rlim_max = maxdmap;
+ /*
+ * Stack is allocated to the max at exec time with only
+ * "rlim_cur" bytes accessible. If stack limit is going
+ * up make more accessible, if going down make inaccessible.
+ */
+ if (limp->rlim_cur != alimp->rlim_cur) {
+ vm_offset_t addr;
+ vm_size_t size;
+ vm_prot_t prot;
+
+ if (limp->rlim_cur > alimp->rlim_cur) {
+ prot = VM_PROT_ALL;
+ size = limp->rlim_cur - alimp->rlim_cur;
+ addr = USRSTACK - limp->rlim_cur;
+ } else {
+ prot = VM_PROT_NONE;
+ size = alimp->rlim_cur - limp->rlim_cur;
+ addr = USRSTACK - alimp->rlim_cur;
+ }
+ addr = trunc_page(addr);
+ size = round_page(size);
+ (void) vm_map_protect(&p->p_vmspace->vm_map,
+ addr, addr+size, prot, FALSE);
+ }
+ break;
+
+ case RLIMIT_NOFILE:
+ if (limp->rlim_cur > maxfiles)
+ limp->rlim_cur = maxfiles;
+ if (limp->rlim_max > maxfiles)
+ limp->rlim_max = maxfiles;
+ break;
+
+ case RLIMIT_NPROC:
+ if (limp->rlim_cur > maxproc)
+ limp->rlim_cur = maxproc;
+ if (limp->rlim_max > maxproc)
+ limp->rlim_max = maxproc;
+ break;
+ }
+ *alimp = *limp;
+ return (0);
+}
+
+struct __getrlimit_args {
+ u_int which;
+ struct rlimit *rlp;
+};
+/* ARGSUSED */
+getrlimit(p, uap, retval)
+ struct proc *p;
+ register struct __getrlimit_args *uap;
+ int *retval;
+{
+
+ if (uap->which >= RLIM_NLIMITS)
+ return (EINVAL);
+ return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
+ sizeof (struct rlimit)));
+}
+
+/*
+ * Transform the running time and tick information in proc p into user,
+ * system, and interrupt time usage.
+ */
+calcru(p, up, sp, ip)
+ register struct proc *p;
+ register struct timeval *up;
+ register struct timeval *sp;
+ register struct timeval *ip;
+{
+ register u_quad_t u, st, ut, it, tot;
+ register u_long sec, usec;
+ register int s;
+ struct timeval tv;
+
+ s = splstatclock();
+ st = p->p_sticks;
+ ut = p->p_uticks;
+ it = p->p_iticks;
+ splx(s);
+
+ tot = st + ut + it;
+ if (tot == 0) {
+ up->tv_sec = up->tv_usec = 0;
+ sp->tv_sec = sp->tv_usec = 0;
+ if (ip != NULL)
+ ip->tv_sec = ip->tv_usec = 0;
+ return;
+ }
+
+ sec = p->p_rtime.tv_sec;
+ usec = p->p_rtime.tv_usec;
+ if (p == curproc) {
+ /*
+ * Adjust for the current time slice. This is actually fairly
+ * important since the error here is on the order of a time
+ * quantum, which is much greater than the sampling error.
+ */
+ microtime(&tv);
+ sec += tv.tv_sec - runtime.tv_sec;
+ usec += tv.tv_usec - runtime.tv_usec;
+ }
+ u = sec * 1000000 + usec;
+ st = (u * st) / tot;
+ sp->tv_sec = st / 1000000;
+ sp->tv_usec = st % 1000000;
+ ut = (u * ut) / tot;
+ up->tv_sec = ut / 1000000;
+ up->tv_usec = ut % 1000000;
+ if (ip != NULL) {
+ it = (u * it) / tot;
+ ip->tv_sec = it / 1000000;
+ ip->tv_usec = it % 1000000;
+ }
+}
+
+struct getrusage_args {
+ int who;
+ struct rusage *rusage;
+};
+/* ARGSUSED */
+getrusage(p, uap, retval)
+ register struct proc *p;
+ register struct getrusage_args *uap;
+ int *retval;
+{
+ register struct rusage *rup;
+
+ switch (uap->who) {
+
+ case RUSAGE_SELF:
+ rup = &p->p_stats->p_ru;
+ calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
+ break;
+
+ case RUSAGE_CHILDREN:
+ rup = &p->p_stats->p_cru;
+ break;
+
+ default:
+ return (EINVAL);
+ }
+ return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
+ sizeof (struct rusage)));
+}
+
+ruadd(ru, ru2)
+ register struct rusage *ru, *ru2;
+{
+ register long *ip, *ip2;
+ register int i;
+
+ timevaladd(&ru->ru_utime, &ru2->ru_utime);
+ timevaladd(&ru->ru_stime, &ru2->ru_stime);
+ if (ru->ru_maxrss < ru2->ru_maxrss)
+ ru->ru_maxrss = ru2->ru_maxrss;
+ ip = &ru->ru_first; ip2 = &ru2->ru_first;
+ for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
+ *ip++ += *ip2++;
+}
+
+/*
+ * Make a copy of the plimit structure.
+ * We share these structures copy-on-write after fork,
+ * and copy when a limit is changed.
+ */
+struct plimit *
+limcopy(lim)
+ struct plimit *lim;
+{
+ register struct plimit *copy;
+
+ MALLOC(copy, struct plimit *, sizeof(struct plimit),
+ M_SUBPROC, M_WAITOK);
+ bcopy(lim->pl_rlimit, copy->pl_rlimit,
+ sizeof(struct rlimit) * RLIM_NLIMITS);
+ copy->p_lflags = 0;
+ copy->p_refcnt = 1;
+ return (copy);
+}
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
new file mode 100644
index 0000000..3dcff92
--- /dev/null
+++ b/sys/kern/kern_sig.c
@@ -0,0 +1,1197 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94
+ */
+
+#define SIGPROP /* include signal properties table */
+#include <sys/param.h>
+#include <sys/signalvar.h>
+#include <sys/resourcevar.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/timeb.h>
+#include <sys/times.h>
+#include <sys/buf.h>
+#include <sys/acct.h>
+#include <sys/file.h>
+#include <sys/kernel.h>
+#include <sys/wait.h>
+#include <sys/ktrace.h>
+#include <sys/syslog.h>
+#include <sys/stat.h>
+
+#include <machine/cpu.h>
+
+#include <vm/vm.h>
+#include <sys/user.h> /* for coredump */
+
+/*
+ * Can process p, with pcred pc, send the signal signum to process q?
+ */
+#define CANSIGNAL(p, pc, q, signum) \
+ ((pc)->pc_ucred->cr_uid == 0 || \
+ (pc)->p_ruid == (q)->p_cred->p_ruid || \
+ (pc)->pc_ucred->cr_uid == (q)->p_cred->p_ruid || \
+ (pc)->p_ruid == (q)->p_ucred->cr_uid || \
+ (pc)->pc_ucred->cr_uid == (q)->p_ucred->cr_uid || \
+ ((signum) == SIGCONT && (q)->p_session == (p)->p_session))
+
+struct sigaction_args {
+ int signum;
+ struct sigaction *nsa;
+ struct sigaction *osa;
+};
+/* ARGSUSED */
+sigaction(p, uap, retval)
+ struct proc *p;
+ register struct sigaction_args *uap;
+ int *retval;
+{
+ struct sigaction vec;
+ register struct sigaction *sa;
+ register struct sigacts *ps = p->p_sigacts;
+ register int signum;
+ int bit, error;
+
+ signum = uap->signum;
+ if (signum <= 0 || signum >= NSIG ||
+ signum == SIGKILL || signum == SIGSTOP)
+ return (EINVAL);
+ sa = &vec;
+ if (uap->osa) {
+ sa->sa_handler = ps->ps_sigact[signum];
+ sa->sa_mask = ps->ps_catchmask[signum];
+ bit = sigmask(signum);
+ sa->sa_flags = 0;
+ if ((ps->ps_sigonstack & bit) != 0)
+ sa->sa_flags |= SA_ONSTACK;
+ if ((ps->ps_sigintr & bit) == 0)
+ sa->sa_flags |= SA_RESTART;
+ if (p->p_flag & P_NOCLDSTOP)
+ sa->sa_flags |= SA_NOCLDSTOP;
+ if (error = copyout((caddr_t)sa, (caddr_t)uap->osa,
+ sizeof (vec)))
+ return (error);
+ }
+ if (uap->nsa) {
+ if (error = copyin((caddr_t)uap->nsa, (caddr_t)sa,
+ sizeof (vec)))
+ return (error);
+ setsigvec(p, signum, sa);
+ }
+ return (0);
+}
+
+setsigvec(p, signum, sa)
+ register struct proc *p;
+ int signum;
+ register struct sigaction *sa;
+{
+ register struct sigacts *ps = p->p_sigacts;
+ register int bit;
+
+ bit = sigmask(signum);
+ /*
+ * Change setting atomically.
+ */
+ (void) splhigh();
+ ps->ps_sigact[signum] = sa->sa_handler;
+ ps->ps_catchmask[signum] = sa->sa_mask &~ sigcantmask;
+ if ((sa->sa_flags & SA_RESTART) == 0)
+ ps->ps_sigintr |= bit;
+ else
+ ps->ps_sigintr &= ~bit;
+ if (sa->sa_flags & SA_ONSTACK)
+ ps->ps_sigonstack |= bit;
+ else
+ ps->ps_sigonstack &= ~bit;
+#ifdef COMPAT_SUNOS
+ if (sa->sa_flags & SA_USERTRAMP)
+ ps->ps_usertramp |= bit;
+ else
+ ps->ps_usertramp &= ~bit;
+#endif
+ if (signum == SIGCHLD) {
+ if (sa->sa_flags & SA_NOCLDSTOP)
+ p->p_flag |= P_NOCLDSTOP;
+ else
+ p->p_flag &= ~P_NOCLDSTOP;
+ }
+ /*
+ * Set bit in p_sigignore for signals that are set to SIG_IGN,
+ * and for signals set to SIG_DFL where the default is to ignore.
+ * However, don't put SIGCONT in p_sigignore,
+ * as we have to restart the process.
+ */
+ if (sa->sa_handler == SIG_IGN ||
+ (sigprop[signum] & SA_IGNORE && sa->sa_handler == SIG_DFL)) {
+ p->p_siglist &= ~bit; /* never to be seen again */
+ if (signum != SIGCONT)
+ p->p_sigignore |= bit; /* easier in psignal */
+ p->p_sigcatch &= ~bit;
+ } else {
+ p->p_sigignore &= ~bit;
+ if (sa->sa_handler == SIG_DFL)
+ p->p_sigcatch &= ~bit;
+ else
+ p->p_sigcatch |= bit;
+ }
+ (void) spl0();
+}
+
+/*
+ * Initialize signal state for process 0;
+ * set to ignore signals that are ignored by default.
+ */
+void
+siginit(p)
+ struct proc *p;
+{
+ register int i;
+
+ for (i = 0; i < NSIG; i++)
+ if (sigprop[i] & SA_IGNORE && i != SIGCONT)
+ p->p_sigignore |= sigmask(i);
+}
+
+/*
+ * Reset signals for an exec of the specified process.
+ */
+void
+execsigs(p)
+ register struct proc *p;
+{
+ register struct sigacts *ps = p->p_sigacts;
+ register int nc, mask;
+
+ /*
+ * Reset caught signals. Held signals remain held
+ * through p_sigmask (unless they were caught,
+ * and are now ignored by default).
+ */
+ while (p->p_sigcatch) {
+ nc = ffs((long)p->p_sigcatch);
+ mask = sigmask(nc);
+ p->p_sigcatch &= ~mask;
+ if (sigprop[nc] & SA_IGNORE) {
+ if (nc != SIGCONT)
+ p->p_sigignore |= mask;
+ p->p_siglist &= ~mask;
+ }
+ ps->ps_sigact[nc] = SIG_DFL;
+ }
+ /*
+ * Reset stack state to the user stack.
+ * Clear set of signals caught on the signal stack.
+ */
+ ps->ps_sigstk.ss_flags = SA_DISABLE;
+ ps->ps_sigstk.ss_size = 0;
+ ps->ps_sigstk.ss_base = 0;
+ ps->ps_flags = 0;
+}
+
+/*
+ * Manipulate signal mask.
+ * Note that we receive new mask, not pointer,
+ * and return old mask as return value;
+ * the library stub does the rest.
+ */
+struct sigprocmask_args {
+ int how;
+ sigset_t mask;
+};
+sigprocmask(p, uap, retval)
+ register struct proc *p;
+ struct sigprocmask_args *uap;
+ int *retval;
+{
+ int error = 0;
+
+ *retval = p->p_sigmask;
+ (void) splhigh();
+
+ switch (uap->how) {
+ case SIG_BLOCK:
+ p->p_sigmask |= uap->mask &~ sigcantmask;
+ break;
+
+ case SIG_UNBLOCK:
+ p->p_sigmask &= ~uap->mask;
+ break;
+
+ case SIG_SETMASK:
+ p->p_sigmask = uap->mask &~ sigcantmask;
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ (void) spl0();
+ return (error);
+}
+
+struct sigpending_args {
+ int dummy;
+};
+/* ARGSUSED */
+sigpending(p, uap, retval)
+ struct proc *p;
+ struct sigpending_args *uap;
+ int *retval;
+{
+
+ *retval = p->p_siglist;
+ return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Generalized interface signal handler, 4.3-compatible.
+ */
+struct osigvec_args {
+ int signum;
+ struct sigvec *nsv;
+ struct sigvec *osv;
+};
+/* ARGSUSED */
+osigvec(p, uap, retval)
+ struct proc *p;
+ register struct osigvec_args *uap;
+ int *retval;
+{
+ struct sigvec vec;
+ register struct sigacts *ps = p->p_sigacts;
+ register struct sigvec *sv;
+ register int signum;
+ int bit, error;
+
+ signum = uap->signum;
+ if (signum <= 0 || signum >= NSIG ||
+ signum == SIGKILL || signum == SIGSTOP)
+ return (EINVAL);
+ sv = &vec;
+ if (uap->osv) {
+ *(sig_t *)&sv->sv_handler = ps->ps_sigact[signum];
+ sv->sv_mask = ps->ps_catchmask[signum];
+ bit = sigmask(signum);
+ sv->sv_flags = 0;
+ if ((ps->ps_sigonstack & bit) != 0)
+ sv->sv_flags |= SV_ONSTACK;
+ if ((ps->ps_sigintr & bit) != 0)
+ sv->sv_flags |= SV_INTERRUPT;
+#ifndef COMPAT_SUNOS
+ if (p->p_flag & P_NOCLDSTOP)
+ sv->sv_flags |= SA_NOCLDSTOP;
+#endif
+ if (error = copyout((caddr_t)sv, (caddr_t)uap->osv,
+ sizeof (vec)))
+ return (error);
+ }
+ if (uap->nsv) {
+ if (error = copyin((caddr_t)uap->nsv, (caddr_t)sv,
+ sizeof (vec)))
+ return (error);
+#ifdef COMPAT_SUNOS
+ /*
+ * SunOS uses this bit (4, aka SA_DISABLE) as SV_RESETHAND,
+ * `reset to SIG_DFL on delivery'. We have no such option
+ * now or ever!
+ */
+ if (sv->sv_flags & SA_DISABLE)
+ return (EINVAL);
+ sv->sv_flags |= SA_USERTRAMP;
+#endif
+ sv->sv_flags ^= SA_RESTART; /* opposite of SV_INTERRUPT */
+ setsigvec(p, signum, (struct sigaction *)sv);
+ }
+ return (0);
+}
+
+struct osigblock_args {
+ int mask;
+};
+osigblock(p, uap, retval)
+ register struct proc *p;
+ struct osigblock_args *uap;
+ int *retval;
+{
+
+ (void) splhigh();
+ *retval = p->p_sigmask;
+ p->p_sigmask |= uap->mask &~ sigcantmask;
+ (void) spl0();
+ return (0);
+}
+
+struct osigsetmask_args {
+ int mask;
+};
+osigsetmask(p, uap, retval)
+ struct proc *p;
+ struct osigsetmask_args *uap;
+ int *retval;
+{
+
+ (void) splhigh();
+ *retval = p->p_sigmask;
+ p->p_sigmask = uap->mask &~ sigcantmask;
+ (void) spl0();
+ return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Suspend process until signal, providing mask to be set
+ * in the meantime. Note nonstandard calling convention:
+ * libc stub passes mask, not pointer, to save a copyin.
+ */
+struct sigsuspend_args {
+ sigset_t mask;
+};
+/* ARGSUSED */
+sigsuspend(p, uap, retval)
+ register struct proc *p;
+ struct sigsuspend_args *uap;
+ int *retval;
+{
+ register struct sigacts *ps = p->p_sigacts;
+
+ /*
+ * When returning from sigpause, we want
+ * the old mask to be restored after the
+ * signal handler has finished. Thus, we
+ * save it here and mark the sigacts structure
+ * to indicate this.
+ */
+ ps->ps_oldmask = p->p_sigmask;
+ ps->ps_flags |= SAS_OLDMASK;
+ p->p_sigmask = uap->mask &~ sigcantmask;
+ while (tsleep((caddr_t) ps, PPAUSE|PCATCH, "pause", 0) == 0)
+ /* void */;
+ /* always return EINTR rather than ERESTART... */
+ return (EINTR);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct osigstack_args {
+ struct sigstack *nss;
+ struct sigstack *oss;
+};
+/* ARGSUSED */
+osigstack(p, uap, retval)
+ struct proc *p;
+ register struct osigstack_args *uap;
+ int *retval;
+{
+ struct sigstack ss;
+ struct sigacts *psp;
+ int error = 0;
+
+ psp = p->p_sigacts;
+ ss.ss_sp = psp->ps_sigstk.ss_base;
+ ss.ss_onstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
+ if (uap->oss && (error = copyout((caddr_t)&ss, (caddr_t)uap->oss,
+ sizeof (struct sigstack))))
+ return (error);
+ if (uap->nss && (error = copyin((caddr_t)uap->nss, (caddr_t)&ss,
+ sizeof (ss))) == 0) {
+ psp->ps_sigstk.ss_base = ss.ss_sp;
+ psp->ps_sigstk.ss_size = 0;
+ psp->ps_sigstk.ss_flags |= ss.ss_onstack & SA_ONSTACK;
+ psp->ps_flags |= SAS_ALTSTACK;
+ }
+ return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+struct sigaltstack_args {
+ struct sigaltstack *nss;
+ struct sigaltstack *oss;
+};
+/* ARGSUSED */
+sigaltstack(p, uap, retval)
+ struct proc *p;
+ register struct sigaltstack_args *uap;
+ int *retval;
+{
+ struct sigacts *psp;
+ struct sigaltstack ss;
+ int error;
+
+ psp = p->p_sigacts;
+ if ((psp->ps_flags & SAS_ALTSTACK) == 0)
+ psp->ps_sigstk.ss_flags |= SA_DISABLE;
+ if (uap->oss && (error = copyout((caddr_t)&psp->ps_sigstk,
+ (caddr_t)uap->oss, sizeof (struct sigaltstack))))
+ return (error);
+ if (uap->nss == 0)
+ return (0);
+ if (error = copyin((caddr_t)uap->nss, (caddr_t)&ss, sizeof (ss)))
+ return (error);
+ if (ss.ss_flags & SA_DISABLE) {
+ if (psp->ps_sigstk.ss_flags & SA_ONSTACK)
+ return (EINVAL);
+ psp->ps_flags &= ~SAS_ALTSTACK;
+ psp->ps_sigstk.ss_flags = ss.ss_flags;
+ return (0);
+ }
+ if (ss.ss_size < MINSIGSTKSZ)
+ return (ENOMEM);
+ psp->ps_flags |= SAS_ALTSTACK;
+ psp->ps_sigstk= ss;
+ return (0);
+}
+
+struct kill_args {
+ int pid;
+ int signum;
+};
+/* ARGSUSED */
+kill(cp, uap, retval)
+ register struct proc *cp;
+ register struct kill_args *uap;
+ int *retval;
+{
+ register struct proc *p;
+ register struct pcred *pc = cp->p_cred;
+
+ if ((u_int)uap->signum >= NSIG)
+ return (EINVAL);
+ if (uap->pid > 0) {
+ /* kill single process */
+ if ((p = pfind(uap->pid)) == NULL)
+ return (ESRCH);
+ if (!CANSIGNAL(cp, pc, p, uap->signum))
+ return (EPERM);
+ if (uap->signum)
+ psignal(p, uap->signum);
+ return (0);
+ }
+ switch (uap->pid) {
+ case -1: /* broadcast signal */
+ return (killpg1(cp, uap->signum, 0, 1));
+ case 0: /* signal own process group */
+ return (killpg1(cp, uap->signum, 0, 0));
+ default: /* negative explicit process group */
+ return (killpg1(cp, uap->signum, -uap->pid, 0));
+ }
+ /* NOTREACHED */
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct okillpg_args {
+ int pgid;
+ int signum;
+};
+/* ARGSUSED */
+okillpg(p, uap, retval)
+ struct proc *p;
+ register struct okillpg_args *uap;
+ int *retval;
+{
+
+ if ((u_int)uap->signum >= NSIG)
+ return (EINVAL);
+ return (killpg1(p, uap->signum, uap->pgid, 0));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Common code for kill process group/broadcast kill.
+ * cp is calling process.
+ */
+killpg1(cp, signum, pgid, all)
+ register struct proc *cp;
+ int signum, pgid, all;
+{
+ register struct proc *p;
+ register struct pcred *pc = cp->p_cred;
+ struct pgrp *pgrp;
+ int nfound = 0;
+
+ if (all)
+ /*
+ * broadcast
+ */
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ if (p->p_pid <= 1 || p->p_flag & P_SYSTEM ||
+ p == cp || !CANSIGNAL(cp, pc, p, signum))
+ continue;
+ nfound++;
+ if (signum)
+ psignal(p, signum);
+ }
+ else {
+ if (pgid == 0)
+ /*
+ * zero pgid means send to my process group.
+ */
+ pgrp = cp->p_pgrp;
+ else {
+ pgrp = pgfind(pgid);
+ if (pgrp == NULL)
+ return (ESRCH);
+ }
+ for (p = pgrp->pg_mem; p != NULL; p = p->p_pgrpnxt) {
+ if (p->p_pid <= 1 || p->p_flag & P_SYSTEM ||
+ p->p_stat == SZOMB ||
+ !CANSIGNAL(cp, pc, p, signum))
+ continue;
+ nfound++;
+ if (signum)
+ psignal(p, signum);
+ }
+ }
+ return (nfound ? 0 : ESRCH);
+}
+
+/*
+ * Send a signal to a process group.
+ */
+void
+gsignal(pgid, signum)
+ int pgid, signum;
+{
+ struct pgrp *pgrp;
+
+ if (pgid && (pgrp = pgfind(pgid)))
+ pgsignal(pgrp, signum, 0);
+}
+
+/*
+ * Send a signal to a process group. If checktty is 1,
+ * limit to members which have a controlling terminal.
+ */
+void
+pgsignal(pgrp, signum, checkctty)
+ struct pgrp *pgrp;
+ int signum, checkctty;
+{
+ register struct proc *p;
+
+ if (pgrp)
+ for (p = pgrp->pg_mem; p != NULL; p = p->p_pgrpnxt)
+ if (checkctty == 0 || p->p_flag & P_CONTROLT)
+ psignal(p, signum);
+}
+
+/*
+ * Send a signal caused by a trap to the current process.
+ * If it will be caught immediately, deliver it with correct code.
+ * Otherwise, post it normally.
+ */
+void
+trapsignal(p, signum, code)
+ struct proc *p;
+ register int signum;
+ u_int code;
+{
+ register struct sigacts *ps = p->p_sigacts;
+ int mask;
+
+ mask = sigmask(signum);
+ if ((p->p_flag & P_TRACED) == 0 && (p->p_sigcatch & mask) != 0 &&
+ (p->p_sigmask & mask) == 0) {
+ p->p_stats->p_ru.ru_nsignals++;
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_PSIG))
+ ktrpsig(p->p_tracep, signum, ps->ps_sigact[signum],
+ p->p_sigmask, code);
+#endif
+ sendsig(ps->ps_sigact[signum], signum, p->p_sigmask, code);
+ p->p_sigmask |= ps->ps_catchmask[signum] | mask;
+ } else {
+ ps->ps_code = code; /* XXX for core dump/debugger */
+ psignal(p, signum);
+ }
+}
+
+/*
+ * Send the signal to the process. If the signal has an action, the action
+ * is usually performed by the target process rather than the caller; we add
+ * the signal to the set of pending signals for the process.
+ *
+ * Exceptions:
+ * o When a stop signal is sent to a sleeping process that takes the
+ * default action, the process is stopped without awakening it.
+ * o SIGCONT restarts stopped processes (or puts them back to sleep)
+ * regardless of the signal action (eg, blocked or ignored).
+ *
+ * Other ignored signals are discarded immediately.
+ */
+void
+psignal(p, signum)
+ register struct proc *p;
+ register int signum;
+{
+ register int s, prop;
+ register sig_t action;
+ int mask;
+
+ if ((u_int)signum >= NSIG || signum == 0)
+ panic("psignal signal number");
+ mask = sigmask(signum);
+ prop = sigprop[signum];
+
+ /*
+ * If proc is traced, always give parent a chance.
+ */
+ if (p->p_flag & P_TRACED)
+ action = SIG_DFL;
+ else {
+ /*
+ * If the signal is being ignored,
+ * then we forget about it immediately.
+ * (Note: we don't set SIGCONT in p_sigignore,
+ * and if it is set to SIG_IGN,
+ * action will be SIG_DFL here.)
+ */
+ if (p->p_sigignore & mask)
+ return;
+ if (p->p_sigmask & mask)
+ action = SIG_HOLD;
+ else if (p->p_sigcatch & mask)
+ action = SIG_CATCH;
+ else
+ action = SIG_DFL;
+ }
+
+ if (p->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) &&
+ (p->p_flag & P_TRACED) == 0)
+ p->p_nice = NZERO;
+
+ if (prop & SA_CONT)
+ p->p_siglist &= ~stopsigmask;
+
+ if (prop & SA_STOP) {
+ /*
+ * If sending a tty stop signal to a member of an orphaned
+ * process group, discard the signal here if the action
+ * is default; don't stop the process below if sleeping,
+ * and don't clear any pending SIGCONT.
+ */
+ if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 &&
+ action == SIG_DFL)
+ return;
+ p->p_siglist &= ~contsigmask;
+ }
+ p->p_siglist |= mask;
+
+ /*
+ * Defer further processing for signals which are held,
+ * except that stopped processes must be continued by SIGCONT.
+ */
+ if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP))
+ return;
+ s = splhigh();
+ switch (p->p_stat) {
+
+ case SSLEEP:
+ /*
+ * If process is sleeping uninterruptibly
+ * we can't interrupt the sleep... the signal will
+ * be noticed when the process returns through
+ * trap() or syscall().
+ */
+ if ((p->p_flag & P_SINTR) == 0)
+ goto out;
+ /*
+ * Process is sleeping and traced... make it runnable
+ * so it can discover the signal in issignal() and stop
+ * for the parent.
+ */
+ if (p->p_flag & P_TRACED)
+ goto run;
+ /*
+ * If SIGCONT is default (or ignored) and process is
+ * asleep, we are finished; the process should not
+ * be awakened.
+ */
+ if ((prop & SA_CONT) && action == SIG_DFL) {
+ p->p_siglist &= ~mask;
+ goto out;
+ }
+ /*
+ * When a sleeping process receives a stop
+ * signal, process immediately if possible.
+ * All other (caught or default) signals
+ * cause the process to run.
+ */
+ if (prop & SA_STOP) {
+ if (action != SIG_DFL)
+ goto runfast;
+ /*
+ * If a child holding parent blocked,
+ * stopping could cause deadlock.
+ */
+ if (p->p_flag & P_PPWAIT)
+ goto out;
+ p->p_siglist &= ~mask;
+ p->p_xstat = signum;
+ if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0)
+ psignal(p->p_pptr, SIGCHLD);
+ stop(p);
+ goto out;
+ } else
+ goto runfast;
+ /*NOTREACHED*/
+
+ case SSTOP:
+ /*
+ * If traced process is already stopped,
+ * then no further action is necessary.
+ */
+ if (p->p_flag & P_TRACED)
+ goto out;
+
+ /*
+ * Kill signal always sets processes running.
+ */
+ if (signum == SIGKILL)
+ goto runfast;
+
+ if (prop & SA_CONT) {
+ /*
+ * If SIGCONT is default (or ignored), we continue the
+ * process but don't leave the signal in p_siglist, as
+ * it has no further action. If SIGCONT is held, we
+ * continue the process and leave the signal in
+ * p_siglist. If the process catches SIGCONT, let it
+ * handle the signal itself. If it isn't waiting on
+ * an event, then it goes back to run state.
+ * Otherwise, process goes back to sleep state.
+ */
+ if (action == SIG_DFL)
+ p->p_siglist &= ~mask;
+ if (action == SIG_CATCH)
+ goto runfast;
+ if (p->p_wchan == 0)
+ goto run;
+ p->p_stat = SSLEEP;
+ goto out;
+ }
+
+ if (prop & SA_STOP) {
+ /*
+ * Already stopped, don't need to stop again.
+ * (If we did the shell could get confused.)
+ */
+ p->p_siglist &= ~mask; /* take it away */
+ goto out;
+ }
+
+ /*
+ * If process is sleeping interruptibly, then simulate a
+ * wakeup so that when it is continued, it will be made
+ * runnable and can look at the signal. But don't make
+ * the process runnable, leave it stopped.
+ */
+ if (p->p_wchan && p->p_flag & P_SINTR)
+ unsleep(p);
+ goto out;
+
+ default:
+ /*
+ * SRUN, SIDL, SZOMB do nothing with the signal,
+ * other than kicking ourselves if we are running.
+ * It will either never be noticed, or noticed very soon.
+ */
+ if (p == curproc)
+ signotify(p);
+ goto out;
+ }
+ /*NOTREACHED*/
+
+runfast:
+ /*
+ * Raise priority to at least PUSER.
+ */
+ if (p->p_priority > PUSER)
+ p->p_priority = PUSER;
+run:
+ setrunnable(p);
+out:
+ splx(s);
+}
+
+/*
+ * If the current process has received a signal (should be caught or cause
+ * termination, should interrupt current syscall), return the signal number.
+ * Stop signals with default action are processed immediately, then cleared;
+ * they aren't returned. This is checked after each entry to the system for
+ * a syscall or trap (though this can usually be done without calling issignal
+ * by checking the pending signal masks in the CURSIG macro.) The normal call
+ * sequence is
+ *
+ * while (signum = CURSIG(curproc))
+ * postsig(signum);
+ */
+issignal(p)
+ register struct proc *p;
+{
+ register int signum, mask, prop;
+
+ for (;;) {
+ mask = p->p_siglist & ~p->p_sigmask;
+ if (p->p_flag & P_PPWAIT)
+ mask &= ~stopsigmask;
+ if (mask == 0) /* no signal to send */
+ return (0);
+ signum = ffs((long)mask);
+ mask = sigmask(signum);
+ prop = sigprop[signum];
+ /*
+ * We should see pending but ignored signals
+ * only if P_TRACED was on when they were posted.
+ */
+ if (mask & p->p_sigignore && (p->p_flag & P_TRACED) == 0) {
+ p->p_siglist &= ~mask;
+ continue;
+ }
+ if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) {
+ /*
+ * If traced, always stop, and stay
+ * stopped until released by the parent.
+ */
+ p->p_xstat = signum;
+ psignal(p->p_pptr, SIGCHLD);
+ do {
+ stop(p);
+ mi_switch();
+ } while (!trace_req(p) && p->p_flag & P_TRACED);
+
+ /*
+ * If the traced bit got turned off, go back up
+ * to the top to rescan signals. This ensures
+ * that p_sig* and ps_sigact are consistent.
+ */
+ if ((p->p_flag & P_TRACED) == 0)
+ continue;
+
+ /*
+ * If parent wants us to take the signal,
+ * then it will leave it in p->p_xstat;
+ * otherwise we just look for signals again.
+ */
+ p->p_siglist &= ~mask; /* clear the old signal */
+ signum = p->p_xstat;
+ if (signum == 0)
+ continue;
+
+ /*
+ * Put the new signal into p_siglist. If the
+ * signal is being masked, look for other signals.
+ */
+ mask = sigmask(signum);
+ p->p_siglist |= mask;
+ if (p->p_sigmask & mask)
+ continue;
+ }
+
+ /*
+ * Decide whether the signal should be returned.
+ * Return the signal's number, or fall through
+ * to clear it from the pending mask.
+ */
+ switch ((int)p->p_sigacts->ps_sigact[signum]) {
+
+ case SIG_DFL:
+ /*
+ * Don't take default actions on system processes.
+ */
+ if (p->p_pid <= 1) {
+#ifdef DIAGNOSTIC
+ /*
+ * Are you sure you want to ignore SIGSEGV
+ * in init? XXX
+ */
+ printf("Process (pid %d) got signal %d\n",
+ p->p_pid, signum);
+#endif
+ break; /* == ignore */
+ }
+ /*
+ * If there is a pending stop signal to process
+ * with default action, stop here,
+ * then clear the signal. However,
+ * if process is member of an orphaned
+ * process group, ignore tty stop signals.
+ */
+ if (prop & SA_STOP) {
+ if (p->p_flag & P_TRACED ||
+ (p->p_pgrp->pg_jobc == 0 &&
+ prop & SA_TTYSTOP))
+ break; /* == ignore */
+ p->p_xstat = signum;
+ stop(p);
+ if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0)
+ psignal(p->p_pptr, SIGCHLD);
+ mi_switch();
+ break;
+ } else if (prop & SA_IGNORE) {
+ /*
+ * Except for SIGCONT, shouldn't get here.
+ * Default action is to ignore; drop it.
+ */
+ break; /* == ignore */
+ } else
+ return (signum);
+ /*NOTREACHED*/
+
+ case SIG_IGN:
+ /*
+ * Masking above should prevent us ever trying
+ * to take action on an ignored signal other
+ * than SIGCONT, unless process is traced.
+ */
+ if ((prop & SA_CONT) == 0 &&
+ (p->p_flag & P_TRACED) == 0)
+ printf("issignal\n");
+ break; /* == ignore */
+
+ default:
+ /*
+ * This signal has an action, let
+ * postsig() process it.
+ */
+ return (signum);
+ }
+ p->p_siglist &= ~mask; /* take the signal! */
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Put the argument process into the stopped state and notify the parent
+ * via wakeup. Signals are handled elsewhere. The process must not be
+ * on the run queue.
+ */
+stop(p)
+ register struct proc *p;
+{
+
+ p->p_stat = SSTOP;
+ p->p_flag &= ~P_WAITED;
+ wakeup((caddr_t)p->p_pptr);
+}
+
+/*
+ * Take the action for the specified signal
+ * from the current set of pending signals.
+ */
+void
+postsig(signum)
+ register int signum;
+{
+ register struct proc *p = curproc;
+ register struct sigacts *ps = p->p_sigacts;
+ register sig_t action;
+ int code, mask, returnmask;
+
+#ifdef DIAGNOSTIC
+ if (signum == 0)
+ panic("postsig");
+#endif
+ mask = sigmask(signum);
+ p->p_siglist &= ~mask;
+ action = ps->ps_sigact[signum];
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_PSIG))
+ ktrpsig(p->p_tracep,
+ signum, action, ps->ps_flags & SAS_OLDMASK ?
+ ps->ps_oldmask : p->p_sigmask, 0);
+#endif
+ if (action == SIG_DFL) {
+ /*
+ * Default action, where the default is to kill
+ * the process. (Other cases were ignored above.)
+ */
+ sigexit(p, signum);
+ /* NOTREACHED */
+ } else {
+ /*
+ * If we get here, the signal must be caught.
+ */
+#ifdef DIAGNOSTIC
+ if (action == SIG_IGN || (p->p_sigmask & mask))
+ panic("postsig action");
+#endif
+ /*
+ * Set the new mask value and also defer further
+ * occurences of this signal.
+ *
+ * Special case: user has done a sigpause. Here the
+ * current mask is not of interest, but rather the
+ * mask from before the sigpause is what we want
+ * restored after the signal processing is completed.
+ */
+ (void) splhigh();
+ if (ps->ps_flags & SAS_OLDMASK) {
+ returnmask = ps->ps_oldmask;
+ ps->ps_flags &= ~SAS_OLDMASK;
+ } else
+ returnmask = p->p_sigmask;
+ p->p_sigmask |= ps->ps_catchmask[signum] | mask;
+ (void) spl0();
+ p->p_stats->p_ru.ru_nsignals++;
+ if (ps->ps_sig != signum) {
+ code = 0;
+ } else {
+ code = ps->ps_code;
+ ps->ps_code = 0;
+ }
+ sendsig(action, signum, returnmask, code);
+ }
+}
+
+/*
+ * Kill the current process for stated reason.
+ */
+killproc(p, why)
+ struct proc *p;
+ char *why;
+{
+
+ log(LOG_ERR, "pid %d was killed: %s\n", p->p_pid, why);
+ uprintf("sorry, pid %d was killed: %s\n", p->p_pid, why);
+ psignal(p, SIGKILL);
+}
+
+/*
+ * Force the current process to exit with the specified signal, dumping core
+ * if appropriate. We bypass the normal tests for masked and caught signals,
+ * allowing unrecoverable failures to terminate the process without changing
+ * signal state. Mark the accounting record with the signal termination.
+ * If dumping core, save the signal number for the debugger. Calls exit and
+ * does not return.
+ */
+sigexit(p, signum)
+ register struct proc *p;
+ int signum;
+{
+
+ p->p_acflag |= AXSIG;
+ if (sigprop[signum] & SA_CORE) {
+ p->p_sigacts->ps_sig = signum;
+ if (coredump(p) == 0)
+ signum |= WCOREFLAG;
+ }
+ exit1(p, W_EXITCODE(0, signum));
+ /* NOTREACHED */
+}
+
+/*
+ * Dump core, into a file named "progname.core", unless the process was
+ * setuid/setgid.
+ */
+coredump(p)
+ register struct proc *p;
+{
+ register struct vnode *vp;
+ register struct pcred *pcred = p->p_cred;
+ register struct ucred *cred = pcred->pc_ucred;
+ register struct vmspace *vm = p->p_vmspace;
+ struct nameidata nd;
+ struct vattr vattr;
+ int error, error1;
+ char name[MAXCOMLEN+6]; /* progname.core */
+
+ if (pcred->p_svuid != pcred->p_ruid || pcred->p_svgid != pcred->p_rgid)
+ return (EFAULT);
+ if (ctob(UPAGES + vm->vm_dsize + vm->vm_ssize) >=
+ p->p_rlimit[RLIMIT_CORE].rlim_cur)
+ return (EFAULT);
+ sprintf(name, "%s.core", p->p_comm);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, name, p);
+ if (error = vn_open(&nd,
+ O_CREAT | FWRITE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))
+ return (error);
+ vp = nd.ni_vp;
+
+ /* Don't dump to non-regular files or files with links. */
+ if (vp->v_type != VREG ||
+ VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) {
+ error = EFAULT;
+ goto out;
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_size = 0;
+ LEASE_CHECK(vp, p, cred, LEASE_WRITE);
+ VOP_SETATTR(vp, &vattr, cred, p);
+ p->p_acflag |= ACORE;
+ bcopy(p, &p->p_addr->u_kproc.kp_proc, sizeof(struct proc));
+ fill_eproc(p, &p->p_addr->u_kproc.kp_eproc);
+ error = cpu_coredump(p, vp, cred);
+ if (error == 0)
+ error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
+ (int)ctob(vm->vm_dsize), (off_t)ctob(UPAGES), UIO_USERSPACE,
+ IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+ if (error == 0)
+ error = vn_rdwr(UIO_WRITE, vp,
+ (caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)),
+ round_page(ctob(vm->vm_ssize)),
+ (off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE,
+ IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+out:
+ VOP_UNLOCK(vp);
+ error1 = vn_close(vp, FWRITE, cred, p);
+ if (error == 0)
+ error = error1;
+ return (error);
+}
+
+/*
+ * Nonexistent system call-- signal process (may want to handle it).
+ * Flag error in case process won't see signal immediately (blocked or ignored).
+ */
+struct nosys_args {
+ int dummy;
+};
+/* ARGSUSED */
+nosys(p, args, retval)
+ struct proc *p;
+ struct nosys_args *args;
+ int *retval;
+{
+
+ psignal(p, SIGSYS);
+ return (EINVAL);
+}
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
new file mode 100644
index 0000000..5c12afc
--- /dev/null
+++ b/sys/kern/kern_subr.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+
+uiomove(cp, n, uio)
+ register caddr_t cp;
+ register int n;
+ register struct uio *uio;
+{
+ register struct iovec *iov;
+ u_int cnt;
+ int error = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
+ panic("uiomove: mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("uiomove proc");
+#endif
+ while (n > 0 && uio->uio_resid) {
+ iov = uio->uio_iov;
+ cnt = iov->iov_len;
+ if (cnt == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ continue;
+ }
+ if (cnt > n)
+ cnt = n;
+ switch (uio->uio_segflg) {
+
+ case UIO_USERSPACE:
+ case UIO_USERISPACE:
+ if (uio->uio_rw == UIO_READ)
+ error = copyout(cp, iov->iov_base, cnt);
+ else
+ error = copyin(iov->iov_base, cp, cnt);
+ if (error)
+ return (error);
+ break;
+
+ case UIO_SYSSPACE:
+ if (uio->uio_rw == UIO_READ)
+ bcopy((caddr_t)cp, iov->iov_base, cnt);
+ else
+ bcopy(iov->iov_base, (caddr_t)cp, cnt);
+ break;
+ }
+ iov->iov_base += cnt;
+ iov->iov_len -= cnt;
+ uio->uio_resid -= cnt;
+ uio->uio_offset += cnt;
+ cp += cnt;
+ n -= cnt;
+ }
+ return (error);
+}
+
+/*
+ * Give next character to user as result of read.
+ */
+ureadc(c, uio)
+ register int c;
+ register struct uio *uio;
+{
+ register struct iovec *iov;
+
+again:
+ if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
+ panic("ureadc");
+ iov = uio->uio_iov;
+ if (iov->iov_len == 0) {
+ uio->uio_iovcnt--;
+ uio->uio_iov++;
+ goto again;
+ }
+ switch (uio->uio_segflg) {
+
+ case UIO_USERSPACE:
+ if (subyte(iov->iov_base, c) < 0)
+ return (EFAULT);
+ break;
+
+ case UIO_SYSSPACE:
+ *iov->iov_base = c;
+ break;
+
+ case UIO_USERISPACE:
+ if (suibyte(iov->iov_base, c) < 0)
+ return (EFAULT);
+ break;
+ }
+ iov->iov_base++;
+ iov->iov_len--;
+ uio->uio_resid--;
+ uio->uio_offset++;
+ return (0);
+}
+
+#ifdef vax /* unused except by ct.c, other oddities XXX */
+/*
+ * Get next character written in by user from uio.
+ */
+uwritec(uio)
+ struct uio *uio;
+{
+ register struct iovec *iov;
+ register int c;
+
+ if (uio->uio_resid <= 0)
+ return (-1);
+again:
+ if (uio->uio_iovcnt <= 0)
+ panic("uwritec");
+ iov = uio->uio_iov;
+ if (iov->iov_len == 0) {
+ uio->uio_iov++;
+ if (--uio->uio_iovcnt == 0)
+ return (-1);
+ goto again;
+ }
+ switch (uio->uio_segflg) {
+
+ case UIO_USERSPACE:
+ c = fubyte(iov->iov_base);
+ break;
+
+ case UIO_SYSSPACE:
+ c = *(u_char *) iov->iov_base;
+ break;
+
+ case UIO_USERISPACE:
+ c = fuibyte(iov->iov_base);
+ break;
+ }
+ if (c < 0)
+ return (-1);
+ iov->iov_base++;
+ iov->iov_len--;
+ uio->uio_resid--;
+ uio->uio_offset++;
+ return (c);
+}
+#endif /* vax */
+
+/*
+ * General routine to allocate a hash table.
+ */
+void *
+hashinit(elements, type, hashmask)
+ int elements, type;
+ u_long *hashmask;
+{
+ long hashsize;
+ LIST_HEAD(generic, generic) *hashtbl;
+ int i;
+
+ if (elements <= 0)
+ panic("hashinit: bad cnt");
+ for (hashsize = 1; hashsize <= elements; hashsize <<= 1)
+ continue;
+ hashsize >>= 1;
+ hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
+ for (i = 0; i < hashsize; i++)
+ LIST_INIT(&hashtbl[i]);
+ *hashmask = hashsize - 1;
+ return (hashtbl);
+}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
new file mode 100644
index 0000000..1c2a578
--- /dev/null
+++ b/sys/kern/kern_synch.c
@@ -0,0 +1,666 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/signalvar.h>
+#include <sys/resourcevar.h>
+#include <sys/vmmeter.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+#include <machine/cpu.h>
+
+u_char curpriority; /* usrpri of curproc */
+int lbolt; /* once a second sleep address */
+
+/*
+ * Force switch among equal priority processes every 100ms.
+ */
+/* ARGSUSED */
+void
+roundrobin(arg)
+ void *arg;
+{
+
+ need_resched();
+ timeout(roundrobin, NULL, hz / 10);
+}
+
+/*
+ * Constants for digital decay and forget:
+ * 90% of (p_estcpu) usage in 5 * loadav time
+ * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
+ * Note that, as ps(1) mentions, this can let percentages
+ * total over 100% (I've seen 137.9% for 3 processes).
+ *
+ * Note that hardclock updates p_estcpu and p_cpticks independently.
+ *
+ * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
+ * That is, the system wants to compute a value of decay such
+ * that the following for loop:
+ * for (i = 0; i < (5 * loadavg); i++)
+ * p_estcpu *= decay;
+ * will compute
+ * p_estcpu *= 0.1;
+ * for all values of loadavg:
+ *
+ * Mathematically this loop can be expressed by saying:
+ * decay ** (5 * loadavg) ~= .1
+ *
+ * The system computes decay as:
+ * decay = (2 * loadavg) / (2 * loadavg + 1)
+ *
+ * We wish to prove that the system's computation of decay
+ * will always fulfill the equation:
+ * decay ** (5 * loadavg) ~= .1
+ *
+ * If we compute b as:
+ * b = 2 * loadavg
+ * then
+ * decay = b / (b + 1)
+ *
+ * We now need to prove two things:
+ * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
+ * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
+ *
+ * Facts:
+ * For x close to zero, exp(x) =~ 1 + x, since
+ * exp(x) = 0! + x**1/1! + x**2/2! + ... .
+ * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
+ * For x close to zero, ln(1+x) =~ x, since
+ * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
+ * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
+ * ln(.1) =~ -2.30
+ *
+ * Proof of (1):
+ * Solve (factor)**(power) =~ .1 given power (5*loadav):
+ * solving for factor,
+ * ln(factor) =~ (-2.30/5*loadav), or
+ * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
+ * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
+ *
+ * Proof of (2):
+ * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
+ * solving for power,
+ * power*ln(b/(b+1)) =~ -2.30, or
+ * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
+ *
+ * Actual power values for the implemented algorithm are as follows:
+ * loadav: 1 2 3 4
+ * power: 5.68 10.32 14.94 19.55
+ */
+
+/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
+#define loadfactor(loadav) (2 * (loadav))
+#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
+
+/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
+fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
+
+/*
+ * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
+ * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
+ * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
+ *
+ * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
+ * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
+ *
+ * If you dont want to bother with the faster/more-accurate formula, you
+ * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
+ * (more general) method of calculating the %age of CPU used by a process.
+ */
+#define CCPU_SHIFT 11
+
+/*
+ * Recompute process priorities, every hz ticks.
+ */
+/* ARGSUSED */
+void
+schedcpu(arg)
+ void *arg;
+{
+ register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
+ register struct proc *p;
+ register int s;
+ register unsigned int newcpu;
+
+ wakeup((caddr_t)&lbolt);
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ /*
+ * Increment time in/out of memory and sleep time
+ * (if sleeping). We ignore overflow; with 16-bit int's
+ * (remember them?) overflow takes 45 days.
+ */
+ p->p_swtime++;
+ if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
+ p->p_slptime++;
+ p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
+ /*
+ * If the process has slept the entire second,
+ * stop recalculating its priority until it wakes up.
+ */
+ if (p->p_slptime > 1)
+ continue;
+ s = splstatclock(); /* prevent state changes */
+ /*
+ * p_pctcpu is only for ps.
+ */
+#if (FSHIFT >= CCPU_SHIFT)
+ p->p_pctcpu += (hz == 100)?
+ ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
+ 100 * (((fixpt_t) p->p_cpticks)
+ << (FSHIFT - CCPU_SHIFT)) / hz;
+#else
+ p->p_pctcpu += ((FSCALE - ccpu) *
+ (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
+#endif
+ p->p_cpticks = 0;
+ newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice;
+ p->p_estcpu = min(newcpu, UCHAR_MAX);
+ resetpriority(p);
+ if (p->p_priority >= PUSER) {
+#define PPQ (128 / NQS) /* priorities per queue */
+ if ((p != curproc) &&
+ p->p_stat == SRUN &&
+ (p->p_flag & P_INMEM) &&
+ (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
+ remrq(p);
+ p->p_priority = p->p_usrpri;
+ setrunqueue(p);
+ } else
+ p->p_priority = p->p_usrpri;
+ }
+ splx(s);
+ }
+ vmmeter();
+ if (bclnlist != NULL)
+ wakeup((caddr_t)pageproc);
+ timeout(schedcpu, (void *)0, hz);
+}
+
+/*
+ * Recalculate the priority of a process after it has slept for a while.
+ * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
+ * least six times the loadfactor will decay p_estcpu to zero.
+ */
+void
+updatepri(p)
+ register struct proc *p;
+{
+ register unsigned int newcpu = p->p_estcpu;
+ register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
+
+ if (p->p_slptime > 5 * loadfac)
+ p->p_estcpu = 0;
+ else {
+ p->p_slptime--; /* the first time was done in schedcpu */
+ while (newcpu && --p->p_slptime)
+ newcpu = (int) decay_cpu(loadfac, newcpu);
+ p->p_estcpu = min(newcpu, UCHAR_MAX);
+ }
+ resetpriority(p);
+}
+
+/*
+ * We're only looking at 7 bits of the address; everything is
+ * aligned to 4, lots of things are aligned to greater powers
+ * of 2. Shift right by 8, i.e. drop the bottom 256 worth.
+ */
+#define TABLESIZE 128
+#define LOOKUP(x) (((int)(x) >> 8) & (TABLESIZE - 1))
+struct slpque {
+ struct proc *sq_head;
+ struct proc **sq_tailp;
+} slpque[TABLESIZE];
+
+/*
+ * During autoconfiguration or after a panic, a sleep will simply
+ * lower the priority briefly to allow interrupts, then return.
+ * The priority to be used (safepri) is machine-dependent, thus this
+ * value is initialized and maintained in the machine-dependent layers.
+ * This priority will typically be 0, or the lowest priority
+ * that is safe for use on the interrupt stack; it can be made
+ * higher to block network software interrupts after panics.
+ */
+int safepri;
+
+/*
+ * General sleep call. Suspends the current process until a wakeup is
+ * performed on the specified identifier. The process will then be made
+ * runnable with the specified priority. Sleeps at most timo/hz seconds
+ * (0 means no timeout). If pri includes PCATCH flag, signals are checked
+ * before and after sleeping, else signals are not checked. Returns 0 if
+ * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a
+ * signal needs to be delivered, ERESTART is returned if the current system
+ * call should be restarted if possible, and EINTR is returned if the system
+ * call should be interrupted by the signal (return EINTR).
+ */
+int
+tsleep(ident, priority, wmesg, timo)
+ void *ident;
+ int priority, timo;
+ char *wmesg;
+{
+ register struct proc *p = curproc;
+ register struct slpque *qp;
+ register s;
+ int sig, catch = priority & PCATCH;
+ extern int cold;
+ void endtsleep __P((void *));
+
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 1, 0);
+#endif
+ s = splhigh();
+ if (cold || panicstr) {
+ /*
+ * After a panic, or during autoconfiguration,
+ * just give interrupts a chance, then just return;
+ * don't run any other procs or panic below,
+ * in case this is the idle process and already asleep.
+ */
+ splx(safepri);
+ splx(s);
+ return (0);
+ }
+#ifdef DIAGNOSTIC
+ if (ident == NULL || p->p_stat != SRUN || p->p_back)
+ panic("tsleep");
+#endif
+ p->p_wchan = ident;
+ p->p_wmesg = wmesg;
+ p->p_slptime = 0;
+ p->p_priority = priority & PRIMASK;
+ qp = &slpque[LOOKUP(ident)];
+ if (qp->sq_head == 0)
+ qp->sq_head = p;
+ else
+ *qp->sq_tailp = p;
+ *(qp->sq_tailp = &p->p_forw) = 0;
+ if (timo)
+ timeout(endtsleep, (void *)p, timo);
+ /*
+ * We put ourselves on the sleep queue and start our timeout
+ * before calling CURSIG, as we could stop there, and a wakeup
+ * or a SIGCONT (or both) could occur while we were stopped.
+ * A SIGCONT would cause us to be marked as SSLEEP
+ * without resuming us, thus we must be ready for sleep
+ * when CURSIG is called. If the wakeup happens while we're
+ * stopped, p->p_wchan will be 0 upon return from CURSIG.
+ */
+ if (catch) {
+ p->p_flag |= P_SINTR;
+ if (sig = CURSIG(p)) {
+ if (p->p_wchan)
+ unsleep(p);
+ p->p_stat = SRUN;
+ goto resume;
+ }
+ if (p->p_wchan == 0) {
+ catch = 0;
+ goto resume;
+ }
+ } else
+ sig = 0;
+ p->p_stat = SSLEEP;
+ p->p_stats->p_ru.ru_nvcsw++;
+ mi_switch();
+resume:
+ curpriority = p->p_usrpri;
+ splx(s);
+ p->p_flag &= ~P_SINTR;
+ if (p->p_flag & P_TIMEOUT) {
+ p->p_flag &= ~P_TIMEOUT;
+ if (sig == 0) {
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 0, 0);
+#endif
+ return (EWOULDBLOCK);
+ }
+ } else if (timo)
+ untimeout(endtsleep, (void *)p);
+ if (catch && (sig != 0 || (sig = CURSIG(p)))) {
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 0, 0);
+#endif
+ if (p->p_sigacts->ps_sigintr & sigmask(sig))
+ return (EINTR);
+ return (ERESTART);
+ }
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 0, 0);
+#endif
+ return (0);
+}
+
+/*
+ * Implement timeout for tsleep.
+ * If process hasn't been awakened (wchan non-zero),
+ * set timeout flag and undo the sleep. If proc
+ * is stopped, just unsleep so it will remain stopped.
+ */
+void
+endtsleep(arg)
+ void *arg;
+{
+ register struct proc *p;
+ int s;
+
+ p = (struct proc *)arg;
+ s = splhigh();
+ if (p->p_wchan) {
+ if (p->p_stat == SSLEEP)
+ setrunnable(p);
+ else
+ unsleep(p);
+ p->p_flag |= P_TIMEOUT;
+ }
+ splx(s);
+}
+
+/*
+ * Short-term, non-interruptable sleep.
+ */
+void
+sleep(ident, priority)
+ void *ident;
+ int priority;
+{
+ register struct proc *p = curproc;
+ register struct slpque *qp;
+ register s;
+ extern int cold;
+
+#ifdef DIAGNOSTIC
+ if (priority > PZERO) {
+ printf("sleep called with priority %d > PZERO, wchan: %x\n",
+ priority, ident);
+ panic("old sleep");
+ }
+#endif
+ s = splhigh();
+ if (cold || panicstr) {
+ /*
+ * After a panic, or during autoconfiguration,
+ * just give interrupts a chance, then just return;
+ * don't run any other procs or panic below,
+ * in case this is the idle process and already asleep.
+ */
+ splx(safepri);
+ splx(s);
+ return;
+ }
+#ifdef DIAGNOSTIC
+ if (ident == NULL || p->p_stat != SRUN || p->p_back)
+ panic("sleep");
+#endif
+ p->p_wchan = ident;
+ p->p_wmesg = NULL;
+ p->p_slptime = 0;
+ p->p_priority = priority;
+ qp = &slpque[LOOKUP(ident)];
+ if (qp->sq_head == 0)
+ qp->sq_head = p;
+ else
+ *qp->sq_tailp = p;
+ *(qp->sq_tailp = &p->p_forw) = 0;
+ p->p_stat = SSLEEP;
+ p->p_stats->p_ru.ru_nvcsw++;
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 1, 0);
+#endif
+ mi_switch();
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 0, 0);
+#endif
+ curpriority = p->p_usrpri;
+ splx(s);
+}
+
+/*
+ * Remove a process from its wait queue
+ */
+void
+unsleep(p)
+ register struct proc *p;
+{
+ register struct slpque *qp;
+ register struct proc **hp;
+ int s;
+
+ s = splhigh();
+ if (p->p_wchan) {
+ hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
+ while (*hp != p)
+ hp = &(*hp)->p_forw;
+ *hp = p->p_forw;
+ if (qp->sq_tailp == &p->p_forw)
+ qp->sq_tailp = hp;
+ p->p_wchan = 0;
+ }
+ splx(s);
+}
+
+/*
+ * Make all processes sleeping on the specified identifier runnable.
+ */
+void
+wakeup(ident)
+ register void *ident;
+{
+ register struct slpque *qp;
+ register struct proc *p, **q;
+ int s;
+
+ s = splhigh();
+ qp = &slpque[LOOKUP(ident)];
+restart:
+ for (q = &qp->sq_head; p = *q; ) {
+#ifdef DIAGNOSTIC
+ if (p->p_back || p->p_stat != SSLEEP && p->p_stat != SSTOP)
+ panic("wakeup");
+#endif
+ if (p->p_wchan == ident) {
+ p->p_wchan = 0;
+ *q = p->p_forw;
+ if (qp->sq_tailp == &p->p_forw)
+ qp->sq_tailp = q;
+ if (p->p_stat == SSLEEP) {
+ /* OPTIMIZED EXPANSION OF setrunnable(p); */
+ if (p->p_slptime > 1)
+ updatepri(p);
+ p->p_slptime = 0;
+ p->p_stat = SRUN;
+ if (p->p_flag & P_INMEM)
+ setrunqueue(p);
+ /*
+ * Since curpriority is a user priority,
+ * p->p_priority is always better than
+ * curpriority.
+ */
+ if ((p->p_flag & P_INMEM) == 0)
+ wakeup((caddr_t)&proc0);
+ else
+ need_resched();
+ /* END INLINE EXPANSION */
+ goto restart;
+ }
+ } else
+ q = &p->p_forw;
+ }
+ splx(s);
+}
+
+/*
+ * The machine independent parts of mi_switch().
+ * Must be called at splstatclock() or higher.
+ */
+void
+mi_switch()
+{
+ register struct proc *p = curproc; /* XXX */
+ register struct rlimit *rlim;
+ register long s, u;
+ struct timeval tv;
+
+ /*
+ * Compute the amount of time during which the current
+ * process was running, and add that to its total so far.
+ */
+ microtime(&tv);
+ u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
+ s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
+ if (u < 0) {
+ u += 1000000;
+ s--;
+ } else if (u >= 1000000) {
+ u -= 1000000;
+ s++;
+ }
+ p->p_rtime.tv_usec = u;
+ p->p_rtime.tv_sec = s;
+
+ /*
+ * Check if the process exceeds its cpu resource allocation.
+ * If over max, kill it. In any case, if it has run for more
+ * than 10 minutes, reduce priority to give others a chance.
+ */
+ rlim = &p->p_rlimit[RLIMIT_CPU];
+ if (s >= rlim->rlim_cur) {
+ if (s >= rlim->rlim_max)
+ psignal(p, SIGKILL);
+ else {
+ psignal(p, SIGXCPU);
+ if (rlim->rlim_cur < rlim->rlim_max)
+ rlim->rlim_cur += 5;
+ }
+ }
+ if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) {
+ p->p_nice = NZERO + 4;
+ resetpriority(p);
+ }
+
+ /*
+ * Pick a new current process and record its start time.
+ */
+ cnt.v_swtch++;
+ cpu_switch(p);
+ microtime(&runtime);
+}
+
+/*
+ * Initialize the (doubly-linked) run queues
+ * to be empty.
+ */
+rqinit()
+{
+ register int i;
+
+ for (i = 0; i < NQS; i++)
+ qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
+}
+
+/*
+ * Change process state to be runnable,
+ * placing it on the run queue if it is in memory,
+ * and awakening the swapper if it isn't in memory.
+ */
+void
+setrunnable(p)
+ register struct proc *p;
+{
+ register int s;
+
+ s = splhigh();
+ switch (p->p_stat) {
+ case 0:
+ case SRUN:
+ case SZOMB:
+ default:
+ panic("setrunnable");
+ case SSTOP:
+ case SSLEEP:
+ unsleep(p); /* e.g. when sending signals */
+ break;
+
+ case SIDL:
+ break;
+ }
+ p->p_stat = SRUN;
+ if (p->p_flag & P_INMEM)
+ setrunqueue(p);
+ splx(s);
+ if (p->p_slptime > 1)
+ updatepri(p);
+ p->p_slptime = 0;
+ if ((p->p_flag & P_INMEM) == 0)
+ wakeup((caddr_t)&proc0);
+ else if (p->p_priority < curpriority)
+ need_resched();
+}
+
+/*
+ * Compute the priority of a process when running in user mode.
+ * Arrange to reschedule if the resulting priority is better
+ * than that of the current process.
+ */
+void
+resetpriority(p)
+ register struct proc *p;
+{
+ register unsigned int newpriority;
+
+ newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
+ newpriority = min(newpriority, MAXPRI);
+ p->p_usrpri = newpriority;
+ if (newpriority < curpriority)
+ need_resched();
+}
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
new file mode 100644
index 0000000..ae16dec
--- /dev/null
+++ b/sys/kern/kern_sysctl.c
@@ -0,0 +1,787 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Karels at Berkeley Software Design, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94
+ */
+
+/*
+ * sysctl system call.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/unistd.h>
+#include <sys/buf.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+sysctlfn kern_sysctl;
+sysctlfn hw_sysctl;
+#ifdef DEBUG
+sysctlfn debug_sysctl;
+#endif
+extern sysctlfn vm_sysctl;
+extern sysctlfn fs_sysctl;
+extern sysctlfn net_sysctl;
+extern sysctlfn cpu_sysctl;
+
+/*
+ * Locking and stats
+ */
+static struct sysctl_lock {
+ int sl_lock;
+ int sl_want;
+ int sl_locked;
+} memlock;
+
+struct sysctl_args {
+ int *name;
+ u_int namelen;
+ void *old;
+ size_t *oldlenp;
+ void *new;
+ size_t newlen;
+};
+
+int
+__sysctl(p, uap, retval)
+ struct proc *p;
+ register struct sysctl_args *uap;
+ int *retval;
+{
+ int error, dolock = 1;
+ u_int savelen, oldlen = 0;
+ sysctlfn *fn;
+ int name[CTL_MAXNAME];
+
+ if (uap->new != NULL && (error = suser(p->p_ucred, &p->p_acflag)))
+ return (error);
+ /*
+ * all top-level sysctl names are non-terminal
+ */
+ if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
+ return (EINVAL);
+ if (error = copyin(uap->name, &name, uap->namelen * sizeof(int)))
+ return (error);
+
+ switch (name[0]) {
+ case CTL_KERN:
+ fn = kern_sysctl;
+ if (name[2] != KERN_VNODE) /* XXX */
+ dolock = 0;
+ break;
+ case CTL_HW:
+ fn = hw_sysctl;
+ break;
+ case CTL_VM:
+ fn = vm_sysctl;
+ break;
+ case CTL_NET:
+ fn = net_sysctl;
+ break;
+#ifdef notyet
+ case CTL_FS:
+ fn = fs_sysctl;
+ break;
+#endif
+ case CTL_MACHDEP:
+ fn = cpu_sysctl;
+ break;
+#ifdef DEBUG
+ case CTL_DEBUG:
+ fn = debug_sysctl;
+ break;
+#endif
+ default:
+ return (EOPNOTSUPP);
+ }
+
+ if (uap->oldlenp &&
+ (error = copyin(uap->oldlenp, &oldlen, sizeof(oldlen))))
+ return (error);
+ if (uap->old != NULL) {
+ if (!useracc(uap->old, oldlen, B_WRITE))
+ return (EFAULT);
+ while (memlock.sl_lock) {
+ memlock.sl_want = 1;
+ sleep((caddr_t)&memlock, PRIBIO+1);
+ memlock.sl_locked++;
+ }
+ memlock.sl_lock = 1;
+ if (dolock)
+ vslock(uap->old, oldlen);
+ savelen = oldlen;
+ }
+ error = (*fn)(name + 1, uap->namelen - 1, uap->old, &oldlen,
+ uap->new, uap->newlen, p);
+ if (uap->old != NULL) {
+ if (dolock)
+ vsunlock(uap->old, savelen, B_WRITE);
+ memlock.sl_lock = 0;
+ if (memlock.sl_want) {
+ memlock.sl_want = 0;
+ wakeup((caddr_t)&memlock);
+ }
+ }
+ if (error)
+ return (error);
+ if (uap->oldlenp)
+ error = copyout(&oldlen, uap->oldlenp, sizeof(oldlen));
+ *retval = oldlen;
+ return (0);
+}
+
+/*
+ * Attributes stored in the kernel.
+ */
+char hostname[MAXHOSTNAMELEN];
+int hostnamelen;
+long hostid;
+int securelevel;
+
+/*
+ * kernel related system variables.
+ */
+kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ int error, level, inthostid;
+ extern char ostype[], osrelease[], version[];
+
+ /* all sysctl names at this level are terminal */
+ if (namelen != 1 && !(name[0] == KERN_PROC || name[0] == KERN_PROF))
+ return (ENOTDIR); /* overloaded */
+
+ switch (name[0]) {
+ case KERN_OSTYPE:
+ return (sysctl_rdstring(oldp, oldlenp, newp, ostype));
+ case KERN_OSRELEASE:
+ return (sysctl_rdstring(oldp, oldlenp, newp, osrelease));
+ case KERN_OSREV:
+ return (sysctl_rdint(oldp, oldlenp, newp, BSD));
+ case KERN_VERSION:
+ return (sysctl_rdstring(oldp, oldlenp, newp, version));
+ case KERN_MAXVNODES:
+ return(sysctl_int(oldp, oldlenp, newp, newlen, &desiredvnodes));
+ case KERN_MAXPROC:
+ return (sysctl_int(oldp, oldlenp, newp, newlen, &maxproc));
+ case KERN_MAXFILES:
+ return (sysctl_int(oldp, oldlenp, newp, newlen, &maxfiles));
+ case KERN_ARGMAX:
+ return (sysctl_rdint(oldp, oldlenp, newp, ARG_MAX));
+ case KERN_SECURELVL:
+ level = securelevel;
+ if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &level)) ||
+ newp == NULL)
+ return (error);
+ if (level < securelevel && p->p_pid != 1)
+ return (EPERM);
+ securelevel = level;
+ return (0);
+ case KERN_HOSTNAME:
+ error = sysctl_string(oldp, oldlenp, newp, newlen,
+ hostname, sizeof(hostname));
+ if (newp && !error)
+ hostnamelen = newlen;
+ return (error);
+ case KERN_HOSTID:
+ inthostid = hostid; /* XXX assumes sizeof long <= sizeof int */
+ error = sysctl_int(oldp, oldlenp, newp, newlen, &inthostid);
+ hostid = inthostid;
+ return (error);
+ case KERN_CLOCKRATE:
+ return (sysctl_clockrate(oldp, oldlenp));
+ case KERN_BOOTTIME:
+ return (sysctl_rdstruct(oldp, oldlenp, newp, &boottime,
+ sizeof(struct timeval)));
+ case KERN_VNODE:
+ return (sysctl_vnode(oldp, oldlenp));
+ case KERN_PROC:
+ return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp));
+ case KERN_FILE:
+ return (sysctl_file(oldp, oldlenp));
+#ifdef GPROF
+ case KERN_PROF:
+ return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp,
+ newp, newlen));
+#endif
+ case KERN_POSIX1:
+ return (sysctl_rdint(oldp, oldlenp, newp, _POSIX_VERSION));
+ case KERN_NGROUPS:
+ return (sysctl_rdint(oldp, oldlenp, newp, NGROUPS_MAX));
+ case KERN_JOB_CONTROL:
+ return (sysctl_rdint(oldp, oldlenp, newp, 1));
+ case KERN_SAVED_IDS:
+#ifdef _POSIX_SAVED_IDS
+ return (sysctl_rdint(oldp, oldlenp, newp, 1));
+#else
+ return (sysctl_rdint(oldp, oldlenp, newp, 0));
+#endif
+ default:
+ return (EOPNOTSUPP);
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * hardware related system variables.
+ */
+hw_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ extern char machine[], cpu_model[];
+
+ /* all sysctl names at this level are terminal */
+ if (namelen != 1)
+ return (ENOTDIR); /* overloaded */
+
+ switch (name[0]) {
+ case HW_MACHINE:
+ return (sysctl_rdstring(oldp, oldlenp, newp, machine));
+ case HW_MODEL:
+ return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model));
+ case HW_NCPU:
+ return (sysctl_rdint(oldp, oldlenp, newp, 1)); /* XXX */
+ case HW_BYTEORDER:
+ return (sysctl_rdint(oldp, oldlenp, newp, BYTE_ORDER));
+ case HW_PHYSMEM:
+ return (sysctl_rdint(oldp, oldlenp, newp, ctob(physmem)));
+ case HW_USERMEM:
+ return (sysctl_rdint(oldp, oldlenp, newp,
+ ctob(physmem - cnt.v_wire_count)));
+ case HW_PAGESIZE:
+ return (sysctl_rdint(oldp, oldlenp, newp, PAGE_SIZE));
+ default:
+ return (EOPNOTSUPP);
+ }
+ /* NOTREACHED */
+}
+
+#ifdef DEBUG
+/*
+ * Debugging related system variables.
+ */
+struct ctldebug debug0, debug1, debug2, debug3, debug4;
+struct ctldebug debug5, debug6, debug7, debug8, debug9;
+struct ctldebug debug10, debug11, debug12, debug13, debug14;
+struct ctldebug debug15, debug16, debug17, debug18, debug19;
+static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
+ &debug0, &debug1, &debug2, &debug3, &debug4,
+ &debug5, &debug6, &debug7, &debug8, &debug9,
+ &debug10, &debug11, &debug12, &debug13, &debug14,
+ &debug15, &debug16, &debug17, &debug18, &debug19,
+};
+int
+debug_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ struct ctldebug *cdp;
+
+ /* all sysctl names at this level are name and field */
+ if (namelen != 2)
+ return (ENOTDIR); /* overloaded */
+ cdp = debugvars[name[0]];
+ if (cdp->debugname == 0)
+ return (EOPNOTSUPP);
+ switch (name[1]) {
+ case CTL_DEBUG_NAME:
+ return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname));
+ case CTL_DEBUG_VALUE:
+ return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar));
+ default:
+ return (EOPNOTSUPP);
+ }
+ /* NOTREACHED */
+}
+#endif /* DEBUG */
+
+/*
+ * Validate parameters and get old / set new parameters
+ * for an integer-valued sysctl function.
+ */
+sysctl_int(oldp, oldlenp, newp, newlen, valp)
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ int *valp;
+{
+ int error = 0;
+
+ if (oldp && *oldlenp < sizeof(int))
+ return (ENOMEM);
+ if (newp && newlen != sizeof(int))
+ return (EINVAL);
+ *oldlenp = sizeof(int);
+ if (oldp)
+ error = copyout(valp, oldp, sizeof(int));
+ if (error == 0 && newp)
+ error = copyin(newp, valp, sizeof(int));
+ return (error);
+}
+
+/*
+ * As above, but read-only.
+ */
+sysctl_rdint(oldp, oldlenp, newp, val)
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ int val;
+{
+ int error = 0;
+
+ if (oldp && *oldlenp < sizeof(int))
+ return (ENOMEM);
+ if (newp)
+ return (EPERM);
+ *oldlenp = sizeof(int);
+ if (oldp)
+ error = copyout((caddr_t)&val, oldp, sizeof(int));
+ return (error);
+}
+
+/*
+ * Validate parameters and get old / set new parameters
+ * for a string-valued sysctl function.
+ */
+sysctl_string(oldp, oldlenp, newp, newlen, str, maxlen)
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ char *str;
+ int maxlen;
+{
+ int len, error = 0;
+
+ len = strlen(str) + 1;
+ if (oldp && *oldlenp < len)
+ return (ENOMEM);
+ if (newp && newlen >= maxlen)
+ return (EINVAL);
+ if (oldp) {
+ *oldlenp = len;
+ error = copyout(str, oldp, len);
+ }
+ if (error == 0 && newp) {
+ error = copyin(newp, str, newlen);
+ str[newlen] = 0;
+ }
+ return (error);
+}
+
+/*
+ * As above, but read-only.
+ */
+sysctl_rdstring(oldp, oldlenp, newp, str)
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ char *str;
+{
+ int len, error = 0;
+
+ len = strlen(str) + 1;
+ if (oldp && *oldlenp < len)
+ return (ENOMEM);
+ if (newp)
+ return (EPERM);
+ *oldlenp = len;
+ if (oldp)
+ error = copyout(str, oldp, len);
+ return (error);
+}
+
+/*
+ * Validate parameters and get old / set new parameters
+ * for a structure oriented sysctl function.
+ */
+sysctl_struct(oldp, oldlenp, newp, newlen, sp, len)
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ void *sp;
+ int len;
+{
+ int error = 0;
+
+ if (oldp && *oldlenp < len)
+ return (ENOMEM);
+ if (newp && newlen > len)
+ return (EINVAL);
+ if (oldp) {
+ *oldlenp = len;
+ error = copyout(sp, oldp, len);
+ }
+ if (error == 0 && newp)
+ error = copyin(newp, sp, len);
+ return (error);
+}
+
+/*
+ * Validate parameters and get old parameters
+ * for a structure oriented sysctl function.
+ */
+sysctl_rdstruct(oldp, oldlenp, newp, sp, len)
+ void *oldp;
+ size_t *oldlenp;
+ void *newp, *sp;
+ int len;
+{
+ int error = 0;
+
+ if (oldp && *oldlenp < len)
+ return (ENOMEM);
+ if (newp)
+ return (EPERM);
+ *oldlenp = len;
+ if (oldp)
+ error = copyout(sp, oldp, len);
+ return (error);
+}
+
+/*
+ * Get file structures.
+ */
+sysctl_file(where, sizep)
+ char *where;
+ size_t *sizep;
+{
+ int buflen, error;
+ struct file *fp;
+ char *start = where;
+
+ buflen = *sizep;
+ if (where == NULL) {
+ /*
+ * overestimate by 10 files
+ */
+ *sizep = sizeof(filehead) + (nfiles + 10) * sizeof(struct file);
+ return (0);
+ }
+
+ /*
+ * first copyout filehead
+ */
+ if (buflen < sizeof(filehead)) {
+ *sizep = 0;
+ return (0);
+ }
+ if (error = copyout((caddr_t)&filehead, where, sizeof(filehead)))
+ return (error);
+ buflen -= sizeof(filehead);
+ where += sizeof(filehead);
+
+ /*
+ * followed by an array of file structures
+ */
+ for (fp = filehead; fp != NULL; fp = fp->f_filef) {
+ if (buflen < sizeof(struct file)) {
+ *sizep = where - start;
+ return (ENOMEM);
+ }
+ if (error = copyout((caddr_t)fp, where, sizeof (struct file)))
+ return (error);
+ buflen -= sizeof(struct file);
+ where += sizeof(struct file);
+ }
+ *sizep = where - start;
+ return (0);
+}
+
+/*
+ * try over estimating by 5 procs
+ */
+#define KERN_PROCSLOP (5 * sizeof (struct kinfo_proc))
+
+sysctl_doproc(name, namelen, where, sizep)
+ int *name;
+ u_int namelen;
+ char *where;
+ size_t *sizep;
+{
+ register struct proc *p;
+ register struct kinfo_proc *dp = (struct kinfo_proc *)where;
+ register int needed = 0;
+ int buflen = where != NULL ? *sizep : 0;
+ int doingzomb;
+ struct eproc eproc;
+ int error = 0;
+
+ if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL))
+ return (EINVAL);
+ p = (struct proc *)allproc;
+ doingzomb = 0;
+again:
+ for (; p != NULL; p = p->p_next) {
+ /*
+ * Skip embryonic processes.
+ */
+ if (p->p_stat == SIDL)
+ continue;
+ /*
+ * TODO - make more efficient (see notes below).
+ * do by session.
+ */
+ switch (name[0]) {
+
+ case KERN_PROC_PID:
+ /* could do this with just a lookup */
+ if (p->p_pid != (pid_t)name[1])
+ continue;
+ break;
+
+ case KERN_PROC_PGRP:
+ /* could do this by traversing pgrp */
+ if (p->p_pgrp->pg_id != (pid_t)name[1])
+ continue;
+ break;
+
+ case KERN_PROC_TTY:
+ if ((p->p_flag & P_CONTROLT) == 0 ||
+ p->p_session->s_ttyp == NULL ||
+ p->p_session->s_ttyp->t_dev != (dev_t)name[1])
+ continue;
+ break;
+
+ case KERN_PROC_UID:
+ if (p->p_ucred->cr_uid != (uid_t)name[1])
+ continue;
+ break;
+
+ case KERN_PROC_RUID:
+ if (p->p_cred->p_ruid != (uid_t)name[1])
+ continue;
+ break;
+ }
+ if (buflen >= sizeof(struct kinfo_proc)) {
+ fill_eproc(p, &eproc);
+ if (error = copyout((caddr_t)p, &dp->kp_proc,
+ sizeof(struct proc)))
+ return (error);
+ if (error = copyout((caddr_t)&eproc, &dp->kp_eproc,
+ sizeof(eproc)))
+ return (error);
+ dp++;
+ buflen -= sizeof(struct kinfo_proc);
+ }
+ needed += sizeof(struct kinfo_proc);
+ }
+ if (doingzomb == 0) {
+ p = zombproc;
+ doingzomb++;
+ goto again;
+ }
+ if (where != NULL) {
+ *sizep = (caddr_t)dp - where;
+ if (needed > *sizep)
+ return (ENOMEM);
+ } else {
+ needed += KERN_PROCSLOP;
+ *sizep = needed;
+ }
+ return (0);
+}
+
+/*
+ * Fill in an eproc structure for the specified process.
+ */
+void
+fill_eproc(p, ep)
+ register struct proc *p;
+ register struct eproc *ep;
+{
+ register struct tty *tp;
+
+ ep->e_paddr = p;
+ ep->e_sess = p->p_pgrp->pg_session;
+ ep->e_pcred = *p->p_cred;
+ ep->e_ucred = *p->p_ucred;
+ if (p->p_stat == SIDL || p->p_stat == SZOMB) {
+ ep->e_vm.vm_rssize = 0;
+ ep->e_vm.vm_tsize = 0;
+ ep->e_vm.vm_dsize = 0;
+ ep->e_vm.vm_ssize = 0;
+#ifndef sparc
+ /* ep->e_vm.vm_pmap = XXX; */
+#endif
+ } else {
+ register struct vmspace *vm = p->p_vmspace;
+
+#ifdef pmap_resident_count
+ ep->e_vm.vm_rssize = pmap_resident_count(&vm->vm_pmap); /*XXX*/
+#else
+ ep->e_vm.vm_rssize = vm->vm_rssize;
+#endif
+ ep->e_vm.vm_tsize = vm->vm_tsize;
+ ep->e_vm.vm_dsize = vm->vm_dsize;
+ ep->e_vm.vm_ssize = vm->vm_ssize;
+#ifndef sparc
+ ep->e_vm.vm_pmap = vm->vm_pmap;
+#endif
+ }
+ if (p->p_pptr)
+ ep->e_ppid = p->p_pptr->p_pid;
+ else
+ ep->e_ppid = 0;
+ ep->e_pgid = p->p_pgrp->pg_id;
+ ep->e_jobc = p->p_pgrp->pg_jobc;
+ if ((p->p_flag & P_CONTROLT) &&
+ (tp = ep->e_sess->s_ttyp)) {
+ ep->e_tdev = tp->t_dev;
+ ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
+ ep->e_tsess = tp->t_session;
+ } else
+ ep->e_tdev = NODEV;
+ ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : 0;
+ if (SESS_LEADER(p))
+ ep->e_flag |= EPROC_SLEADER;
+ if (p->p_wmesg)
+ strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN);
+ ep->e_xsize = ep->e_xrssize = 0;
+ ep->e_xccount = ep->e_xswrss = 0;
+}
+
+#ifdef COMPAT_43
+#include <sys/socket.h>
+#define KINFO_PROC (0<<8)
+#define KINFO_RT (1<<8)
+#define KINFO_VNODE (2<<8)
+#define KINFO_FILE (3<<8)
+#define KINFO_METER (4<<8)
+#define KINFO_LOADAVG (5<<8)
+#define KINFO_CLOCKRATE (6<<8)
+
+struct getkerninfo_args {
+ int op;
+ char *where;
+ int *size;
+ int arg;
+};
+
+ogetkerninfo(p, uap, retval)
+ struct proc *p;
+ register struct getkerninfo_args *uap;
+ int *retval;
+{
+ int error, name[5];
+ u_int size;
+
+ if (uap->size &&
+ (error = copyin((caddr_t)uap->size, (caddr_t)&size, sizeof(size))))
+ return (error);
+
+ switch (uap->op & 0xff00) {
+
+ case KINFO_RT:
+ name[0] = PF_ROUTE;
+ name[1] = 0;
+ name[2] = (uap->op & 0xff0000) >> 16;
+ name[3] = uap->op & 0xff;
+ name[4] = uap->arg;
+ error = net_sysctl(name, 5, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_VNODE:
+ name[0] = KERN_VNODE;
+ error = kern_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_PROC:
+ name[0] = KERN_PROC;
+ name[1] = uap->op & 0xff;
+ name[2] = uap->arg;
+ error = kern_sysctl(name, 3, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_FILE:
+ name[0] = KERN_FILE;
+ error = kern_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_METER:
+ name[0] = VM_METER;
+ error = vm_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_LOADAVG:
+ name[0] = VM_LOADAVG;
+ error = vm_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_CLOCKRATE:
+ name[0] = KERN_CLOCKRATE;
+ error = kern_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ default:
+ return (EOPNOTSUPP);
+ }
+ if (error)
+ return (error);
+ *retval = size;
+ if (uap->size)
+ error = copyout((caddr_t)&size, (caddr_t)uap->size,
+ sizeof(size));
+ return (error);
+}
+#endif /* COMPAT_43 */
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
new file mode 100644
index 0000000..4dadcb8
--- /dev/null
+++ b/sys/kern/kern_time.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_time.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+
+#include <machine/cpu.h>
+
+/*
+ * Time of day and interval timer support.
+ *
+ * These routines provide the kernel entry points to get and set
+ * the time-of-day and per-process interval timers. Subroutines
+ * here provide support for adding and subtracting timeval structures
+ * and decrementing interval timers, optionally reloading the interval
+ * timers when they expire.
+ */
+
+struct gettimeofday_args {
+ struct timeval *tp;
+ struct timezone *tzp;
+};
+/* ARGSUSED */
+gettimeofday(p, uap, retval)
+ struct proc *p;
+ register struct gettimeofday_args *uap;
+ int *retval;
+{
+ struct timeval atv;
+ int error = 0;
+
+ if (uap->tp) {
+ microtime(&atv);
+ if (error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
+ sizeof (atv)))
+ return (error);
+ }
+ if (uap->tzp)
+ error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
+ sizeof (tz));
+ return (error);
+}
+
+struct settimeofday_args {
+ struct timeval *tv;
+ struct timezone *tzp;
+};
+/* ARGSUSED */
+settimeofday(p, uap, retval)
+ struct proc *p;
+ struct settimeofday_args *uap;
+ int *retval;
+{
+ struct timeval atv, delta;
+ struct timezone atz;
+ int error, s;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ /* Verify all parameters before changing time. */
+ if (uap->tv &&
+ (error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof(atv))))
+ return (error);
+ if (uap->tzp &&
+ (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
+ return (error);
+ if (uap->tv) {
+ /* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */
+ s = splclock();
+ /* nb. delta.tv_usec may be < 0, but this is OK here */
+ delta.tv_sec = atv.tv_sec - time.tv_sec;
+ delta.tv_usec = atv.tv_usec - time.tv_usec;
+ time = atv;
+ (void) splsoftclock();
+ timevaladd(&boottime, &delta);
+ timevalfix(&boottime);
+ timevaladd(&runtime, &delta);
+ timevalfix(&runtime);
+ LEASE_UPDATETIME(delta.tv_sec);
+ splx(s);
+ resettodr();
+ }
+ if (uap->tzp)
+ tz = atz;
+ return (0);
+}
+
+extern int tickadj; /* "standard" clock skew, us./tick */
+int tickdelta; /* current clock skew, us. per tick */
+long timedelta; /* unapplied time correction, us. */
+long bigadj = 1000000; /* use 10x skew above bigadj us. */
+
+struct adjtime_args {
+ struct timeval *delta;
+ struct timeval *olddelta;
+};
+/* ARGSUSED */
+adjtime(p, uap, retval)
+ struct proc *p;
+ register struct adjtime_args *uap;
+ int *retval;
+{
+ struct timeval atv;
+ register long ndelta, ntickdelta, odelta;
+ int s, error;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ if (error =
+ copyin((caddr_t)uap->delta, (caddr_t)&atv, sizeof(struct timeval)))
+ return (error);
+
+ /*
+ * Compute the total correction and the rate at which to apply it.
+ * Round the adjustment down to a whole multiple of the per-tick
+ * delta, so that after some number of incremental changes in
+ * hardclock(), tickdelta will become zero, lest the correction
+ * overshoot and start taking us away from the desired final time.
+ */
+ ndelta = atv.tv_sec * 1000000 + atv.tv_usec;
+ if (ndelta > bigadj)
+ ntickdelta = 10 * tickadj;
+ else
+ ntickdelta = tickadj;
+ if (ndelta % ntickdelta)
+ ndelta = ndelta / ntickdelta * ntickdelta;
+
+ /*
+ * To make hardclock()'s job easier, make the per-tick delta negative
+ * if we want time to run slower; then hardclock can simply compute
+ * tick + tickdelta, and subtract tickdelta from timedelta.
+ */
+ if (ndelta < 0)
+ ntickdelta = -ntickdelta;
+ s = splclock();
+ odelta = timedelta;
+ timedelta = ndelta;
+ tickdelta = ntickdelta;
+ splx(s);
+
+ if (uap->olddelta) {
+ atv.tv_sec = odelta / 1000000;
+ atv.tv_usec = odelta % 1000000;
+ (void) copyout((caddr_t)&atv, (caddr_t)uap->olddelta,
+ sizeof(struct timeval));
+ }
+ return (0);
+}
+
+/*
+ * Get value of an interval timer. The process virtual and
+ * profiling virtual time timers are kept in the p_stats area, since
+ * they can be swapped out. These are kept internally in the
+ * way they are specified externally: in time until they expire.
+ *
+ * The real time interval timer is kept in the process table slot
+ * for the process, and its value (it_value) is kept as an
+ * absolute time rather than as a delta, so that it is easy to keep
+ * periodic real-time signals from drifting.
+ *
+ * Virtual time timers are processed in the hardclock() routine of
+ * kern_clock.c. The real time timer is processed by a timeout
+ * routine, called from the softclock() routine. Since a callout
+ * may be delayed in real time due to interrupt processing in the system,
+ * it is possible for the real time timeout routine (realitexpire, given below),
+ * to be delayed in real time past when it is supposed to occur. It
+ * does not suffice, therefore, to reload the real timer .it_value from the
+ * real time timers .it_interval. Rather, we compute the next time in
+ * absolute time the timer should go off.
+ */
+struct getitimer_args {
+ u_int which;
+ struct itimerval *itv;
+};
+/* ARGSUSED */
+getitimer(p, uap, retval)
+ struct proc *p;
+ register struct getitimer_args *uap;
+ int *retval;
+{
+ struct itimerval aitv;
+ int s;
+
+ if (uap->which > ITIMER_PROF)
+ return (EINVAL);
+ s = splclock();
+ if (uap->which == ITIMER_REAL) {
+ /*
+ * Convert from absoulte to relative time in .it_value
+ * part of real time timer. If time for real time timer
+ * has passed return 0, else return difference between
+ * current time and time for the timer to go off.
+ */
+ aitv = p->p_realtimer;
+ if (timerisset(&aitv.it_value))
+ if (timercmp(&aitv.it_value, &time, <))
+ timerclear(&aitv.it_value);
+ else
+ timevalsub(&aitv.it_value,
+ (struct timeval *)&time);
+ } else
+ aitv = p->p_stats->p_timer[uap->which];
+ splx(s);
+ return (copyout((caddr_t)&aitv, (caddr_t)uap->itv,
+ sizeof (struct itimerval)));
+}
+
+struct setitimer_args {
+ u_int which;
+ struct itimerval *itv, *oitv;
+};
+/* ARGSUSED */
+setitimer(p, uap, retval)
+ struct proc *p;
+ register struct setitimer_args *uap;
+ int *retval;
+{
+ struct itimerval aitv;
+ register struct itimerval *itvp;
+ int s, error;
+
+ if (uap->which > ITIMER_PROF)
+ return (EINVAL);
+ itvp = uap->itv;
+ if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
+ sizeof(struct itimerval))))
+ return (error);
+ if ((uap->itv = uap->oitv) && (error = getitimer(p, uap, retval)))
+ return (error);
+ if (itvp == 0)
+ return (0);
+ if (itimerfix(&aitv.it_value) || itimerfix(&aitv.it_interval))
+ return (EINVAL);
+ s = splclock();
+ if (uap->which == ITIMER_REAL) {
+ untimeout(realitexpire, (caddr_t)p);
+ if (timerisset(&aitv.it_value)) {
+ timevaladd(&aitv.it_value, (struct timeval *)&time);
+ timeout(realitexpire, (caddr_t)p, hzto(&aitv.it_value));
+ }
+ p->p_realtimer = aitv;
+ } else
+ p->p_stats->p_timer[uap->which] = aitv;
+ splx(s);
+ return (0);
+}
+
+/*
+ * Real interval timer expired:
+ * send process whose timer expired an alarm signal.
+ * If time is not set up to reload, then just return.
+ * Else compute next time timer should go off which is > current time.
+ * This is where delay in processing this timeout causes multiple
+ * SIGALRM calls to be compressed into one.
+ */
+void
+realitexpire(arg)
+ void *arg;
+{
+ register struct proc *p;
+ int s;
+
+ p = (struct proc *)arg;
+ psignal(p, SIGALRM);
+ if (!timerisset(&p->p_realtimer.it_interval)) {
+ timerclear(&p->p_realtimer.it_value);
+ return;
+ }
+ for (;;) {
+ s = splclock();
+ timevaladd(&p->p_realtimer.it_value,
+ &p->p_realtimer.it_interval);
+ if (timercmp(&p->p_realtimer.it_value, &time, >)) {
+ timeout(realitexpire, (caddr_t)p,
+ hzto(&p->p_realtimer.it_value));
+ splx(s);
+ return;
+ }
+ splx(s);
+ }
+}
+
+/*
+ * Check that a proposed value to load into the .it_value or
+ * .it_interval part of an interval timer is acceptable, and
+ * fix it to have at least minimal value (i.e. if it is less
+ * than the resolution of the clock, round it up.)
+ */
+itimerfix(tv)
+ struct timeval *tv;
+{
+
+ if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
+ tv->tv_usec < 0 || tv->tv_usec >= 1000000)
+ return (EINVAL);
+ if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
+ tv->tv_usec = tick;
+ return (0);
+}
+
+/*
+ * Decrement an interval timer by a specified number
+ * of microseconds, which must be less than a second,
+ * i.e. < 1000000. If the timer expires, then reload
+ * it. In this case, carry over (usec - old value) to
+ * reduce the value reloaded into the timer so that
+ * the timer does not drift. This routine assumes
+ * that it is called in a context where the timers
+ * on which it is operating cannot change in value.
+ */
+itimerdecr(itp, usec)
+ register struct itimerval *itp;
+ int usec;
+{
+
+ if (itp->it_value.tv_usec < usec) {
+ if (itp->it_value.tv_sec == 0) {
+ /* expired, and already in next interval */
+ usec -= itp->it_value.tv_usec;
+ goto expire;
+ }
+ itp->it_value.tv_usec += 1000000;
+ itp->it_value.tv_sec--;
+ }
+ itp->it_value.tv_usec -= usec;
+ usec = 0;
+ if (timerisset(&itp->it_value))
+ return (1);
+ /* expired, exactly at end of interval */
+expire:
+ if (timerisset(&itp->it_interval)) {
+ itp->it_value = itp->it_interval;
+ itp->it_value.tv_usec -= usec;
+ if (itp->it_value.tv_usec < 0) {
+ itp->it_value.tv_usec += 1000000;
+ itp->it_value.tv_sec--;
+ }
+ } else
+ itp->it_value.tv_usec = 0; /* sec is already 0 */
+ return (0);
+}
+
+/*
+ * Add and subtract routines for timevals.
+ * N.B.: subtract routine doesn't deal with
+ * results which are before the beginning,
+ * it just gets very confused in this case.
+ * Caveat emptor.
+ */
+timevaladd(t1, t2)
+ struct timeval *t1, *t2;
+{
+
+ t1->tv_sec += t2->tv_sec;
+ t1->tv_usec += t2->tv_usec;
+ timevalfix(t1);
+}
+
+timevalsub(t1, t2)
+ struct timeval *t1, *t2;
+{
+
+ t1->tv_sec -= t2->tv_sec;
+ t1->tv_usec -= t2->tv_usec;
+ timevalfix(t1);
+}
+
+timevalfix(t1)
+ struct timeval *t1;
+{
+
+ if (t1->tv_usec < 0) {
+ t1->tv_sec--;
+ t1->tv_usec += 1000000;
+ }
+ if (t1->tv_usec >= 1000000) {
+ t1->tv_sec++;
+ t1->tv_usec -= 1000000;
+ }
+}
diff --git a/sys/kern/kern_xxx.c b/sys/kern/kern_xxx.c
new file mode 100644
index 0000000..64fac91
--- /dev/null
+++ b/sys/kern/kern_xxx.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_xxx.c 8.2 (Berkeley) 11/14/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/reboot.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+struct reboot_args {
+ int opt;
+};
+/* ARGSUSED */
+reboot(p, uap, retval)
+ struct proc *p;
+ struct reboot_args *uap;
+ int *retval;
+{
+ int error;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ boot(uap->opt);
+ return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+
+struct gethostname_args {
+ char *hostname;
+ u_int len;
+};
+/* ARGSUSED */
+ogethostname(p, uap, retval)
+ struct proc *p;
+ struct gethostname_args *uap;
+ int *retval;
+{
+ int name;
+
+ name = KERN_HOSTNAME;
+ return (kern_sysctl(&name, 1, uap->hostname, &uap->len, 0, 0));
+}
+
+struct sethostname_args {
+ char *hostname;
+ u_int len;
+};
+/* ARGSUSED */
+osethostname(p, uap, retval)
+ struct proc *p;
+ register struct sethostname_args *uap;
+ int *retval;
+{
+ int name;
+ int error;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ name = KERN_HOSTNAME;
+ return (kern_sysctl(&name, 1, 0, 0, uap->hostname, uap->len));
+}
+
+extern long hostid;
+
+struct gethostid_args {
+ int dummy;
+};
+/* ARGSUSED */
+ogethostid(p, uap, retval)
+ struct proc *p;
+ struct gethostid_args *uap;
+ int *retval;
+{
+
+ *(long *)retval = hostid;
+ return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+#ifdef COMPAT_43
+struct sethostid_args {
+ long hostid;
+};
+/* ARGSUSED */
+osethostid(p, uap, retval)
+ struct proc *p;
+ struct sethostid_args *uap;
+ int *retval;
+{
+ int error;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ hostid = uap->hostid;
+ return (0);
+}
+
+oquota()
+{
+
+ return (ENOSYS);
+}
+#endif /* COMPAT_43 */
diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh
new file mode 100644
index 0000000..0ddea0c
--- /dev/null
+++ b/sys/kern/makesyscalls.sh
@@ -0,0 +1,171 @@
+#! /bin/sh -
+# @(#)makesyscalls.sh 8.1 (Berkeley) 6/10/93
+
+set -e
+
+# name of compat option:
+compat=COMPAT_43
+
+# output files:
+sysnames="syscalls.c"
+syshdr="../sys/syscall.h"
+syssw="init_sysent.c"
+
+# tmp files:
+sysdcl="sysent.dcl"
+syscompat="sysent.compat"
+sysent="sysent.switch"
+
+trap "rm $sysdcl $syscompat $sysent" 0
+
+case $# in
+ 0) echo "Usage: $0 input-file" 1>&2
+ exit 1
+ ;;
+esac
+
+awk < $1 "
+ BEGIN {
+ sysdcl = \"$sysdcl\"
+ syscompat = \"$syscompat\"
+ sysent = \"$sysent\"
+ sysnames = \"$sysnames\"
+ syshdr = \"$syshdr\"
+ compat = \"$compat\"
+ infile = \"$1\"
+ "'
+
+ printf "/*\n * System call switch table.\n *\n" > sysdcl
+ printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysdcl
+
+ printf "\n#ifdef %s\n", compat > syscompat
+ printf "#define compat(n, name) n, __CONCAT(o,name)\n\n" > syscompat
+
+ printf "/*\n * System call names.\n *\n" > sysnames
+ printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnames
+
+ printf "/*\n * System call numbers.\n *\n" > syshdr
+ printf " * DO NOT EDIT-- this file is automatically generated.\n" > syshdr
+ }
+ NR == 1 {
+ printf " * created from%s\n */\n\n", $0 > sysdcl
+ printf "#include <sys/param.h>\n" > sysdcl
+ printf "#include <sys/systm.h>\n\n" > sysdcl
+ printf "int\tnosys();\n\n" > sysdcl
+
+ printf "struct sysent sysent[] = {\n" > sysent
+
+ printf " * created from%s\n */\n\n", $0 > sysnames
+ printf "char *syscallnames[] = {\n" > sysnames
+
+ printf " * created from%s\n */\n\n", $0 > syshdr
+ next
+ }
+ NF == 0 || $1 ~ /^;/ {
+ next
+ }
+ $1 ~ /^#[ ]*if/ {
+ print > sysent
+ print > sysdcl
+ print > syscompat
+ print > sysnames
+ savesyscall = syscall
+ next
+ }
+ $1 ~ /^#[ ]*else/ {
+ print > sysent
+ print > sysdcl
+ print > syscompat
+ print > sysnames
+ syscall = savesyscall
+ next
+ }
+ $1 ~ /^#/ {
+ print > sysent
+ print > sysdcl
+ print > syscompat
+ print > sysnames
+ next
+ }
+ syscall != $1 {
+ printf "%s: line %d: syscall number out of sync at %d\n", \
+ infile, NR, syscall
+ printf "line is:\n"
+ print
+ exit 1
+ }
+ { comment = $4
+ for (i = 5; i <= NF; i++)
+ comment = comment " " $i
+ if (NF < 5)
+ $5 = $4
+ }
+ $2 == "STD" {
+ printf("int\t%s();\n", $4) > sysdcl
+ printf("\t{ %d, %s },\t\t\t/* %d = %s */\n", \
+ $3, $4, syscall, $5) > sysent
+ printf("\t\"%s\",\t\t\t/* %d = %s */\n", \
+ $5, syscall, $5) > sysnames
+ printf("#define\tSYS_%s\t%d\n", \
+ $5, syscall) > syshdr
+ syscall++
+ next
+ }
+ $2 == "COMPAT" {
+ printf("int\to%s();\n", $4) > syscompat
+ printf("\t{ compat(%d,%s) },\t\t/* %d = old %s */\n", \
+ $3, $4, syscall, $5) > sysent
+ printf("\t\"old.%s\",\t\t/* %d = old %s */\n", \
+ $5, syscall, $5) > sysnames
+ printf("\t\t\t\t/* %d is old %s */\n", \
+ syscall, comment) > syshdr
+ syscall++
+ next
+ }
+ $2 == "LIBCOMPAT" {
+ printf("int\to%s();\n", $4) > syscompat
+ printf("\t{ compat(%d,%s) },\t\t/* %d = old %s */\n", \
+ $3, $4, syscall, $5) > sysent
+ printf("\t\"old.%s\",\t\t/* %d = old %s */\n", \
+ $5, syscall, $5) > sysnames
+ printf("#define\tSYS_%s\t%d\t/* compatibility; still used by libc */\n", \
+ $5, syscall) > syshdr
+ syscall++
+ next
+ }
+ $2 == "OBSOL" {
+ printf("\t{ 0, nosys },\t\t\t/* %d = obsolete %s */\n", \
+ syscall, comment) > sysent
+ printf("\t\"obs_%s\",\t\t\t/* %d = obsolete %s */\n", \
+ $4, syscall, comment) > sysnames
+ printf("\t\t\t\t/* %d is obsolete %s */\n", \
+ syscall, comment) > syshdr
+ syscall++
+ next
+ }
+ $2 == "UNIMPL" {
+ printf("\t{ 0, nosys },\t\t\t/* %d = %s */\n", \
+ syscall, comment) > sysent
+ printf("\t\"#%d\",\t\t\t/* %d = %s */\n", \
+ syscall, syscall, comment) > sysnames
+ syscall++
+ next
+ }
+ {
+ printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $2
+ exit 1
+ }
+ END {
+ printf("\n#else /* %s */\n", compat) > syscompat
+ printf("#define compat(n, name) 0, nosys\n") > syscompat
+ printf("#endif /* %s */\n\n", compat) > syscompat
+
+ printf("};\n\n") > sysent
+ printf("int\tnsysent = sizeof(sysent) / sizeof(sysent[0]);\n") > sysent
+
+ printf("};\n") > sysnames
+ } '
+
+cat $sysdcl $syscompat $sysent >$syssw
+
+chmod 444 $sysnames $syshdr $syssw
diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c
new file mode 100644
index 0000000..af17988
--- /dev/null
+++ b/sys/kern/subr_autoconf.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratories.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_autoconf.c 8.1 (Berkeley) 6/10/93
+ *
+ * from: $Header: subr_autoconf.c,v 1.12 93/02/01 19:31:48 torek Exp $ (LBL)
+ */
+
+#include <sys/param.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+
+/*
+ * Autoconfiguration subroutines.
+ */
+
+/*
+ * ioconf.c exports exactly two names: cfdata and cfroots. All system
+ * devices and drivers are found via these tables.
+ */
+extern struct cfdata cfdata[];
+extern short cfroots[];
+
+#define ROOT ((struct device *)NULL)
+
+struct matchinfo {
+ cfmatch_t fn;
+ struct device *parent;
+ void *aux;
+ struct cfdata *match;
+ int pri;
+};
+
+/*
+ * Apply the matching function and choose the best. This is used
+ * a few times and we want to keep the code small.
+ */
+static void
+mapply(m, cf)
+ register struct matchinfo *m;
+ register struct cfdata *cf;
+{
+ register int pri;
+
+ if (m->fn != NULL)
+ pri = (*m->fn)(m->parent, cf, m->aux);
+ else
+ pri = (*cf->cf_driver->cd_match)(m->parent, cf, m->aux);
+ if (pri > m->pri) {
+ m->match = cf;
+ m->pri = pri;
+ }
+}
+
+/*
+ * Iterate over all potential children of some device, calling the given
+ * function (default being the child's match function) for each one.
+ * Nonzero returns are matches; the highest value returned is considered
+ * the best match. Return the `found child' if we got a match, or NULL
+ * otherwise. The `aux' pointer is simply passed on through.
+ *
+ * Note that this function is designed so that it can be used to apply
+ * an arbitrary function to all potential children (its return value
+ * can be ignored).
+ */
+struct cfdata *
+config_search(fn, parent, aux)
+ cfmatch_t fn;
+ register struct device *parent;
+ void *aux;
+{
+ register struct cfdata *cf;
+ register short *p;
+ struct matchinfo m;
+
+ m.fn = fn;
+ m.parent = parent;
+ m.aux = aux;
+ m.match = NULL;
+ m.pri = 0;
+ for (cf = cfdata; cf->cf_driver; cf++) {
+ /*
+ * Skip cf if no longer eligible, otherwise scan through
+ * parents for one matching `parent', and try match function.
+ */
+ if (cf->cf_fstate == FSTATE_FOUND)
+ continue;
+ for (p = cf->cf_parents; *p >= 0; p++)
+ if (parent->dv_cfdata == &cfdata[*p])
+ mapply(&m, cf);
+ }
+ return (m.match);
+}
+
+/*
+ * Find the given root device.
+ * This is much like config_search, but there is no parent.
+ */
+struct cfdata *
+config_rootsearch(fn, rootname, aux)
+ register cfmatch_t fn;
+ register char *rootname;
+ register void *aux;
+{
+ register struct cfdata *cf;
+ register short *p;
+ struct matchinfo m;
+
+ m.fn = fn;
+ m.parent = ROOT;
+ m.aux = aux;
+ m.match = NULL;
+ m.pri = 0;
+ /*
+ * Look at root entries for matching name. We do not bother
+ * with found-state here since only one root should ever be
+ * searched (and it must be done first).
+ */
+ for (p = cfroots; *p >= 0; p++) {
+ cf = &cfdata[*p];
+ if (strcmp(cf->cf_driver->cd_name, rootname) == 0)
+ mapply(&m, cf);
+ }
+ return (m.match);
+}
+
+static char *msgs[3] = { "", " not configured\n", " unsupported\n" };
+
+/*
+ * The given `aux' argument describes a device that has been found
+ * on the given parent, but not necessarily configured. Locate the
+ * configuration data for that device (using the cd_match configuration
+ * driver function) and attach it, and return true. If the device was
+ * not configured, call the given `print' function and return 0.
+ */
+int
+config_found(parent, aux, print)
+ struct device *parent;
+ void *aux;
+ cfprint_t print;
+{
+ struct cfdata *cf;
+
+ if ((cf = config_search((cfmatch_t)NULL, parent, aux)) != NULL) {
+ config_attach(parent, cf, aux, print);
+ return (1);
+ }
+ printf(msgs[(*print)(aux, parent->dv_xname)]);
+ return (0);
+}
+
+/*
+ * As above, but for root devices.
+ */
+int
+config_rootfound(rootname, aux)
+ char *rootname;
+ void *aux;
+{
+ struct cfdata *cf;
+
+ if ((cf = config_rootsearch((cfmatch_t)NULL, rootname, aux)) != NULL) {
+ config_attach(ROOT, cf, aux, (cfprint_t)NULL);
+ return (1);
+ }
+ printf("root device %s not configured\n", rootname);
+ return (0);
+}
+
+/* just like sprintf(buf, "%d") except that it works from the end */
+static char *
+number(ep, n)
+ register char *ep;
+ register int n;
+{
+
+ *--ep = 0;
+ while (n >= 10) {
+ *--ep = (n % 10) + '0';
+ n /= 10;
+ }
+ *--ep = n + '0';
+ return (ep);
+}
+
+/*
+ * Attach a found device. Allocates memory for device variables.
+ */
+void
+config_attach(parent, cf, aux, print)
+ register struct device *parent;
+ register struct cfdata *cf;
+ register void *aux;
+ cfprint_t print;
+{
+ register struct device *dev;
+ register struct cfdriver *cd;
+ register size_t lname, lunit;
+ register char *xunit;
+ int myunit;
+ char num[10];
+ static struct device **nextp = &alldevs;
+
+ cd = cf->cf_driver;
+ if (cd->cd_devsize < sizeof(struct device))
+ panic("config_attach");
+ myunit = cf->cf_unit;
+ if (cf->cf_fstate == FSTATE_NOTFOUND)
+ cf->cf_fstate = FSTATE_FOUND;
+ else
+ cf->cf_unit++;
+
+ /* compute length of name and decimal expansion of unit number */
+ lname = strlen(cd->cd_name);
+ xunit = number(&num[sizeof num], myunit);
+ lunit = &num[sizeof num] - xunit;
+ if (lname + lunit >= sizeof(dev->dv_xname))
+ panic("config_attach: device name too long");
+
+ /* get memory for all device vars */
+ dev = (struct device *)malloc(cd->cd_devsize, M_DEVBUF, M_WAITOK);
+ /* XXX cannot wait! */
+ bzero(dev, cd->cd_devsize);
+ *nextp = dev; /* link up */
+ nextp = &dev->dv_next;
+ dev->dv_class = cd->cd_class;
+ dev->dv_cfdata = cf;
+ dev->dv_unit = myunit;
+ bcopy(cd->cd_name, dev->dv_xname, lname);
+ bcopy(xunit, dev->dv_xname + lname, lunit);
+ dev->dv_parent = parent;
+ if (parent == ROOT)
+ printf("%s (root)", dev->dv_xname);
+ else {
+ printf("%s at %s", dev->dv_xname, parent->dv_xname);
+ (void) (*print)(aux, (char *)0);
+ }
+
+ /* put this device in the devices array */
+ if (dev->dv_unit >= cd->cd_ndevs) {
+ /*
+ * Need to expand the array.
+ */
+ int old = cd->cd_ndevs, oldbytes, new, newbytes;
+ void **nsp;
+
+ if (old == 0) {
+ nsp = malloc(MINALLOCSIZE, M_DEVBUF, M_WAITOK); /*XXX*/
+ bzero(nsp, MINALLOCSIZE);
+ cd->cd_ndevs = MINALLOCSIZE / sizeof(void *);
+ } else {
+ new = cd->cd_ndevs;
+ do {
+ new *= 2;
+ } while (new <= dev->dv_unit);
+ cd->cd_ndevs = new;
+ oldbytes = old * sizeof(void *);
+ newbytes = new * sizeof(void *);
+ nsp = malloc(newbytes, M_DEVBUF, M_WAITOK); /*XXX*/
+ bcopy(cd->cd_devs, nsp, oldbytes);
+ bzero(&nsp[old], newbytes - oldbytes);
+ free(cd->cd_devs, M_DEVBUF);
+ }
+ cd->cd_devs = nsp;
+ }
+ if (cd->cd_devs[dev->dv_unit])
+ panic("config_attach: duplicate %s", dev->dv_xname);
+ cd->cd_devs[dev->dv_unit] = dev;
+
+ /*
+ * Before attaching, clobber any unfound devices that are
+ * otherwise identical.
+ */
+ for (cf = cfdata; cf->cf_driver; cf++)
+ if (cf->cf_driver == cd && cf->cf_unit == dev->dv_unit &&
+ cf->cf_fstate == FSTATE_NOTFOUND)
+ cf->cf_fstate = FSTATE_FOUND;
+ (*cd->cd_attach)(parent, dev, aux);
+}
+
+/*
+ * Attach an event. These must come from initially-zero space (see
+ * commented-out assignments below), but that occurs naturally for
+ * device instance variables.
+ */
+void
+evcnt_attach(dev, name, ev)
+ struct device *dev;
+ const char *name;
+ struct evcnt *ev;
+{
+ static struct evcnt **nextp = &allevents;
+
+#ifdef DIAGNOSTIC
+ if (strlen(name) >= sizeof(ev->ev_name))
+ panic("evcnt_attach");
+#endif
+ /* ev->ev_next = NULL; */
+ ev->ev_dev = dev;
+ /* ev->ev_count = 0; */
+ strcpy(ev->ev_name, name);
+ *nextp = ev;
+ nextp = &ev->ev_next;
+}
diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c
new file mode 100644
index 0000000..f065761
--- /dev/null
+++ b/sys/kern/subr_log.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_log.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Error log buffer for kernel printf's.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/msgbuf.h>
+#include <sys/file.h>
+
+#define LOG_RDPRI (PZERO + 1)
+
+#define LOG_ASYNC 0x04
+#define LOG_RDWAIT 0x08
+
+struct logsoftc {
+ int sc_state; /* see above for possibilities */
+ struct selinfo sc_selp; /* process waiting on select call */
+ int sc_pgid; /* process/group for async I/O */
+} logsoftc;
+
+int log_open; /* also used in log() */
+
+/*ARGSUSED*/
+logopen(dev, flags, mode, p)
+ dev_t dev;
+ int flags, mode;
+ struct proc *p;
+{
+ register struct msgbuf *mbp = msgbufp;
+
+ if (log_open)
+ return (EBUSY);
+ log_open = 1;
+ logsoftc.sc_pgid = p->p_pid; /* signal process only */
+ /*
+ * Potential race here with putchar() but since putchar should be
+ * called by autoconf, msg_magic should be initialized by the time
+ * we get here.
+ */
+ if (mbp->msg_magic != MSG_MAGIC) {
+ register int i;
+
+ mbp->msg_magic = MSG_MAGIC;
+ mbp->msg_bufx = mbp->msg_bufr = 0;
+ for (i=0; i < MSG_BSIZE; i++)
+ mbp->msg_bufc[i] = 0;
+ }
+ return (0);
+}
+
+/*ARGSUSED*/
+logclose(dev, flag, mode, p)
+ dev_t dev;
+ int flag, mode;
+ struct proc *p;
+{
+
+ log_open = 0;
+ logsoftc.sc_state = 0;
+ return (0);
+}
+
+/*ARGSUSED*/
+logread(dev, uio, flag)
+ dev_t dev;
+ struct uio *uio;
+ int flag;
+{
+ register struct msgbuf *mbp = msgbufp;
+ register long l;
+ register int s;
+ int error = 0;
+
+ s = splhigh();
+ while (mbp->msg_bufr == mbp->msg_bufx) {
+ if (flag & IO_NDELAY) {
+ splx(s);
+ return (EWOULDBLOCK);
+ }
+ logsoftc.sc_state |= LOG_RDWAIT;
+ if (error = tsleep((caddr_t)mbp, LOG_RDPRI | PCATCH,
+ "klog", 0)) {
+ splx(s);
+ return (error);
+ }
+ }
+ splx(s);
+ logsoftc.sc_state &= ~LOG_RDWAIT;
+
+ while (uio->uio_resid > 0) {
+ l = mbp->msg_bufx - mbp->msg_bufr;
+ if (l < 0)
+ l = MSG_BSIZE - mbp->msg_bufr;
+ l = min(l, uio->uio_resid);
+ if (l == 0)
+ break;
+ error = uiomove((caddr_t)&mbp->msg_bufc[mbp->msg_bufr],
+ (int)l, uio);
+ if (error)
+ break;
+ mbp->msg_bufr += l;
+ if (mbp->msg_bufr < 0 || mbp->msg_bufr >= MSG_BSIZE)
+ mbp->msg_bufr = 0;
+ }
+ return (error);
+}
+
+/*ARGSUSED*/
+logselect(dev, rw, p)
+ dev_t dev;
+ int rw;
+ struct proc *p;
+{
+ int s = splhigh();
+
+ switch (rw) {
+
+ case FREAD:
+ if (msgbufp->msg_bufr != msgbufp->msg_bufx) {
+ splx(s);
+ return (1);
+ }
+ selrecord(p, &logsoftc.sc_selp);
+ break;
+ }
+ splx(s);
+ return (0);
+}
+
+logwakeup()
+{
+ struct proc *p;
+
+ if (!log_open)
+ return;
+ selwakeup(&logsoftc.sc_selp);
+ if (logsoftc.sc_state & LOG_ASYNC) {
+ if (logsoftc.sc_pgid < 0)
+ gsignal(-logsoftc.sc_pgid, SIGIO);
+ else if (p = pfind(logsoftc.sc_pgid))
+ psignal(p, SIGIO);
+ }
+ if (logsoftc.sc_state & LOG_RDWAIT) {
+ wakeup((caddr_t)msgbufp);
+ logsoftc.sc_state &= ~LOG_RDWAIT;
+ }
+}
+
+/*ARGSUSED*/
+logioctl(dev, com, data, flag, p)
+ dev_t dev;
+ int com;
+ caddr_t data;
+ int flag;
+ struct proc *p;
+{
+ long l;
+ int s;
+
+ switch (com) {
+
+ /* return number of characters immediately available */
+ case FIONREAD:
+ s = splhigh();
+ l = msgbufp->msg_bufx - msgbufp->msg_bufr;
+ splx(s);
+ if (l < 0)
+ l += MSG_BSIZE;
+ *(int *)data = l;
+ break;
+
+ case FIONBIO:
+ break;
+
+ case FIOASYNC:
+ if (*(int *)data)
+ logsoftc.sc_state |= LOG_ASYNC;
+ else
+ logsoftc.sc_state &= ~LOG_ASYNC;
+ break;
+
+ case TIOCSPGRP:
+ logsoftc.sc_pgid = *(int *)data;
+ break;
+
+ case TIOCGPGRP:
+ *(int *)data = logsoftc.sc_pgid;
+ break;
+
+ default:
+ return (-1);
+ }
+ return (0);
+}
diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c
new file mode 100644
index 0000000..2adb779
--- /dev/null
+++ b/sys/kern/subr_prf.c
@@ -0,0 +1,601 @@
+/*-
+ * Copyright (c) 1986, 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_prf.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/reboot.h>
+#include <sys/msgbuf.h>
+#include <sys/proc.h>
+#include <sys/ioctl.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/tty.h>
+#include <sys/tprintf.h>
+#include <sys/syslog.h>
+#include <sys/malloc.h>
+
+/*
+ * Note that stdarg.h and the ANSI style va_start macro is used for both
+ * ANSI and traditional C compilers.
+ */
+#include <machine/stdarg.h>
+
+#ifdef KADB
+#include <machine/kdbparam.h>
+#endif
+
+#define TOCONS 0x01
+#define TOTTY 0x02
+#define TOLOG 0x04
+
+struct tty *constty; /* pointer to console "window" tty */
+
+extern cnputc(); /* standard console putc */
+int (*v_putc)() = cnputc; /* routine to putc on virtual console */
+
+void logpri __P((int level));
+static void putchar __P((int ch, int flags, struct tty *tp));
+static char *ksprintn __P((u_long num, int base, int *len));
+void kprintf __P((const char *fmt, int flags, struct tty *tp, va_list ap));
+
+int consintr = 1; /* Ok to handle console interrupts? */
+
+/*
+ * Variable panicstr contains argument to first call to panic; used as flag
+ * to indicate that the kernel has already called panic.
+ */
+const char *panicstr;
+
+/*
+ * Panic is called on unresolvable fatal errors. It prints "panic: mesg",
+ * and then reboots. If we are called twice, then we avoid trying to sync
+ * the disks as this often leads to recursive panics.
+ */
+#ifdef __GNUC__
+volatile void boot(int flags); /* boot() does not return */
+volatile /* panic() does not return */
+#endif
+void
+#ifdef __STDC__
+panic(const char *fmt, ...)
+#else
+panic(fmt, va_alist)
+ char *fmt;
+#endif
+{
+ int bootopt;
+ va_list ap;
+
+ bootopt = RB_AUTOBOOT | RB_DUMP;
+ if (panicstr)
+ bootopt |= RB_NOSYNC;
+ else
+ panicstr = fmt;
+
+ va_start(ap, fmt);
+ printf("panic: %r\n", fmt, ap);
+ va_end(ap);
+
+#ifdef KGDB
+ kgdb_panic();
+#endif
+#ifdef KADB
+ if (boothowto & RB_KDB)
+ kdbpanic();
+#endif
+ boot(bootopt);
+}
+
+/*
+ * Warn that a system table is full.
+ */
+void
+tablefull(tab)
+ const char *tab;
+{
+
+ log(LOG_ERR, "%s: table is full\n", tab);
+}
+
+/*
+ * Uprintf prints to the controlling terminal for the current process.
+ * It may block if the tty queue is overfull. No message is printed if
+ * the queue does not clear in a reasonable time.
+ */
+void
+#ifdef __STDC__
+uprintf(const char *fmt, ...)
+#else
+uprintf(fmt, va_alist)
+ char *fmt;
+#endif
+{
+ register struct proc *p = curproc;
+ va_list ap;
+
+ if (p->p_flag & P_CONTROLT && p->p_session->s_ttyvp) {
+ va_start(ap, fmt);
+ kprintf(fmt, TOTTY, p->p_session->s_ttyp, ap);
+ va_end(ap);
+ }
+}
+
+tpr_t
+tprintf_open(p)
+ register struct proc *p;
+{
+
+ if (p->p_flag & P_CONTROLT && p->p_session->s_ttyvp) {
+ SESSHOLD(p->p_session);
+ return ((tpr_t) p->p_session);
+ }
+ return ((tpr_t) NULL);
+}
+
+void
+tprintf_close(sess)
+ tpr_t sess;
+{
+
+ if (sess)
+ SESSRELE((struct session *) sess);
+}
+
+/*
+ * tprintf prints on the controlling terminal associated
+ * with the given session.
+ */
+void
+#ifdef __STDC__
+tprintf(tpr_t tpr, const char *fmt, ...)
+#else
+tprintf(tpr, fmt, va_alist)
+ tpr_t tpr;
+ char *fmt;
+#endif
+{
+ register struct session *sess = (struct session *)tpr;
+ struct tty *tp = NULL;
+ int flags = TOLOG;
+ va_list ap;
+
+ logpri(LOG_INFO);
+ if (sess && sess->s_ttyvp && ttycheckoutq(sess->s_ttyp, 0)) {
+ flags |= TOTTY;
+ tp = sess->s_ttyp;
+ }
+ va_start(ap, fmt);
+ kprintf(fmt, flags, tp, ap);
+ va_end(ap);
+ logwakeup();
+}
+
+/*
+ * Ttyprintf displays a message on a tty; it should be used only by
+ * the tty driver, or anything that knows the underlying tty will not
+ * be revoke(2)'d away. Other callers should use tprintf.
+ */
+void
+#ifdef __STDC__
+ttyprintf(struct tty *tp, const char *fmt, ...)
+#else
+ttyprintf(tp, fmt, va_alist)
+ struct tty *tp;
+ char *fmt;
+#endif
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ kprintf(fmt, TOTTY, tp, ap);
+ va_end(ap);
+}
+
+extern int log_open;
+
+/*
+ * Log writes to the log buffer, and guarantees not to sleep (so can be
+ * called by interrupt routines). If there is no process reading the
+ * log yet, it writes to the console also.
+ */
+void
+#ifdef __STDC__
+log(int level, const char *fmt, ...)
+#else
+log(level, fmt, va_alist)
+ int level;
+ char *fmt;
+#endif
+{
+ register int s;
+ va_list ap;
+
+ s = splhigh();
+ logpri(level);
+ va_start(ap, fmt);
+ kprintf(fmt, TOLOG, NULL, ap);
+ splx(s);
+ va_end(ap);
+ if (!log_open) {
+ va_start(ap, fmt);
+ kprintf(fmt, TOCONS, NULL, ap);
+ va_end(ap);
+ }
+ logwakeup();
+}
+
+void
+logpri(level)
+ int level;
+{
+ register int ch;
+ register char *p;
+
+ putchar('<', TOLOG, NULL);
+ for (p = ksprintn((u_long)level, 10, NULL); ch = *p--;)
+ putchar(ch, TOLOG, NULL);
+ putchar('>', TOLOG, NULL);
+}
+
+void
+#ifdef __STDC__
+addlog(const char *fmt, ...)
+#else
+addlog(fmt, va_alist)
+ char *fmt;
+#endif
+{
+ register int s;
+ va_list ap;
+
+ s = splhigh();
+ va_start(ap, fmt);
+ kprintf(fmt, TOLOG, NULL, ap);
+ splx(s);
+ va_end(ap);
+ if (!log_open) {
+ va_start(ap, fmt);
+ kprintf(fmt, TOCONS, NULL, ap);
+ va_end(ap);
+ }
+ logwakeup();
+}
+
+void
+#ifdef __STDC__
+printf(const char *fmt, ...)
+#else
+printf(fmt, va_alist)
+ char *fmt;
+#endif
+{
+ va_list ap;
+ register int savintr;
+
+ savintr = consintr; /* disable interrupts */
+ consintr = 0;
+ va_start(ap, fmt);
+ kprintf(fmt, TOCONS | TOLOG, NULL, ap);
+ va_end(ap);
+ if (!panicstr)
+ logwakeup();
+ consintr = savintr; /* reenable interrupts */
+}
+
+/*
+ * Scaled down version of printf(3).
+ *
+ * Two additional formats:
+ *
+ * The format %b is supported to decode error registers.
+ * Its usage is:
+ *
+ * printf("reg=%b\n", regval, "<base><arg>*");
+ *
+ * where <base> is the output base expressed as a control character, e.g.
+ * \10 gives octal; \20 gives hex. Each arg is a sequence of characters,
+ * the first of which gives the bit number to be inspected (origin 1), and
+ * the next characters (up to a control character, i.e. a character <= 32),
+ * give the name of the register. Thus:
+ *
+ * kprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n");
+ *
+ * would produce output:
+ *
+ * reg=3<BITTWO,BITONE>
+ *
+ * The format %r passes an additional format string and argument list
+ * recursively. Its usage is:
+ *
+ * fn(char *fmt, ...)
+ * {
+ * va_list ap;
+ * va_start(ap, fmt);
+ * printf("prefix: %r: suffix\n", fmt, ap);
+ * va_end(ap);
+ * }
+ *
+ * Space or zero padding and a field width are supported for the numeric
+ * formats only.
+ */
+void
+kprintf(fmt, flags, tp, ap)
+ register const char *fmt;
+ int flags;
+ struct tty *tp;
+ va_list ap;
+{
+ register char *p, *q;
+ register int ch, n;
+ u_long ul;
+ int base, lflag, tmp, width;
+ char padc;
+
+ for (;;) {
+ padc = ' ';
+ width = 0;
+ while ((ch = *(u_char *)fmt++) != '%') {
+ if (ch == '\0')
+ return;
+ putchar(ch, flags, tp);
+ }
+ lflag = 0;
+reswitch: switch (ch = *(u_char *)fmt++) {
+ case '0':
+ padc = '0';
+ goto reswitch;
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ for (width = 0;; ++fmt) {
+ width = width * 10 + ch - '0';
+ ch = *fmt;
+ if (ch < '0' || ch > '9')
+ break;
+ }
+ goto reswitch;
+ case 'l':
+ lflag = 1;
+ goto reswitch;
+ case 'b':
+ ul = va_arg(ap, int);
+ p = va_arg(ap, char *);
+ for (q = ksprintn(ul, *p++, NULL); ch = *q--;)
+ putchar(ch, flags, tp);
+
+ if (!ul)
+ break;
+
+ for (tmp = 0; n = *p++;) {
+ if (ul & (1 << (n - 1))) {
+ putchar(tmp ? ',' : '<', flags, tp);
+ for (; (n = *p) > ' '; ++p)
+ putchar(n, flags, tp);
+ tmp = 1;
+ } else
+ for (; *p > ' '; ++p)
+ continue;
+ }
+ if (tmp)
+ putchar('>', flags, tp);
+ break;
+ case 'c':
+ putchar(va_arg(ap, int), flags, tp);
+ break;
+ case 'r':
+ p = va_arg(ap, char *);
+ kprintf(p, flags, tp, va_arg(ap, va_list));
+ break;
+ case 's':
+ p = va_arg(ap, char *);
+ while (ch = *p++)
+ putchar(ch, flags, tp);
+ break;
+ case 'd':
+ ul = lflag ? va_arg(ap, long) : va_arg(ap, int);
+ if ((long)ul < 0) {
+ putchar('-', flags, tp);
+ ul = -(long)ul;
+ }
+ base = 10;
+ goto number;
+ case 'o':
+ ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+ base = 8;
+ goto number;
+ case 'u':
+ ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+ base = 10;
+ goto number;
+ case 'x':
+ ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+ base = 16;
+number: p = ksprintn(ul, base, &tmp);
+ if (width && (width -= tmp) > 0)
+ while (width--)
+ putchar(padc, flags, tp);
+ while (ch = *p--)
+ putchar(ch, flags, tp);
+ break;
+ default:
+ putchar('%', flags, tp);
+ if (lflag)
+ putchar('l', flags, tp);
+ /* FALLTHROUGH */
+ case '%':
+ putchar(ch, flags, tp);
+ }
+ }
+}
+
+/*
+ * Print a character on console or users terminal. If destination is
+ * the console then the last MSGBUFS characters are saved in msgbuf for
+ * inspection later.
+ */
+static void
+putchar(c, flags, tp)
+ register int c;
+ int flags;
+ struct tty *tp;
+{
+ extern int msgbufmapped;
+ register struct msgbuf *mbp;
+
+ if (panicstr)
+ constty = NULL;
+ if ((flags & TOCONS) && tp == NULL && constty) {
+ tp = constty;
+ flags |= TOTTY;
+ }
+ if ((flags & TOTTY) && tp && tputchar(c, tp) < 0 &&
+ (flags & TOCONS) && tp == constty)
+ constty = NULL;
+ if ((flags & TOLOG) &&
+ c != '\0' && c != '\r' && c != 0177 && msgbufmapped) {
+ mbp = msgbufp;
+ if (mbp->msg_magic != MSG_MAGIC) {
+ bzero((caddr_t)mbp, sizeof(*mbp));
+ mbp->msg_magic = MSG_MAGIC;
+ }
+ mbp->msg_bufc[mbp->msg_bufx++] = c;
+ if (mbp->msg_bufx < 0 || mbp->msg_bufx >= MSG_BSIZE)
+ mbp->msg_bufx = 0;
+ }
+ if ((flags & TOCONS) && constty == NULL && c != '\0')
+ (*v_putc)(c);
+}
+
+/*
+ * Scaled down version of sprintf(3).
+ */
+#ifdef __STDC__
+sprintf(char *buf, const char *cfmt, ...)
+#else
+sprintf(buf, cfmt, va_alist)
+ char *buf, *cfmt;
+#endif
+{
+ register const char *fmt = cfmt;
+ register char *p, *bp;
+ register int ch, base;
+ u_long ul;
+ int lflag;
+ va_list ap;
+
+ va_start(ap, cfmt);
+ for (bp = buf; ; ) {
+ while ((ch = *(u_char *)fmt++) != '%')
+ if ((*bp++ = ch) == '\0')
+ return ((bp - buf) - 1);
+
+ lflag = 0;
+reswitch: switch (ch = *(u_char *)fmt++) {
+ case 'l':
+ lflag = 1;
+ goto reswitch;
+ case 'c':
+ *bp++ = va_arg(ap, int);
+ break;
+ case 's':
+ p = va_arg(ap, char *);
+ while (*bp++ = *p++)
+ continue;
+ --bp;
+ break;
+ case 'd':
+ ul = lflag ? va_arg(ap, long) : va_arg(ap, int);
+ if ((long)ul < 0) {
+ *bp++ = '-';
+ ul = -(long)ul;
+ }
+ base = 10;
+ goto number;
+ break;
+ case 'o':
+ ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+ base = 8;
+ goto number;
+ break;
+ case 'u':
+ ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+ base = 10;
+ goto number;
+ break;
+ case 'x':
+ ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+ base = 16;
+number: for (p = ksprintn(ul, base, NULL); ch = *p--;)
+ *bp++ = ch;
+ break;
+ default:
+ *bp++ = '%';
+ if (lflag)
+ *bp++ = 'l';
+ /* FALLTHROUGH */
+ case '%':
+ *bp++ = ch;
+ }
+ }
+ va_end(ap);
+}
+
+/*
+ * Put a number (base <= 16) in a buffer in reverse order; return an
+ * optional length and a pointer to the NULL terminated (preceded?)
+ * buffer.
+ */
+static char *
+ksprintn(ul, base, lenp)
+ register u_long ul;
+ register int base, *lenp;
+{ /* A long in base 8, plus NULL. */
+ static char buf[sizeof(long) * NBBY / 3 + 2];
+ register char *p;
+
+ p = buf;
+ do {
+ *++p = "0123456789abcdef"[ul % base];
+ } while (ul /= base);
+ if (lenp)
+ *lenp = p - buf;
+ return (p);
+}
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
new file mode 100644
index 0000000..4fb81d8
--- /dev/null
+++ b/sys/kern/subr_prof.c
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_prof.c 8.3 (Berkeley) 9/23/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/malloc.h>
+#include <sys/gmon.h>
+
+/*
+ * Froms is actually a bunch of unsigned shorts indexing tos
+ */
+struct gmonparam _gmonparam = { GMON_PROF_OFF };
+
+extern char etext[];
+
+kmstartup()
+{
+ char *cp;
+ struct gmonparam *p = &_gmonparam;
+ /*
+ * Round lowpc and highpc to multiples of the density we're using
+ * so the rest of the scaling (here and in gprof) stays in ints.
+ */
+ p->lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
+ p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
+ p->textsize = p->highpc - p->lowpc;
+ printf("Profiling kernel, textsize=%d [%x..%x]\n",
+ p->textsize, p->lowpc, p->highpc);
+ p->kcountsize = p->textsize / HISTFRACTION;
+ p->hashfraction = HASHFRACTION;
+ p->fromssize = p->textsize / HASHFRACTION;
+ p->tolimit = p->textsize * ARCDENSITY / 100;
+ if (p->tolimit < MINARCS)
+ p->tolimit = MINARCS;
+ else if (p->tolimit > MAXARCS)
+ p->tolimit = MAXARCS;
+ p->tossize = p->tolimit * sizeof(struct tostruct);
+ cp = (char *)malloc(p->kcountsize + p->fromssize + p->tossize,
+ M_GPROF, M_NOWAIT);
+ if (cp == 0) {
+ printf("No memory for profiling.\n");
+ return;
+ }
+ bzero(cp, p->kcountsize + p->tossize + p->fromssize);
+ p->tos = (struct tostruct *)cp;
+ cp += p->tossize;
+ p->kcount = (u_short *)cp;
+ cp += p->kcountsize;
+ p->froms = (u_short *)cp;
+}
+
+/*
+ * Return kernel profiling information.
+ */
+sysctl_doprof(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+{
+ struct gmonparam *gp = &_gmonparam;
+ int error;
+
+ /* all sysctl names at this level are terminal */
+ if (namelen != 1)
+ return (ENOTDIR); /* overloaded */
+
+ switch (name[0]) {
+ case GPROF_STATE:
+ error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
+ if (error)
+ return (error);
+ if (gp->state == GMON_PROF_OFF)
+ stopprofclock(&proc0);
+ else
+ startprofclock(&proc0);
+ return (0);
+ case GPROF_COUNT:
+ return (sysctl_struct(oldp, oldlenp, newp, newlen,
+ gp->kcount, gp->kcountsize));
+ case GPROF_FROMS:
+ return (sysctl_struct(oldp, oldlenp, newp, newlen,
+ gp->froms, gp->fromssize));
+ case GPROF_TOS:
+ return (sysctl_struct(oldp, oldlenp, newp, newlen,
+ gp->tos, gp->tossize));
+ case GPROF_GMONPARAM:
+ return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
+ default:
+ return (EOPNOTSUPP);
+ }
+ /* NOTREACHED */
+}
+#endif /* GPROF */
+
+/*
+ * Profiling system call.
+ *
+ * The scale factor is a fixed point number with 16 bits of fraction, so that
+ * 1.0 is represented as 0x10000. A scale factor of 0 turns off profiling.
+ */
+struct profil_args {
+ caddr_t samples;
+ u_int size;
+ u_int offset;
+ u_int scale;
+};
+/* ARGSUSED */
+profil(p, uap, retval)
+ struct proc *p;
+ register struct profil_args *uap;
+ int *retval;
+{
+ register struct uprof *upp;
+ int s;
+
+ if (uap->scale > (1 << 16))
+ return (EINVAL);
+ if (uap->scale == 0) {
+ stopprofclock(p);
+ return (0);
+ }
+ upp = &p->p_stats->p_prof;
+
+ /* Block profile interrupts while changing state. */
+ s = splstatclock();
+ upp->pr_off = uap->offset;
+ upp->pr_scale = uap->scale;
+ upp->pr_base = uap->samples;
+ upp->pr_size = uap->size;
+ startprofclock(p);
+ splx(s);
+
+ return (0);
+}
+
+/*
+ * Scale is a fixed-point number with the binary point 16 bits
+ * into the value, and is <= 1.0. pc is at most 32 bits, so the
+ * intermediate result is at most 48 bits.
+ */
+#define PC_TO_INDEX(pc, prof) \
+ ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
+ (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
+
+/*
+ * Collect user-level profiling statistics; called on a profiling tick,
+ * when a process is running in user-mode. This routine may be called
+ * from an interrupt context. We try to update the user profiling buffers
+ * cheaply with fuswintr() and suswintr(). If that fails, we revert to
+ * an AST that will vector us to trap() with a context in which copyin
+ * and copyout will work. Trap will then call addupc_task().
+ *
+ * Note that we may (rarely) not get around to the AST soon enough, and
+ * lose profile ticks when the next tick overwrites this one, but in this
+ * case the system is overloaded and the profile is probably already
+ * inaccurate.
+ */
+void
+addupc_intr(p, pc, ticks)
+ register struct proc *p;
+ register u_long pc;
+ u_int ticks;
+{
+ register struct uprof *prof;
+ register caddr_t addr;
+ register u_int i;
+ register int v;
+
+ if (ticks == 0)
+ return;
+ prof = &p->p_stats->p_prof;
+ if (pc < prof->pr_off ||
+ (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
+ return; /* out of range; ignore */
+
+ addr = prof->pr_base + i;
+ if ((v = fuswintr(addr)) == -1 || suswintr(addr, v + ticks) == -1) {
+ prof->pr_addr = pc;
+ prof->pr_ticks = ticks;
+ need_proftick(p);
+ }
+}
+
+/*
+ * Much like before, but we can afford to take faults here. If the
+ * update fails, we simply turn off profiling.
+ */
+void
+addupc_task(p, pc, ticks)
+ register struct proc *p;
+ register u_long pc;
+ u_int ticks;
+{
+ register struct uprof *prof;
+ register caddr_t addr;
+ register u_int i;
+ u_short v;
+
+ /* Testing P_PROFIL may be unnecessary, but is certainly safe. */
+ if ((p->p_flag & P_PROFIL) == 0 || ticks == 0)
+ return;
+
+ prof = &p->p_stats->p_prof;
+ if (pc < prof->pr_off ||
+ (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
+ return;
+
+ addr = prof->pr_base + i;
+ if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) {
+ v += ticks;
+ if (copyout((caddr_t)&v, addr, sizeof(v)) == 0)
+ return;
+ }
+ stopprofclock(p);
+}
diff --git a/sys/kern/subr_rmap.c b/sys/kern/subr_rmap.c
new file mode 100644
index 0000000..2f31173
--- /dev/null
+++ b/sys/kern/subr_rmap.c
@@ -0,0 +1,81 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)subr_rmap.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/map.h>
+#include <sys/proc.h>
+
+void
+rminit(a1, a2, a3, a4, a5)
+ struct map *a1;
+ long a2, a3;
+ char *a4;
+ int a5;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+long
+rmalloc(a1, a2)
+ struct map *a1;
+ long a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+void
+rmfree(a1, a2, a3)
+ struct map *a1;
+ long a2, a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
diff --git a/sys/kern/subr_xxx.c b/sys/kern/subr_xxx.c
new file mode 100644
index 0000000..c692ec1
--- /dev/null
+++ b/sys/kern/subr_xxx.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_xxx.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Miscellaneous trivial functions, including many
+ * that are often inline-expanded or done in assembler.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/cpu.h>
+
+/*
+ * Unsupported device function (e.g. writing to read-only device).
+ */
+enodev()
+{
+
+ return (ENODEV);
+}
+
+/*
+ * Unconfigured device function; driver not configured.
+ */
+enxio()
+{
+
+ return (ENXIO);
+}
+
+/*
+ * Unsupported ioctl function.
+ */
+enoioctl()
+{
+
+ return (ENOTTY);
+}
+
+/*
+ * Unsupported system function.
+ * This is used for an otherwise-reasonable operation
+ * that is not supported by the current system binary.
+ */
+enosys()
+{
+
+ return (ENOSYS);
+}
+
+/*
+ * Return error for operation not supported
+ * on a specific object or file type.
+ */
+eopnotsupp()
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Generic null operation, always returns success.
+ */
+nullop()
+{
+
+ return (0);
+}
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
new file mode 100644
index 0000000..a121209
--- /dev/null
+++ b/sys/kern/sys_generic.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/socketvar.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/malloc.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+/*
+ * Read system call.
+ */
+struct read_args {
+ int fd;
+ char *buf;
+ u_int nbyte;
+};
+/* ARGSUSED */
+read(p, uap, retval)
+ struct proc *p;
+ register struct read_args *uap;
+ int *retval;
+{
+ register struct file *fp;
+ register struct filedesc *fdp = p->p_fd;
+ struct uio auio;
+ struct iovec aiov;
+ long cnt, error = 0;
+#ifdef KTRACE
+ struct iovec ktriov;
+#endif
+
+ if (((u_int)uap->fd) >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
+ (fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ aiov.iov_base = (caddr_t)uap->buf;
+ aiov.iov_len = uap->nbyte;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = uap->nbyte;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+#ifdef KTRACE
+ /*
+ * if tracing, save a copy of iovec
+ */
+ if (KTRPOINT(p, KTR_GENIO))
+ ktriov = aiov;
+#endif
+ cnt = uap->nbyte;
+ if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
+ if (auio.uio_resid != cnt && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ cnt -= auio.uio_resid;
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_GENIO) && error == 0)
+ ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error);
+#endif
+ *retval = cnt;
+ return (error);
+}
+
+/*
+ * Scatter read system call.
+ */
+struct readv_args {
+ int fdes;
+ struct iovec *iovp;
+ u_int iovcnt;
+};
+readv(p, uap, retval)
+ struct proc *p;
+ register struct readv_args *uap;
+ int *retval;
+{
+ register struct file *fp;
+ register struct filedesc *fdp = p->p_fd;
+ struct uio auio;
+ register struct iovec *iov;
+ struct iovec *needfree;
+ struct iovec aiov[UIO_SMALLIOV];
+ long i, cnt, error = 0;
+ u_int iovlen;
+#ifdef KTRACE
+ struct iovec *ktriov = NULL;
+#endif
+
+ if (((u_int)uap->fdes) >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fdes]) == NULL ||
+ (fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ /* note: can't use iovlen until iovcnt is validated */
+ iovlen = uap->iovcnt * sizeof (struct iovec);
+ if (uap->iovcnt > UIO_SMALLIOV) {
+ if (uap->iovcnt > UIO_MAXIOV)
+ return (EINVAL);
+ MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
+ needfree = iov;
+ } else {
+ iov = aiov;
+ needfree = NULL;
+ }
+ auio.uio_iov = iov;
+ auio.uio_iovcnt = uap->iovcnt;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))
+ goto done;
+ auio.uio_resid = 0;
+ for (i = 0; i < uap->iovcnt; i++) {
+ if (iov->iov_len < 0) {
+ error = EINVAL;
+ goto done;
+ }
+ auio.uio_resid += iov->iov_len;
+ if (auio.uio_resid < 0) {
+ error = EINVAL;
+ goto done;
+ }
+ iov++;
+ }
+#ifdef KTRACE
+ /*
+ * if tracing, save a copy of iovec
+ */
+ if (KTRPOINT(p, KTR_GENIO)) {
+ MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+ bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+ }
+#endif
+ cnt = auio.uio_resid;
+ if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
+ if (auio.uio_resid != cnt && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ cnt -= auio.uio_resid;
+#ifdef KTRACE
+ if (ktriov != NULL) {
+ if (error == 0)
+ ktrgenio(p->p_tracep, uap->fdes, UIO_READ, ktriov,
+ cnt, error);
+ FREE(ktriov, M_TEMP);
+ }
+#endif
+ *retval = cnt;
+done:
+ if (needfree)
+ FREE(needfree, M_IOV);
+ return (error);
+}
+
+/*
+ * Write system call
+ */
+struct write_args {
+ int fd;
+ char *buf;
+ u_int nbyte;
+};
+write(p, uap, retval)
+ struct proc *p;
+ register struct write_args *uap;
+ int *retval;
+{
+ register struct file *fp;
+ register struct filedesc *fdp = p->p_fd;
+ struct uio auio;
+ struct iovec aiov;
+ long cnt, error = 0;
+#ifdef KTRACE
+ struct iovec ktriov;
+#endif
+
+ if (((u_int)uap->fd) >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
+ (fp->f_flag & FWRITE) == 0)
+ return (EBADF);
+ aiov.iov_base = (caddr_t)uap->buf;
+ aiov.iov_len = uap->nbyte;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = uap->nbyte;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+#ifdef KTRACE
+ /*
+ * if tracing, save a copy of iovec
+ */
+ if (KTRPOINT(p, KTR_GENIO))
+ ktriov = aiov;
+#endif
+ cnt = uap->nbyte;
+ if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
+ if (auio.uio_resid != cnt && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ if (error == EPIPE)
+ psignal(p, SIGPIPE);
+ }
+ cnt -= auio.uio_resid;
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_GENIO) && error == 0)
+ ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
+ &ktriov, cnt, error);
+#endif
+ *retval = cnt;
+ return (error);
+}
+
+/*
+ * Gather write system call
+ */
+struct writev_args {
+ int fd;
+ struct iovec *iovp;
+ u_int iovcnt;
+};
+writev(p, uap, retval)
+ struct proc *p;
+ register struct writev_args *uap;
+ int *retval;
+{
+ register struct file *fp;
+ register struct filedesc *fdp = p->p_fd;
+ struct uio auio;
+ register struct iovec *iov;
+ struct iovec *needfree;
+ struct iovec aiov[UIO_SMALLIOV];
+ long i, cnt, error = 0;
+ u_int iovlen;
+#ifdef KTRACE
+ struct iovec *ktriov = NULL;
+#endif
+
+ if (((u_int)uap->fd) >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
+ (fp->f_flag & FWRITE) == 0)
+ return (EBADF);
+ /* note: can't use iovlen until iovcnt is validated */
+ iovlen = uap->iovcnt * sizeof (struct iovec);
+ if (uap->iovcnt > UIO_SMALLIOV) {
+ if (uap->iovcnt > UIO_MAXIOV)
+ return (EINVAL);
+ MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
+ needfree = iov;
+ } else {
+ iov = aiov;
+ needfree = NULL;
+ }
+ auio.uio_iov = iov;
+ auio.uio_iovcnt = uap->iovcnt;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))
+ goto done;
+ auio.uio_resid = 0;
+ for (i = 0; i < uap->iovcnt; i++) {
+ if (iov->iov_len < 0) {
+ error = EINVAL;
+ goto done;
+ }
+ auio.uio_resid += iov->iov_len;
+ if (auio.uio_resid < 0) {
+ error = EINVAL;
+ goto done;
+ }
+ iov++;
+ }
+#ifdef KTRACE
+ /*
+ * if tracing, save a copy of iovec
+ */
+ if (KTRPOINT(p, KTR_GENIO)) {
+ MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+ bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+ }
+#endif
+ cnt = auio.uio_resid;
+ if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
+ if (auio.uio_resid != cnt && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ if (error == EPIPE)
+ psignal(p, SIGPIPE);
+ }
+ cnt -= auio.uio_resid;
+#ifdef KTRACE
+ if (ktriov != NULL) {
+ if (error == 0)
+ ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
+ ktriov, cnt, error);
+ FREE(ktriov, M_TEMP);
+ }
+#endif
+ *retval = cnt;
+done:
+ if (needfree)
+ FREE(needfree, M_IOV);
+ return (error);
+}
+
+/*
+ * Ioctl system call
+ */
+struct ioctl_args {
+ int fd;
+ int com;
+ caddr_t data;
+};
+/* ARGSUSED */
+ioctl(p, uap, retval)
+ struct proc *p;
+ register struct ioctl_args *uap;
+ int *retval;
+{
+ register struct file *fp;
+ register struct filedesc *fdp;
+ register int com, error;
+ register u_int size;
+ caddr_t data, memp;
+ int tmp;
+#define STK_PARAMS 128
+ char stkbuf[STK_PARAMS];
+
+ fdp = p->p_fd;
+ if ((u_int)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+
+ if ((fp->f_flag & (FREAD | FWRITE)) == 0)
+ return (EBADF);
+
+ switch (com = uap->com) {
+ case FIONCLEX:
+ fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
+ return (0);
+ case FIOCLEX:
+ fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
+ return (0);
+ }
+
+ /*
+ * Interpret high order word to find amount of data to be
+ * copied to/from the user's address space.
+ */
+ size = IOCPARM_LEN(com);
+ if (size > IOCPARM_MAX)
+ return (ENOTTY);
+ memp = NULL;
+ if (size > sizeof (stkbuf)) {
+ memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
+ data = memp;
+ } else
+ data = stkbuf;
+ if (com&IOC_IN) {
+ if (size) {
+ error = copyin(uap->data, data, (u_int)size);
+ if (error) {
+ if (memp)
+ free(memp, M_IOCTLOPS);
+ return (error);
+ }
+ } else
+ *(caddr_t *)data = uap->data;
+ } else if ((com&IOC_OUT) && size)
+ /*
+ * Zero the buffer so the user always
+ * gets back something deterministic.
+ */
+ bzero(data, size);
+ else if (com&IOC_VOID)
+ *(caddr_t *)data = uap->data;
+
+ switch (com) {
+
+ case FIONBIO:
+ if (tmp = *(int *)data)
+ fp->f_flag |= FNONBLOCK;
+ else
+ fp->f_flag &= ~FNONBLOCK;
+ error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+ break;
+
+ case FIOASYNC:
+ if (tmp = *(int *)data)
+ fp->f_flag |= FASYNC;
+ else
+ fp->f_flag &= ~FASYNC;
+ error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
+ break;
+
+ case FIOSETOWN:
+ tmp = *(int *)data;
+ if (fp->f_type == DTYPE_SOCKET) {
+ ((struct socket *)fp->f_data)->so_pgid = tmp;
+ error = 0;
+ break;
+ }
+ if (tmp <= 0) {
+ tmp = -tmp;
+ } else {
+ struct proc *p1 = pfind(tmp);
+ if (p1 == 0) {
+ error = ESRCH;
+ break;
+ }
+ tmp = p1->p_pgrp->pg_id;
+ }
+ error = (*fp->f_ops->fo_ioctl)
+ (fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
+ break;
+
+ case FIOGETOWN:
+ if (fp->f_type == DTYPE_SOCKET) {
+ error = 0;
+ *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
+ break;
+ }
+ error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p);
+ *(int *)data = -*(int *)data;
+ break;
+
+ default:
+ error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
+ /*
+ * Copy any data to user, size was
+ * already set and checked above.
+ */
+ if (error == 0 && (com&IOC_OUT) && size)
+ error = copyout(data, uap->data, (u_int)size);
+ break;
+ }
+ if (memp)
+ free(memp, M_IOCTLOPS);
+ return (error);
+}
+
+int selwait, nselcoll;
+
+/*
+ * Select system call.
+ */
+struct select_args {
+ u_int nd;
+ fd_set *in, *ou, *ex;
+ struct timeval *tv;
+};
+select(p, uap, retval)
+ register struct proc *p;
+ register struct select_args *uap;
+ int *retval;
+{
+ fd_set ibits[3], obits[3];
+ struct timeval atv;
+ int s, ncoll, error = 0, timo;
+ u_int ni;
+
+ bzero((caddr_t)ibits, sizeof(ibits));
+ bzero((caddr_t)obits, sizeof(obits));
+ if (uap->nd > FD_SETSIZE)
+ return (EINVAL);
+ if (uap->nd > p->p_fd->fd_nfiles)
+ uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
+ ni = howmany(uap->nd, NFDBITS) * sizeof(fd_mask);
+
+#define getbits(name, x) \
+ if (uap->name && \
+ (error = copyin((caddr_t)uap->name, (caddr_t)&ibits[x], ni))) \
+ goto done;
+ getbits(in, 0);
+ getbits(ou, 1);
+ getbits(ex, 2);
+#undef getbits
+
+ if (uap->tv) {
+ error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
+ sizeof (atv));
+ if (error)
+ goto done;
+ if (itimerfix(&atv)) {
+ error = EINVAL;
+ goto done;
+ }
+ s = splclock();
+ timevaladd(&atv, (struct timeval *)&time);
+ timo = hzto(&atv);
+ /*
+ * Avoid inadvertently sleeping forever.
+ */
+ if (timo == 0)
+ timo = 1;
+ splx(s);
+ } else
+ timo = 0;
+retry:
+ ncoll = nselcoll;
+ p->p_flag |= P_SELECT;
+ error = selscan(p, ibits, obits, uap->nd, retval);
+ if (error || *retval)
+ goto done;
+ s = splhigh();
+ /* this should be timercmp(&time, &atv, >=) */
+ if (uap->tv && (time.tv_sec > atv.tv_sec ||
+ time.tv_sec == atv.tv_sec && time.tv_usec >= atv.tv_usec)) {
+ splx(s);
+ goto done;
+ }
+ if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
+ splx(s);
+ goto retry;
+ }
+ p->p_flag &= ~P_SELECT;
+ error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
+ splx(s);
+ if (error == 0)
+ goto retry;
+done:
+ p->p_flag &= ~P_SELECT;
+ /* select is not restarted after signals... */
+ if (error == ERESTART)
+ error = EINTR;
+ if (error == EWOULDBLOCK)
+ error = 0;
+#define putbits(name, x) \
+ if (uap->name && \
+ (error2 = copyout((caddr_t)&obits[x], (caddr_t)uap->name, ni))) \
+ error = error2;
+ if (error == 0) {
+ int error2;
+
+ putbits(in, 0);
+ putbits(ou, 1);
+ putbits(ex, 2);
+#undef putbits
+ }
+ return (error);
+}
+
+selscan(p, ibits, obits, nfd, retval)
+ struct proc *p;
+ fd_set *ibits, *obits;
+ int nfd, *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register int msk, i, j, fd;
+ register fd_mask bits;
+ struct file *fp;
+ int n = 0;
+ static int flag[3] = { FREAD, FWRITE, 0 };
+
+ for (msk = 0; msk < 3; msk++) {
+ for (i = 0; i < nfd; i += NFDBITS) {
+ bits = ibits[msk].fds_bits[i/NFDBITS];
+ while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
+ bits &= ~(1 << j);
+ fp = fdp->fd_ofiles[fd];
+ if (fp == NULL)
+ return (EBADF);
+ if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
+ FD_SET(fd, &obits[msk]);
+ n++;
+ }
+ }
+ }
+ }
+ *retval = n;
+ return (0);
+}
+
+/*ARGSUSED*/
+seltrue(dev, flag, p)
+ dev_t dev;
+ int flag;
+ struct proc *p;
+{
+
+ return (1);
+}
+
+/*
+ * Record a select request.
+ */
+void
+selrecord(selector, sip)
+ struct proc *selector;
+ struct selinfo *sip;
+{
+ struct proc *p;
+ pid_t mypid;
+
+ mypid = selector->p_pid;
+ if (sip->si_pid == mypid)
+ return;
+ if (sip->si_pid && (p = pfind(sip->si_pid)) &&
+ p->p_wchan == (caddr_t)&selwait)
+ sip->si_flags |= SI_COLL;
+ else
+ sip->si_pid = mypid;
+}
+
+/*
+ * Do a wakeup when a selectable event occurs.
+ */
+void
+selwakeup(sip)
+ register struct selinfo *sip;
+{
+ register struct proc *p;
+ int s;
+
+ if (sip->si_pid == 0)
+ return;
+ if (sip->si_flags & SI_COLL) {
+ nselcoll++;
+ sip->si_flags &= ~SI_COLL;
+ wakeup((caddr_t)&selwait);
+ }
+ p = pfind(sip->si_pid);
+ sip->si_pid = 0;
+ if (p != NULL) {
+ s = splhigh();
+ if (p->p_wchan == (caddr_t)&selwait) {
+ if (p->p_stat == SSLEEP)
+ setrunnable(p);
+ else
+ unsleep(p);
+ } else if (p->p_flag & P_SELECT)
+ p->p_flag &= ~P_SELECT;
+ splx(s);
+ }
+}
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
new file mode 100644
index 0000000..4cc40ba
--- /dev/null
+++ b/sys/kern/sys_process.c
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)sys_process.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+
+/*
+ * Process debugging system call.
+ */
+struct ptrace_args {
+ int req;
+ pid_t pid;
+ caddr_t addr;
+ int data;
+};
+ptrace(a1, a2, a3)
+ struct proc *a1;
+ struct ptrace_args *a2;
+ int *a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (ENOSYS);
+}
+
+trace_req(a1)
+ struct proc *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
new file mode 100644
index 0000000..a93ae86
--- /dev/null
+++ b/sys/kern/sys_socket.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sys_socket.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+struct fileops socketops =
+ { soo_read, soo_write, soo_ioctl, soo_select, soo_close };
+
+/* ARGSUSED */
+soo_read(fp, uio, cred)
+ struct file *fp;
+ struct uio *uio;
+ struct ucred *cred;
+{
+
+ return (soreceive((struct socket *)fp->f_data, (struct mbuf **)0,
+ uio, (struct mbuf **)0, (struct mbuf **)0, (int *)0));
+}
+
+/* ARGSUSED */
+soo_write(fp, uio, cred)
+ struct file *fp;
+ struct uio *uio;
+ struct ucred *cred;
+{
+
+ return (sosend((struct socket *)fp->f_data, (struct mbuf *)0,
+ uio, (struct mbuf *)0, (struct mbuf *)0, 0));
+}
+
+soo_ioctl(fp, cmd, data, p)
+ struct file *fp;
+ int cmd;
+ register caddr_t data;
+ struct proc *p;
+{
+ register struct socket *so = (struct socket *)fp->f_data;
+
+ switch (cmd) {
+
+ case FIONBIO:
+ if (*(int *)data)
+ so->so_state |= SS_NBIO;
+ else
+ so->so_state &= ~SS_NBIO;
+ return (0);
+
+ case FIOASYNC:
+ if (*(int *)data) {
+ so->so_state |= SS_ASYNC;
+ so->so_rcv.sb_flags |= SB_ASYNC;
+ so->so_snd.sb_flags |= SB_ASYNC;
+ } else {
+ so->so_state &= ~SS_ASYNC;
+ so->so_rcv.sb_flags &= ~SB_ASYNC;
+ so->so_snd.sb_flags &= ~SB_ASYNC;
+ }
+ return (0);
+
+ case FIONREAD:
+ *(int *)data = so->so_rcv.sb_cc;
+ return (0);
+
+ case SIOCSPGRP:
+ so->so_pgid = *(int *)data;
+ return (0);
+
+ case SIOCGPGRP:
+ *(int *)data = so->so_pgid;
+ return (0);
+
+ case SIOCATMARK:
+ *(int *)data = (so->so_state&SS_RCVATMARK) != 0;
+ return (0);
+ }
+ /*
+ * Interface/routing/protocol specific ioctls:
+ * interface and routing ioctls should have a
+ * different entry since a socket's unnecessary
+ */
+ if (IOCGROUP(cmd) == 'i')
+ return (ifioctl(so, cmd, data, p));
+ if (IOCGROUP(cmd) == 'r')
+ return (rtioctl(cmd, data, p));
+ return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
+ (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)0));
+}
+
+soo_select(fp, which, p)
+ struct file *fp;
+ int which;
+ struct proc *p;
+{
+ register struct socket *so = (struct socket *)fp->f_data;
+ register int s = splnet();
+
+ switch (which) {
+
+ case FREAD:
+ if (soreadable(so)) {
+ splx(s);
+ return (1);
+ }
+ selrecord(p, &so->so_rcv.sb_sel);
+ so->so_rcv.sb_flags |= SB_SEL;
+ break;
+
+ case FWRITE:
+ if (sowriteable(so)) {
+ splx(s);
+ return (1);
+ }
+ selrecord(p, &so->so_snd.sb_sel);
+ so->so_snd.sb_flags |= SB_SEL;
+ break;
+
+ case 0:
+ if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
+ splx(s);
+ return (1);
+ }
+ selrecord(p, &so->so_rcv.sb_sel);
+ so->so_rcv.sb_flags |= SB_SEL;
+ break;
+ }
+ splx(s);
+ return (0);
+}
+
+soo_stat(so, ub)
+ register struct socket *so;
+ register struct stat *ub;
+{
+
+ bzero((caddr_t)ub, sizeof (*ub));
+ ub->st_mode = S_IFSOCK;
+ return ((*so->so_proto->pr_usrreq)(so, PRU_SENSE,
+ (struct mbuf *)ub, (struct mbuf *)0,
+ (struct mbuf *)0));
+}
+
+/* ARGSUSED */
+soo_close(fp, p)
+ struct file *fp;
+ struct proc *p;
+{
+ int error = 0;
+
+ if (fp->f_data)
+ error = soclose((struct socket *)fp->f_data);
+ fp->f_data = 0;
+ return (error);
+}
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
new file mode 100644
index 0000000..1809905
--- /dev/null
+++ b/sys/kern/syscalls.c
@@ -0,0 +1,251 @@
+/*
+ * System call names.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * created from @(#)syscalls.master 8.2 (Berkeley) 1/13/94
+ */
+
+char *syscallnames[] = {
+ "syscall", /* 0 = syscall */
+ "exit", /* 1 = exit */
+ "fork", /* 2 = fork */
+ "read", /* 3 = read */
+ "write", /* 4 = write */
+ "open", /* 5 = open */
+ "close", /* 6 = close */
+ "wait4", /* 7 = wait4 */
+ "old.creat", /* 8 = old creat */
+ "link", /* 9 = link */
+ "unlink", /* 10 = unlink */
+ "obs_execv", /* 11 = obsolete execv */
+ "chdir", /* 12 = chdir */
+ "fchdir", /* 13 = fchdir */
+ "mknod", /* 14 = mknod */
+ "chmod", /* 15 = chmod */
+ "chown", /* 16 = chown */
+ "break", /* 17 = break */
+ "getfsstat", /* 18 = getfsstat */
+ "old.lseek", /* 19 = old lseek */
+ "getpid", /* 20 = getpid */
+ "mount", /* 21 = mount */
+ "unmount", /* 22 = unmount */
+ "setuid", /* 23 = setuid */
+ "getuid", /* 24 = getuid */
+ "geteuid", /* 25 = geteuid */
+ "ptrace", /* 26 = ptrace */
+ "recvmsg", /* 27 = recvmsg */
+ "sendmsg", /* 28 = sendmsg */
+ "recvfrom", /* 29 = recvfrom */
+ "accept", /* 30 = accept */
+ "getpeername", /* 31 = getpeername */
+ "getsockname", /* 32 = getsockname */
+ "access", /* 33 = access */
+ "chflags", /* 34 = chflags */
+ "fchflags", /* 35 = fchflags */
+ "sync", /* 36 = sync */
+ "kill", /* 37 = kill */
+ "old.stat", /* 38 = old stat */
+ "getppid", /* 39 = getppid */
+ "old.lstat", /* 40 = old lstat */
+ "dup", /* 41 = dup */
+ "pipe", /* 42 = pipe */
+ "getegid", /* 43 = getegid */
+ "profil", /* 44 = profil */
+#ifdef KTRACE
+ "ktrace", /* 45 = ktrace */
+#else
+ "#45", /* 45 = ktrace */
+#endif
+ "sigaction", /* 46 = sigaction */
+ "getgid", /* 47 = getgid */
+ "sigprocmask", /* 48 = sigprocmask */
+ "getlogin", /* 49 = getlogin */
+ "setlogin", /* 50 = setlogin */
+ "acct", /* 51 = acct */
+ "sigpending", /* 52 = sigpending */
+ "sigaltstack", /* 53 = sigaltstack */
+ "ioctl", /* 54 = ioctl */
+ "reboot", /* 55 = reboot */
+ "revoke", /* 56 = revoke */
+ "symlink", /* 57 = symlink */
+ "readlink", /* 58 = readlink */
+ "execve", /* 59 = execve */
+ "umask", /* 60 = umask */
+ "chroot", /* 61 = chroot */
+ "old.fstat", /* 62 = old fstat */
+ "old.getkerninfo", /* 63 = old getkerninfo */
+ "old.getpagesize", /* 64 = old getpagesize */
+ "msync", /* 65 = msync */
+ "vfork", /* 66 = vfork */
+ "obs_vread", /* 67 = obsolete vread */
+ "obs_vwrite", /* 68 = obsolete vwrite */
+ "sbrk", /* 69 = sbrk */
+ "sstk", /* 70 = sstk */
+ "old.mmap", /* 71 = old mmap */
+ "vadvise", /* 72 = vadvise */
+ "munmap", /* 73 = munmap */
+ "mprotect", /* 74 = mprotect */
+ "madvise", /* 75 = madvise */
+ "obs_vhangup", /* 76 = obsolete vhangup */
+ "obs_vlimit", /* 77 = obsolete vlimit */
+ "mincore", /* 78 = mincore */
+ "getgroups", /* 79 = getgroups */
+ "setgroups", /* 80 = setgroups */
+ "getpgrp", /* 81 = getpgrp */
+ "setpgid", /* 82 = setpgid */
+ "setitimer", /* 83 = setitimer */
+ "old.wait", /* 84 = old wait */
+ "swapon", /* 85 = swapon */
+ "getitimer", /* 86 = getitimer */
+ "old.gethostname", /* 87 = old gethostname */
+ "old.sethostname", /* 88 = old sethostname */
+ "getdtablesize", /* 89 = getdtablesize */
+ "dup2", /* 90 = dup2 */
+ "#91", /* 91 = getdopt */
+ "fcntl", /* 92 = fcntl */
+ "select", /* 93 = select */
+ "#94", /* 94 = setdopt */
+ "fsync", /* 95 = fsync */
+ "setpriority", /* 96 = setpriority */
+ "socket", /* 97 = socket */
+ "connect", /* 98 = connect */
+ "old.accept", /* 99 = old accept */
+ "getpriority", /* 100 = getpriority */
+ "old.send", /* 101 = old send */
+ "old.recv", /* 102 = old recv */
+ "sigreturn", /* 103 = sigreturn */
+ "bind", /* 104 = bind */
+ "setsockopt", /* 105 = setsockopt */
+ "listen", /* 106 = listen */
+ "obs_vtimes", /* 107 = obsolete vtimes */
+ "old.sigvec", /* 108 = old sigvec */
+ "old.sigblock", /* 109 = old sigblock */
+ "old.sigsetmask", /* 110 = old sigsetmask */
+ "sigsuspend", /* 111 = sigsuspend */
+ "old.sigstack", /* 112 = old sigstack */
+ "old.recvmsg", /* 113 = old recvmsg */
+ "old.sendmsg", /* 114 = old sendmsg */
+#ifdef TRACE
+ "vtrace", /* 115 = vtrace */
+#else
+ "obs_vtrace", /* 115 = obsolete vtrace */
+#endif
+ "gettimeofday", /* 116 = gettimeofday */
+ "getrusage", /* 117 = getrusage */
+ "getsockopt", /* 118 = getsockopt */
+#ifdef vax
+ "resuba", /* 119 = resuba */
+#else
+ "#119", /* 119 = nosys */
+#endif
+ "readv", /* 120 = readv */
+ "writev", /* 121 = writev */
+ "settimeofday", /* 122 = settimeofday */
+ "fchown", /* 123 = fchown */
+ "fchmod", /* 124 = fchmod */
+ "old.recvfrom", /* 125 = old recvfrom */
+ "old.setreuid", /* 126 = old setreuid */
+ "old.setregid", /* 127 = old setregid */
+ "rename", /* 128 = rename */
+ "old.truncate", /* 129 = old truncate */
+ "old.ftruncate", /* 130 = old ftruncate */
+ "flock", /* 131 = flock */
+ "mkfifo", /* 132 = mkfifo */
+ "sendto", /* 133 = sendto */
+ "shutdown", /* 134 = shutdown */
+ "socketpair", /* 135 = socketpair */
+ "mkdir", /* 136 = mkdir */
+ "rmdir", /* 137 = rmdir */
+ "utimes", /* 138 = utimes */
+ "obs_4.2", /* 139 = obsolete 4.2 sigreturn */
+ "adjtime", /* 140 = adjtime */
+ "old.getpeername", /* 141 = old getpeername */
+ "old.gethostid", /* 142 = old gethostid */
+ "old.sethostid", /* 143 = old sethostid */
+ "old.getrlimit", /* 144 = old getrlimit */
+ "old.setrlimit", /* 145 = old setrlimit */
+ "old.killpg", /* 146 = old killpg */
+ "setsid", /* 147 = setsid */
+ "quotactl", /* 148 = quotactl */
+ "old.quota", /* 149 = old quota */
+ "old.getsockname", /* 150 = old getsockname */
+ "#151", /* 151 = nosys */
+ "#152", /* 152 = nosys */
+ "#153", /* 153 = nosys */
+ "#154", /* 154 = nosys */
+#ifdef NFS
+ "nfssvc", /* 155 = nfssvc */
+#else
+ "#155", /* 155 = nosys */
+#endif
+ "old.getdirentries", /* 156 = old getdirentries */
+ "statfs", /* 157 = statfs */
+ "fstatfs", /* 158 = fstatfs */
+ "#159", /* 159 = nosys */
+ "#160", /* 160 = nosys */
+#ifdef NFS
+ "getfh", /* 161 = getfh */
+#else
+ "#161", /* 161 = nosys */
+#endif
+ "#162", /* 162 = nosys */
+ "#163", /* 163 = nosys */
+ "#164", /* 164 = nosys */
+ "#165", /* 165 = nosys */
+ "#166", /* 166 = nosys */
+ "#167", /* 167 = nosys */
+ "#168", /* 168 = nosys */
+ "#169", /* 169 = nosys */
+ "#170", /* 170 = nosys */
+#ifdef SYSVSHM
+ "shmsys", /* 171 = shmsys */
+#else
+ "#171", /* 171 = nosys */
+#endif
+ "#172", /* 172 = nosys */
+ "#173", /* 173 = nosys */
+ "#174", /* 174 = nosys */
+ "#175", /* 175 = nosys */
+ "#176", /* 176 = nosys */
+ "#177", /* 177 = nosys */
+ "#178", /* 178 = nosys */
+ "#179", /* 179 = nosys */
+ "#180", /* 180 = nosys */
+ "setgid", /* 181 = setgid */
+ "setegid", /* 182 = setegid */
+ "seteuid", /* 183 = seteuid */
+#ifdef LFS
+ "lfs_bmapv", /* 184 = lfs_bmapv */
+ "lfs_markv", /* 185 = lfs_markv */
+ "lfs_segclean", /* 186 = lfs_segclean */
+ "lfs_segwait", /* 187 = lfs_segwait */
+#else
+ "#184", /* 184 = nosys */
+ "#185", /* 185 = nosys */
+ "#186", /* 186 = nosys */
+ "#187", /* 187 = nosys */
+#endif
+ "stat", /* 188 = stat */
+ "fstat", /* 189 = fstat */
+ "lstat", /* 190 = lstat */
+ "pathconf", /* 191 = pathconf */
+ "fpathconf", /* 192 = fpathconf */
+ "#193", /* 193 = nosys */
+ "getrlimit", /* 194 = getrlimit */
+ "setrlimit", /* 195 = setrlimit */
+ "getdirentries", /* 196 = getdirentries */
+ "mmap", /* 197 = mmap */
+ "__syscall", /* 198 = __syscall */
+ "lseek", /* 199 = lseek */
+ "truncate", /* 200 = truncate */
+ "ftruncate", /* 201 = ftruncate */
+ "__sysctl", /* 202 = __sysctl */
+ "mlock", /* 203 = mlock */
+ "munlock", /* 204 = munlock */
+ "#205", /* 205 = nosys */
+ "#206", /* 206 = nosys */
+ "#207", /* 207 = nosys */
+ "#208", /* 208 = nosys */
+ "#209", /* 209 = nosys */
+ "#210", /* 210 = nosys */
+};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
new file mode 100644
index 0000000..1b8de14
--- /dev/null
+++ b/sys/kern/syscalls.master
@@ -0,0 +1,276 @@
+ @(#)syscalls.master 8.2 (Berkeley) 1/13/94
+; System call name/number master file.
+; Processed to created init_sysent.c, syscalls.c and syscall.h.
+
+; Columns: number type nargs name altname/comments
+; number system call number, must be in order
+; type one of STD, OBSOL, UNIMPL, COMPAT
+; nargs number of arguments
+; name name of syscall routine
+; altname name of system call if different
+; for UNIMPL/OBSOL, name continues with comments
+
+; types:
+; STD always included
+; COMPAT included on COMPAT #ifdef
+; LIBCOMPAT included on COMPAT #ifdef, and placed in syscall.h
+; OBSOL obsolete, not included in system, only specifies name
+; UNIMPL not implemented, placeholder only
+
+; #ifdef's, etc. may be included, and are copied to the output files.
+
+; Reserved/unimplemented system calls in the range 0-150 inclusive
+; are reserved for use in future Berkeley releases.
+; Additional system calls implemented in vendor and other
+; redistributions should be placed in the reserved range at the end
+; of the current calls.
+
+0 STD 0 nosys syscall
+1 STD 1 exit
+2 STD 0 fork
+3 STD 3 read
+4 STD 3 write
+5 STD 3 open
+6 STD 1 close
+7 STD 4 wait4
+8 COMPAT 2 creat
+9 STD 2 link
+10 STD 1 unlink
+11 OBSOL 2 execv
+12 STD 1 chdir
+13 STD 1 fchdir
+14 STD 3 mknod
+15 STD 2 chmod
+16 STD 3 chown
+17 STD 1 obreak break
+18 STD 3 getfsstat
+19 COMPAT 3 lseek
+20 STD 0 getpid
+21 STD 4 mount
+22 STD 2 unmount
+23 STD 1 setuid
+24 STD 0 getuid
+25 STD 0 geteuid
+26 STD 4 ptrace
+27 STD 3 recvmsg
+28 STD 3 sendmsg
+29 STD 6 recvfrom
+30 STD 3 accept
+31 STD 3 getpeername
+32 STD 3 getsockname
+33 STD 2 access
+34 STD 2 chflags
+35 STD 2 fchflags
+36 STD 0 sync
+37 STD 2 kill
+38 COMPAT 2 stat
+39 STD 0 getppid
+40 COMPAT 2 lstat
+41 STD 2 dup
+42 STD 0 pipe
+43 STD 0 getegid
+44 STD 4 profil
+#ifdef KTRACE
+45 STD 4 ktrace
+#else
+45 UNIMPL 0 ktrace
+#endif
+46 STD 3 sigaction
+47 STD 0 getgid
+48 STD 2 sigprocmask
+49 STD 2 getlogin
+50 STD 1 setlogin
+51 STD 1 acct
+52 STD 0 sigpending
+53 STD 2 sigaltstack
+54 STD 3 ioctl
+55 STD 1 reboot
+56 STD 1 revoke
+57 STD 2 symlink
+58 STD 3 readlink
+59 STD 3 execve
+60 STD 1 umask
+61 STD 1 chroot
+62 COMPAT 2 fstat
+63 COMPAT 4 getkerninfo
+64 COMPAT 0 getpagesize
+65 STD 2 msync
+66 STD 0 vfork
+67 OBSOL 0 vread
+68 OBSOL 0 vwrite
+69 STD 1 sbrk
+70 STD 1 sstk
+71 COMPAT 7 mmap
+72 STD 1 ovadvise vadvise
+73 STD 2 munmap
+74 STD 3 mprotect
+75 STD 3 madvise
+76 OBSOL 0 vhangup
+77 OBSOL 0 vlimit
+78 STD 3 mincore
+79 STD 2 getgroups
+80 STD 2 setgroups
+81 STD 0 getpgrp
+82 STD 2 setpgid
+83 STD 3 setitimer
+84 COMPAT 0 wait
+85 STD 1 swapon
+86 STD 2 getitimer
+87 COMPAT 2 gethostname
+88 COMPAT 2 sethostname
+89 STD 0 getdtablesize
+90 STD 2 dup2
+91 UNIMPL 2 getdopt
+92 STD 3 fcntl
+93 STD 5 select
+94 UNIMPL 2 setdopt
+95 STD 1 fsync
+96 STD 3 setpriority
+97 STD 3 socket
+98 STD 3 connect
+99 COMPAT 3 accept
+100 STD 2 getpriority
+101 COMPAT 4 send
+102 COMPAT 4 recv
+103 STD 1 sigreturn
+104 STD 3 bind
+105 STD 5 setsockopt
+106 STD 2 listen
+107 OBSOL 0 vtimes
+108 COMPAT 3 sigvec
+109 COMPAT 1 sigblock
+110 COMPAT 1 sigsetmask
+111 STD 1 sigsuspend
+112 COMPAT 2 sigstack
+113 COMPAT 3 recvmsg
+114 COMPAT 3 sendmsg
+#ifdef TRACE
+115 STD 2 vtrace
+#else
+115 OBSOL 2 vtrace
+#endif
+116 STD 2 gettimeofday
+117 STD 2 getrusage
+118 STD 5 getsockopt
+#ifdef vax
+119 STD 1 resuba
+#else
+119 UNIMPL 0 nosys
+#endif
+120 STD 3 readv
+121 STD 3 writev
+122 STD 2 settimeofday
+123 STD 3 fchown
+124 STD 2 fchmod
+125 COMPAT 6 recvfrom
+126 COMPAT 2 setreuid
+127 COMPAT 2 setregid
+128 STD 2 rename
+129 COMPAT 2 truncate
+130 COMPAT 2 ftruncate
+131 STD 2 flock
+132 STD 2 mkfifo
+133 STD 6 sendto
+134 STD 2 shutdown
+135 STD 5 socketpair
+136 STD 2 mkdir
+137 STD 1 rmdir
+138 STD 2 utimes
+139 OBSOL 0 4.2 sigreturn
+140 STD 2 adjtime
+141 COMPAT 3 getpeername
+142 COMPAT 0 gethostid
+143 COMPAT 1 sethostid
+144 COMPAT 2 getrlimit
+145 COMPAT 2 setrlimit
+146 COMPAT 2 killpg
+147 STD 0 setsid
+148 STD 4 quotactl
+149 COMPAT 4 quota
+150 COMPAT 3 getsockname
+
+; Syscalls 151-180 inclusive are reserved for vendor-specific
+; system calls. (This includes various calls added for compatibity
+; with other Unix variants.)
+; Some of these calls are now supported by BSD...
+151 UNIMPL 0 nosys
+152 UNIMPL 0 nosys
+153 UNIMPL 0 nosys
+154 UNIMPL 0 nosys
+#ifdef NFS
+155 STD 2 nfssvc
+#else
+155 UNIMPL 0 nosys
+#endif
+156 COMPAT 4 getdirentries
+157 STD 2 statfs
+158 STD 2 fstatfs
+159 UNIMPL 0 nosys
+160 UNIMPL 0 nosys
+#ifdef NFS
+161 STD 2 getfh
+#else
+161 UNIMPL 0 nosys
+#endif
+162 UNIMPL 0 nosys
+163 UNIMPL 0 nosys
+164 UNIMPL 0 nosys
+165 UNIMPL 0 nosys
+166 UNIMPL 0 nosys
+167 UNIMPL 0 nosys
+168 UNIMPL 0 nosys
+169 UNIMPL 0 nosys
+170 UNIMPL 0 nosys
+#ifdef SYSVSHM
+171 STD 4 shmsys
+#else
+171 UNIMPL 0 nosys
+#endif
+172 UNIMPL 0 nosys
+173 UNIMPL 0 nosys
+174 UNIMPL 0 nosys
+175 UNIMPL 0 nosys
+176 UNIMPL 0 nosys
+177 UNIMPL 0 nosys
+178 UNIMPL 0 nosys
+179 UNIMPL 0 nosys
+180 UNIMPL 0 nosys
+
+; Syscalls 180-199 are used by/reserved for BSD
+181 STD 1 setgid
+182 STD 1 setegid
+183 STD 1 seteuid
+#ifdef LFS
+184 STD 3 lfs_bmapv
+185 STD 3 lfs_markv
+186 STD 2 lfs_segclean
+187 STD 2 lfs_segwait
+#else
+184 UNIMPL 0 nosys
+185 UNIMPL 0 nosys
+186 UNIMPL 0 nosys
+187 UNIMPL 0 nosys
+#endif
+188 STD 2 stat
+189 STD 2 fstat
+190 STD 2 lstat
+191 STD 2 pathconf
+192 STD 2 fpathconf
+193 UNIMPL 0 nosys
+194 STD 2 getrlimit
+195 STD 2 setrlimit
+196 STD 4 getdirentries
+197 STD 8 mmap
+198 STD 0 nosys __syscall
+199 STD 5 lseek
+200 STD 4 truncate
+201 STD 4 ftruncate
+202 STD 6 __sysctl
+203 STD 2 mlock
+204 STD 2 munlock
+205 UNIMPL 0 nosys
+206 UNIMPL 0 nosys
+207 UNIMPL 0 nosys
+208 UNIMPL 0 nosys
+209 UNIMPL 0 nosys
+210 UNIMPL 0 nosys
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
new file mode 100644
index 0000000..6cc7be2
--- /dev/null
+++ b/sys/kern/tty.c
@@ -0,0 +1,1923 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty.c 8.8 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#define TTYDEFCHARS
+#include <sys/tty.h>
+#undef TTYDEFCHARS
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/dkstat.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/syslog.h>
+
+#include <vm/vm.h>
+
+static int proc_compare __P((struct proc *p1, struct proc *p2));
+static int ttnread __P((struct tty *));
+static void ttyblock __P((struct tty *tp));
+static void ttyecho __P((int, struct tty *tp));
+static void ttyrubo __P((struct tty *, int));
+
+/* Symbolic sleep message strings. */
+char ttclos[] = "ttycls";
+char ttopen[] = "ttyopn";
+char ttybg[] = "ttybg";
+char ttybuf[] = "ttybuf";
+char ttyin[] = "ttyin";
+char ttyout[] = "ttyout";
+
+/*
+ * Table with character classes and parity. The 8th bit indicates parity,
+ * the 7th bit indicates the character is an alphameric or underscore (for
+ * ALTWERASE), and the low 6 bits indicate delay type. If the low 6 bits
+ * are 0 then the character needs no special processing on output; classes
+ * other than 0 might be translated or (not currently) require delays.
+ */
+#define E 0x00 /* Even parity. */
+#define O 0x80 /* Odd parity. */
+#define PARITY(c) (char_type[c] & O)
+
+#define ALPHA 0x40 /* Alpha or underscore. */
+#define ISALPHA(c) (char_type[(c) & TTY_CHARMASK] & ALPHA)
+
+#define CCLASSMASK 0x3f
+#define CCLASS(c) (char_type[c] & CCLASSMASK)
+
+#define BS BACKSPACE
+#define CC CONTROL
+#define CR RETURN
+#define NA ORDINARY | ALPHA
+#define NL NEWLINE
+#define NO ORDINARY
+#define TB TAB
+#define VT VTAB
+
+char const char_type[] = {
+ E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* nul - bel */
+ O|BS, E|TB, E|NL, O|CC, E|VT, O|CR, O|CC, E|CC, /* bs - si */
+ O|CC, E|CC, E|CC, O|CC, E|CC, O|CC, O|CC, E|CC, /* dle - etb */
+ E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* can - us */
+ O|NO, E|NO, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* sp - ' */
+ E|NO, O|NO, O|NO, E|NO, O|NO, E|NO, E|NO, O|NO, /* ( - / */
+ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* 0 - 7 */
+ O|NA, E|NA, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* 8 - ? */
+ O|NO, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* @ - G */
+ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* H - O */
+ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* P - W */
+ O|NA, E|NA, E|NA, O|NO, E|NO, O|NO, O|NO, O|NA, /* X - _ */
+ E|NO, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* ` - g */
+ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* h - o */
+ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* p - w */
+ E|NA, O|NA, O|NA, E|NO, O|NO, E|NO, E|NO, O|CC, /* x - del */
+ /*
+ * Meta chars; should be settable per character set;
+ * for now, treat them all as normal characters.
+ */
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+};
+#undef BS
+#undef CC
+#undef CR
+#undef NA
+#undef NL
+#undef NO
+#undef TB
+#undef VT
+
+/* Macros to clear/set/test flags. */
+#define SET(t, f) (t) |= (f)
+#define CLR(t, f) (t) &= ~(f)
+#define ISSET(t, f) ((t) & (f))
+
+/*
+ * Initial open of tty, or (re)entry to standard tty line discipline.
+ */
+int
+ttyopen(device, tp)
+ dev_t device;
+ register struct tty *tp;
+{
+ int s;
+
+ s = spltty();
+ tp->t_dev = device;
+ if (!ISSET(tp->t_state, TS_ISOPEN)) {
+ SET(tp->t_state, TS_ISOPEN);
+ bzero(&tp->t_winsize, sizeof(tp->t_winsize));
+ }
+ CLR(tp->t_state, TS_WOPEN);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Handle close() on a tty line: flush and set to initial state,
+ * bumping generation number so that pending read/write calls
+ * can detect recycling of the tty.
+ */
+int
+ttyclose(tp)
+ register struct tty *tp;
+{
+ extern struct tty *constty; /* Temporary virtual console. */
+
+ if (constty == tp)
+ constty = NULL;
+
+ ttyflush(tp, FREAD | FWRITE);
+
+ tp->t_gen++;
+ tp->t_pgrp = NULL;
+ tp->t_session = NULL;
+ tp->t_state = 0;
+ return (0);
+}
+
+#define FLUSHQ(q) { \
+ if ((q)->c_cc) \
+ ndflush(q, (q)->c_cc); \
+}
+
+/* Is 'c' a line delimiter ("break" character)? */
+#define TTBREAKC(c) \
+ ((c) == '\n' || ((c) == cc[VEOF] || \
+ (c) == cc[VEOL] || (c) == cc[VEOL2]) && (c) != _POSIX_VDISABLE)
+
+
+/*
+ * Process input of a single character received on a tty.
+ */
+int
+ttyinput(c, tp)
+ register int c;
+ register struct tty *tp;
+{
+ register int iflag, lflag;
+ register u_char *cc;
+ int i, err;
+
+ /*
+ * If input is pending take it first.
+ */
+ lflag = tp->t_lflag;
+ if (ISSET(lflag, PENDIN))
+ ttypend(tp);
+ /*
+ * Gather stats.
+ */
+ if (ISSET(lflag, ICANON)) {
+ ++tk_cancc;
+ ++tp->t_cancc;
+ } else {
+ ++tk_rawcc;
+ ++tp->t_rawcc;
+ }
+ ++tk_nin;
+
+ /* Handle exceptional conditions (break, parity, framing). */
+ cc = tp->t_cc;
+ iflag = tp->t_iflag;
+ if (err = (ISSET(c, TTY_ERRORMASK))) {
+ CLR(c, TTY_ERRORMASK);
+ if (ISSET(err, TTY_FE) && !c) { /* Break. */
+ if (ISSET(iflag, IGNBRK))
+ goto endcase;
+ else if (ISSET(iflag, BRKINT) &&
+ ISSET(lflag, ISIG) &&
+ (cc[VINTR] != _POSIX_VDISABLE))
+ c = cc[VINTR];
+ else if (ISSET(iflag, PARMRK))
+ goto parmrk;
+ } else if (ISSET(err, TTY_PE) &&
+ ISSET(iflag, INPCK) || ISSET(err, TTY_FE)) {
+ if (ISSET(iflag, IGNPAR))
+ goto endcase;
+ else if (ISSET(iflag, PARMRK)) {
+parmrk: (void)putc(0377 | TTY_QUOTE, &tp->t_rawq);
+ (void)putc(0 | TTY_QUOTE, &tp->t_rawq);
+ (void)putc(c | TTY_QUOTE, &tp->t_rawq);
+ goto endcase;
+ } else
+ c = 0;
+ }
+ }
+ /*
+ * In tandem mode, check high water mark.
+ */
+ if (ISSET(iflag, IXOFF))
+ ttyblock(tp);
+ if (!ISSET(tp->t_state, TS_TYPEN) && ISSET(iflag, ISTRIP))
+ CLR(c, 0x80);
+ if (!ISSET(lflag, EXTPROC)) {
+ /*
+ * Check for literal nexting very first
+ */
+ if (ISSET(tp->t_state, TS_LNCH)) {
+ SET(c, TTY_QUOTE);
+ CLR(tp->t_state, TS_LNCH);
+ }
+ /*
+ * Scan for special characters. This code
+ * is really just a big case statement with
+ * non-constant cases. The bottom of the
+ * case statement is labeled ``endcase'', so goto
+ * it after a case match, or similar.
+ */
+
+ /*
+ * Control chars which aren't controlled
+ * by ICANON, ISIG, or IXON.
+ */
+ if (ISSET(lflag, IEXTEN)) {
+ if (CCEQ(cc[VLNEXT], c)) {
+ if (ISSET(lflag, ECHO)) {
+ if (ISSET(lflag, ECHOE)) {
+ (void)ttyoutput('^', tp);
+ (void)ttyoutput('\b', tp);
+ } else
+ ttyecho(c, tp);
+ }
+ SET(tp->t_state, TS_LNCH);
+ goto endcase;
+ }
+ if (CCEQ(cc[VDISCARD], c)) {
+ if (ISSET(lflag, FLUSHO))
+ CLR(tp->t_lflag, FLUSHO);
+ else {
+ ttyflush(tp, FWRITE);
+ ttyecho(c, tp);
+ if (tp->t_rawq.c_cc + tp->t_canq.c_cc)
+ ttyretype(tp);
+ SET(tp->t_lflag, FLUSHO);
+ }
+ goto startoutput;
+ }
+ }
+ /*
+ * Signals.
+ */
+ if (ISSET(lflag, ISIG)) {
+ if (CCEQ(cc[VINTR], c) || CCEQ(cc[VQUIT], c)) {
+ if (!ISSET(lflag, NOFLSH))
+ ttyflush(tp, FREAD | FWRITE);
+ ttyecho(c, tp);
+ pgsignal(tp->t_pgrp,
+ CCEQ(cc[VINTR], c) ? SIGINT : SIGQUIT, 1);
+ goto endcase;
+ }
+ if (CCEQ(cc[VSUSP], c)) {
+ if (!ISSET(lflag, NOFLSH))
+ ttyflush(tp, FREAD);
+ ttyecho(c, tp);
+ pgsignal(tp->t_pgrp, SIGTSTP, 1);
+ goto endcase;
+ }
+ }
+ /*
+ * Handle start/stop characters.
+ */
+ if (ISSET(iflag, IXON)) {
+ if (CCEQ(cc[VSTOP], c)) {
+ if (!ISSET(tp->t_state, TS_TTSTOP)) {
+ SET(tp->t_state, TS_TTSTOP);
+#ifdef sun4c /* XXX */
+ (*tp->t_stop)(tp, 0);
+#else
+ (*cdevsw[major(tp->t_dev)].d_stop)(tp,
+ 0);
+#endif
+ return (0);
+ }
+ if (!CCEQ(cc[VSTART], c))
+ return (0);
+ /*
+ * if VSTART == VSTOP then toggle
+ */
+ goto endcase;
+ }
+ if (CCEQ(cc[VSTART], c))
+ goto restartoutput;
+ }
+ /*
+ * IGNCR, ICRNL, & INLCR
+ */
+ if (c == '\r') {
+ if (ISSET(iflag, IGNCR))
+ goto endcase;
+ else if (ISSET(iflag, ICRNL))
+ c = '\n';
+ } else if (c == '\n' && ISSET(iflag, INLCR))
+ c = '\r';
+ }
+ if (!ISSET(tp->t_lflag, EXTPROC) && ISSET(lflag, ICANON)) {
+ /*
+ * From here on down canonical mode character
+ * processing takes place.
+ */
+ /*
+ * erase (^H / ^?)
+ */
+ if (CCEQ(cc[VERASE], c)) {
+ if (tp->t_rawq.c_cc)
+ ttyrub(unputc(&tp->t_rawq), tp);
+ goto endcase;
+ }
+ /*
+ * kill (^U)
+ */
+ if (CCEQ(cc[VKILL], c)) {
+ if (ISSET(lflag, ECHOKE) &&
+ tp->t_rawq.c_cc == tp->t_rocount &&
+ !ISSET(lflag, ECHOPRT))
+ while (tp->t_rawq.c_cc)
+ ttyrub(unputc(&tp->t_rawq), tp);
+ else {
+ ttyecho(c, tp);
+ if (ISSET(lflag, ECHOK) ||
+ ISSET(lflag, ECHOKE))
+ ttyecho('\n', tp);
+ FLUSHQ(&tp->t_rawq);
+ tp->t_rocount = 0;
+ }
+ CLR(tp->t_state, TS_LOCAL);
+ goto endcase;
+ }
+ /*
+ * word erase (^W)
+ */
+ if (CCEQ(cc[VWERASE], c)) {
+ int alt = ISSET(lflag, ALTWERASE);
+ int ctype;
+
+ /*
+ * erase whitespace
+ */
+ while ((c = unputc(&tp->t_rawq)) == ' ' || c == '\t')
+ ttyrub(c, tp);
+ if (c == -1)
+ goto endcase;
+ /*
+ * erase last char of word and remember the
+ * next chars type (for ALTWERASE)
+ */
+ ttyrub(c, tp);
+ c = unputc(&tp->t_rawq);
+ if (c == -1)
+ goto endcase;
+ if (c == ' ' || c == '\t') {
+ (void)putc(c, &tp->t_rawq);
+ goto endcase;
+ }
+ ctype = ISALPHA(c);
+ /*
+ * erase rest of word
+ */
+ do {
+ ttyrub(c, tp);
+ c = unputc(&tp->t_rawq);
+ if (c == -1)
+ goto endcase;
+ } while (c != ' ' && c != '\t' &&
+ (alt == 0 || ISALPHA(c) == ctype));
+ (void)putc(c, &tp->t_rawq);
+ goto endcase;
+ }
+ /*
+ * reprint line (^R)
+ */
+ if (CCEQ(cc[VREPRINT], c)) {
+ ttyretype(tp);
+ goto endcase;
+ }
+ /*
+ * ^T - kernel info and generate SIGINFO
+ */
+ if (CCEQ(cc[VSTATUS], c)) {
+ if (ISSET(lflag, ISIG))
+ pgsignal(tp->t_pgrp, SIGINFO, 1);
+ if (!ISSET(lflag, NOKERNINFO))
+ ttyinfo(tp);
+ goto endcase;
+ }
+ }
+ /*
+ * Check for input buffer overflow
+ */
+ if (tp->t_rawq.c_cc + tp->t_canq.c_cc >= TTYHOG) {
+ if (ISSET(iflag, IMAXBEL)) {
+ if (tp->t_outq.c_cc < tp->t_hiwat)
+ (void)ttyoutput(CTRL('g'), tp);
+ } else
+ ttyflush(tp, FREAD | FWRITE);
+ goto endcase;
+ }
+ /*
+ * Put data char in q for user and
+ * wakeup on seeing a line delimiter.
+ */
+ if (putc(c, &tp->t_rawq) >= 0) {
+ if (!ISSET(lflag, ICANON)) {
+ ttwakeup(tp);
+ ttyecho(c, tp);
+ goto endcase;
+ }
+ if (TTBREAKC(c)) {
+ tp->t_rocount = 0;
+ catq(&tp->t_rawq, &tp->t_canq);
+ ttwakeup(tp);
+ } else if (tp->t_rocount++ == 0)
+ tp->t_rocol = tp->t_column;
+ if (ISSET(tp->t_state, TS_ERASE)) {
+ /*
+ * end of prterase \.../
+ */
+ CLR(tp->t_state, TS_ERASE);
+ (void)ttyoutput('/', tp);
+ }
+ i = tp->t_column;
+ ttyecho(c, tp);
+ if (CCEQ(cc[VEOF], c) && ISSET(lflag, ECHO)) {
+ /*
+ * Place the cursor over the '^' of the ^D.
+ */
+ i = min(2, tp->t_column - i);
+ while (i > 0) {
+ (void)ttyoutput('\b', tp);
+ i--;
+ }
+ }
+ }
+endcase:
+ /*
+ * IXANY means allow any character to restart output.
+ */
+ if (ISSET(tp->t_state, TS_TTSTOP) &&
+ !ISSET(iflag, IXANY) && cc[VSTART] != cc[VSTOP])
+ return (0);
+restartoutput:
+ CLR(tp->t_lflag, FLUSHO);
+ CLR(tp->t_state, TS_TTSTOP);
+startoutput:
+ return (ttstart(tp));
+}
+
+/*
+ * Output a single character on a tty, doing output processing
+ * as needed (expanding tabs, newline processing, etc.).
+ * Returns < 0 if succeeds, otherwise returns char to resend.
+ * Must be recursive.
+ */
+int
+ttyoutput(c, tp)
+ register int c;
+ register struct tty *tp;
+{
+ register long oflag;
+ register int col, s;
+
+ oflag = tp->t_oflag;
+ if (!ISSET(oflag, OPOST)) {
+ if (ISSET(tp->t_lflag, FLUSHO))
+ return (-1);
+ if (putc(c, &tp->t_outq))
+ return (c);
+ tk_nout++;
+ tp->t_outcc++;
+ return (-1);
+ }
+ /*
+ * Do tab expansion if OXTABS is set. Special case if we external
+ * processing, we don't do the tab expansion because we'll probably
+ * get it wrong. If tab expansion needs to be done, let it happen
+ * externally.
+ */
+ CLR(c, ~TTY_CHARMASK);
+ if (c == '\t' &&
+ ISSET(oflag, OXTABS) && !ISSET(tp->t_lflag, EXTPROC)) {
+ c = 8 - (tp->t_column & 7);
+ if (!ISSET(tp->t_lflag, FLUSHO)) {
+ s = spltty(); /* Don't interrupt tabs. */
+ c -= b_to_q(" ", c, &tp->t_outq);
+ tk_nout += c;
+ tp->t_outcc += c;
+ splx(s);
+ }
+ tp->t_column += c;
+ return (c ? -1 : '\t');
+ }
+ if (c == CEOT && ISSET(oflag, ONOEOT))
+ return (-1);
+
+ /*
+ * Newline translation: if ONLCR is set,
+ * translate newline into "\r\n".
+ */
+ if (c == '\n' && ISSET(tp->t_oflag, ONLCR)) {
+ tk_nout++;
+ tp->t_outcc++;
+ if (putc('\r', &tp->t_outq))
+ return (c);
+ }
+ tk_nout++;
+ tp->t_outcc++;
+ if (!ISSET(tp->t_lflag, FLUSHO) && putc(c, &tp->t_outq))
+ return (c);
+
+ col = tp->t_column;
+ switch (CCLASS(c)) {
+ case BACKSPACE:
+ if (col > 0)
+ --col;
+ break;
+ case CONTROL:
+ break;
+ case NEWLINE:
+ case RETURN:
+ col = 0;
+ break;
+ case ORDINARY:
+ ++col;
+ break;
+ case TAB:
+ col = (col + 8) & ~7;
+ break;
+ }
+ tp->t_column = col;
+ return (-1);
+}
+
+/*
+ * Ioctls for all tty devices. Called after line-discipline specific ioctl
+ * has been called to do discipline-specific functions and/or reject any
+ * of these ioctl commands.
+ */
+/* ARGSUSED */
+int
+ttioctl(tp, cmd, data, flag)
+ register struct tty *tp;
+ int cmd, flag;
+ void *data;
+{
+ extern struct tty *constty; /* Temporary virtual console. */
+ extern int nlinesw;
+ register struct proc *p;
+ int s, error;
+
+ p = curproc; /* XXX */
+
+ /* If the ioctl involves modification, hang if in the background. */
+ switch (cmd) {
+ case TIOCFLUSH:
+ case TIOCSETA:
+ case TIOCSETD:
+ case TIOCSETAF:
+ case TIOCSETAW:
+#ifdef notdef
+ case TIOCSPGRP:
+#endif
+ case TIOCSTI:
+ case TIOCSWINSZ:
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ case TIOCLBIC:
+ case TIOCLBIS:
+ case TIOCLSET:
+ case TIOCSETC:
+ case OTIOCSETD:
+ case TIOCSETN:
+ case TIOCSETP:
+ case TIOCSLTC:
+#endif
+ while (isbackground(curproc, tp) &&
+ p->p_pgrp->pg_jobc && (p->p_flag & P_PPWAIT) == 0 &&
+ (p->p_sigignore & sigmask(SIGTTOU)) == 0 &&
+ (p->p_sigmask & sigmask(SIGTTOU)) == 0) {
+ pgsignal(p->p_pgrp, SIGTTOU, 1);
+ if (error = ttysleep(tp,
+ &lbolt, TTOPRI | PCATCH, ttybg, 0))
+ return (error);
+ }
+ break;
+ }
+
+ switch (cmd) { /* Process the ioctl. */
+ case FIOASYNC: /* set/clear async i/o */
+ s = spltty();
+ if (*(int *)data)
+ SET(tp->t_state, TS_ASYNC);
+ else
+ CLR(tp->t_state, TS_ASYNC);
+ splx(s);
+ break;
+ case FIONBIO: /* set/clear non-blocking i/o */
+ break; /* XXX: delete. */
+ case FIONREAD: /* get # bytes to read */
+ *(int *)data = ttnread(tp);
+ break;
+ case TIOCEXCL: /* set exclusive use of tty */
+ s = spltty();
+ SET(tp->t_state, TS_XCLUDE);
+ splx(s);
+ break;
+ case TIOCFLUSH: { /* flush buffers */
+ register int flags = *(int *)data;
+
+ if (flags == 0)
+ flags = FREAD | FWRITE;
+ else
+ flags &= FREAD | FWRITE;
+ ttyflush(tp, flags);
+ break;
+ }
+ case TIOCCONS: /* become virtual console */
+ if (*(int *)data) {
+ if (constty && constty != tp &&
+ ISSET(constty->t_state, TS_CARR_ON | TS_ISOPEN) ==
+ (TS_CARR_ON | TS_ISOPEN))
+ return (EBUSY);
+#ifndef UCONSOLE
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+#endif
+ constty = tp;
+ } else if (tp == constty)
+ constty = NULL;
+ break;
+ case TIOCDRAIN: /* wait till output drained */
+ if (error = ttywait(tp))
+ return (error);
+ break;
+ case TIOCGETA: { /* get termios struct */
+ struct termios *t = (struct termios *)data;
+
+ bcopy(&tp->t_termios, t, sizeof(struct termios));
+ break;
+ }
+ case TIOCGETD: /* get line discipline */
+ *(int *)data = tp->t_line;
+ break;
+ case TIOCGWINSZ: /* get window size */
+ *(struct winsize *)data = tp->t_winsize;
+ break;
+ case TIOCGPGRP: /* get pgrp of tty */
+ if (!isctty(p, tp))
+ return (ENOTTY);
+ *(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
+ break;
+#ifdef TIOCHPCL
+ case TIOCHPCL: /* hang up on last close */
+ s = spltty();
+ SET(tp->t_cflag, HUPCL);
+ splx(s);
+ break;
+#endif
+ case TIOCNXCL: /* reset exclusive use of tty */
+ s = spltty();
+ CLR(tp->t_state, TS_XCLUDE);
+ splx(s);
+ break;
+ case TIOCOUTQ: /* output queue size */
+ *(int *)data = tp->t_outq.c_cc;
+ break;
+ case TIOCSETA: /* set termios struct */
+ case TIOCSETAW: /* drain output, set */
+ case TIOCSETAF: { /* drn out, fls in, set */
+ register struct termios *t = (struct termios *)data;
+
+ s = spltty();
+ if (cmd == TIOCSETAW || cmd == TIOCSETAF) {
+ if (error = ttywait(tp)) {
+ splx(s);
+ return (error);
+ }
+ if (cmd == TIOCSETAF)
+ ttyflush(tp, FREAD);
+ }
+ if (!ISSET(t->c_cflag, CIGNORE)) {
+ /*
+ * Set device hardware.
+ */
+ if (tp->t_param && (error = (*tp->t_param)(tp, t))) {
+ splx(s);
+ return (error);
+ } else {
+ if (!ISSET(tp->t_state, TS_CARR_ON) &&
+ ISSET(tp->t_cflag, CLOCAL) &&
+ !ISSET(t->c_cflag, CLOCAL)) {
+ CLR(tp->t_state, TS_ISOPEN);
+ SET(tp->t_state, TS_WOPEN);
+ ttwakeup(tp);
+ }
+ tp->t_cflag = t->c_cflag;
+ tp->t_ispeed = t->c_ispeed;
+ tp->t_ospeed = t->c_ospeed;
+ }
+ ttsetwater(tp);
+ }
+ if (cmd != TIOCSETAF) {
+ if (ISSET(t->c_lflag, ICANON) !=
+ ISSET(tp->t_lflag, ICANON))
+ if (ISSET(t->c_lflag, ICANON)) {
+ SET(tp->t_lflag, PENDIN);
+ ttwakeup(tp);
+ } else {
+ struct clist tq;
+
+ catq(&tp->t_rawq, &tp->t_canq);
+ tq = tp->t_rawq;
+ tp->t_rawq = tp->t_canq;
+ tp->t_canq = tq;
+ CLR(tp->t_lflag, PENDIN);
+ }
+ }
+ tp->t_iflag = t->c_iflag;
+ tp->t_oflag = t->c_oflag;
+ /*
+ * Make the EXTPROC bit read only.
+ */
+ if (ISSET(tp->t_lflag, EXTPROC))
+ SET(t->c_lflag, EXTPROC);
+ else
+ CLR(t->c_lflag, EXTPROC);
+ tp->t_lflag = t->c_lflag | ISSET(tp->t_lflag, PENDIN);
+ bcopy(t->c_cc, tp->t_cc, sizeof(t->c_cc));
+ splx(s);
+ break;
+ }
+ case TIOCSETD: { /* set line discipline */
+ register int t = *(int *)data;
+ dev_t device = tp->t_dev;
+
+ if ((u_int)t >= nlinesw)
+ return (ENXIO);
+ if (t != tp->t_line) {
+ s = spltty();
+ (*linesw[tp->t_line].l_close)(tp, flag);
+ error = (*linesw[t].l_open)(device, tp);
+ if (error) {
+ (void)(*linesw[tp->t_line].l_open)(device, tp);
+ splx(s);
+ return (error);
+ }
+ tp->t_line = t;
+ splx(s);
+ }
+ break;
+ }
+ case TIOCSTART: /* start output, like ^Q */
+ s = spltty();
+ if (ISSET(tp->t_state, TS_TTSTOP) ||
+ ISSET(tp->t_lflag, FLUSHO)) {
+ CLR(tp->t_lflag, FLUSHO);
+ CLR(tp->t_state, TS_TTSTOP);
+ ttstart(tp);
+ }
+ splx(s);
+ break;
+ case TIOCSTI: /* simulate terminal input */
+ if (p->p_ucred->cr_uid && (flag & FREAD) == 0)
+ return (EPERM);
+ if (p->p_ucred->cr_uid && !isctty(p, tp))
+ return (EACCES);
+ (*linesw[tp->t_line].l_rint)(*(u_char *)data, tp);
+ break;
+ case TIOCSTOP: /* stop output, like ^S */
+ s = spltty();
+ if (!ISSET(tp->t_state, TS_TTSTOP)) {
+ SET(tp->t_state, TS_TTSTOP);
+#ifdef sun4c /* XXX */
+ (*tp->t_stop)(tp, 0);
+#else
+ (*cdevsw[major(tp->t_dev)].d_stop)(tp, 0);
+#endif
+ }
+ splx(s);
+ break;
+ case TIOCSCTTY: /* become controlling tty */
+ /* Session ctty vnode pointer set in vnode layer. */
+ if (!SESS_LEADER(p) ||
+ (p->p_session->s_ttyvp || tp->t_session) &&
+ (tp->t_session != p->p_session))
+ return (EPERM);
+ tp->t_session = p->p_session;
+ tp->t_pgrp = p->p_pgrp;
+ p->p_session->s_ttyp = tp;
+ p->p_flag |= P_CONTROLT;
+ break;
+ case TIOCSPGRP: { /* set pgrp of tty */
+ register struct pgrp *pgrp = pgfind(*(int *)data);
+
+ if (!isctty(p, tp))
+ return (ENOTTY);
+ else if (pgrp == NULL || pgrp->pg_session != p->p_session)
+ return (EPERM);
+ tp->t_pgrp = pgrp;
+ break;
+ }
+ case TIOCSWINSZ: /* set window size */
+ if (bcmp((caddr_t)&tp->t_winsize, data,
+ sizeof (struct winsize))) {
+ tp->t_winsize = *(struct winsize *)data;
+ pgsignal(tp->t_pgrp, SIGWINCH, 1);
+ }
+ break;
+ default:
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ return (ttcompat(tp, cmd, data, flag));
+#else
+ return (-1);
+#endif
+ }
+ return (0);
+}
+
+int
+ttselect(device, rw, p)
+ dev_t device;
+ int rw;
+ struct proc *p;
+{
+ register struct tty *tp;
+ int nread, s;
+
+ tp = &cdevsw[major(device)].d_ttys[minor(device)];
+
+ s = spltty();
+ switch (rw) {
+ case FREAD:
+ nread = ttnread(tp);
+ if (nread > 0 || !ISSET(tp->t_cflag, CLOCAL) &&
+ !ISSET(tp->t_state, TS_CARR_ON))
+ goto win;
+ selrecord(p, &tp->t_rsel);
+ break;
+ case FWRITE:
+ if (tp->t_outq.c_cc <= tp->t_lowat) {
+win: splx(s);
+ return (1);
+ }
+ selrecord(p, &tp->t_wsel);
+ break;
+ }
+ splx(s);
+ return (0);
+}
+
+static int
+ttnread(tp)
+ struct tty *tp;
+{
+ int nread;
+
+ if (ISSET(tp->t_lflag, PENDIN))
+ ttypend(tp);
+ nread = tp->t_canq.c_cc;
+ if (!ISSET(tp->t_lflag, ICANON))
+ nread += tp->t_rawq.c_cc;
+ return (nread);
+}
+
+/*
+ * Wait for output to drain.
+ */
+int
+ttywait(tp)
+ register struct tty *tp;
+{
+ int error, s;
+
+ error = 0;
+ s = spltty();
+ while ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) &&
+ (ISSET(tp->t_state, TS_CARR_ON) || ISSET(tp->t_cflag, CLOCAL))
+ && tp->t_oproc) {
+ (*tp->t_oproc)(tp);
+ SET(tp->t_state, TS_ASLEEP);
+ if (error = ttysleep(tp,
+ &tp->t_outq, TTOPRI | PCATCH, ttyout, 0))
+ break;
+ }
+ splx(s);
+ return (error);
+}
+
+/*
+ * Flush if successfully wait.
+ */
+int
+ttywflush(tp)
+ struct tty *tp;
+{
+ int error;
+
+ if ((error = ttywait(tp)) == 0)
+ ttyflush(tp, FREAD);
+ return (error);
+}
+
+/*
+ * Flush tty read and/or write queues, notifying anyone waiting.
+ */
+void
+ttyflush(tp, rw)
+ register struct tty *tp;
+ int rw;
+{
+ register int s;
+
+ s = spltty();
+ if (rw & FREAD) {
+ FLUSHQ(&tp->t_canq);
+ FLUSHQ(&tp->t_rawq);
+ tp->t_rocount = 0;
+ tp->t_rocol = 0;
+ CLR(tp->t_state, TS_LOCAL);
+ ttwakeup(tp);
+ }
+ if (rw & FWRITE) {
+ CLR(tp->t_state, TS_TTSTOP);
+#ifdef sun4c /* XXX */
+ (*tp->t_stop)(tp, rw);
+#else
+ (*cdevsw[major(tp->t_dev)].d_stop)(tp, rw);
+#endif
+ FLUSHQ(&tp->t_outq);
+ wakeup((caddr_t)&tp->t_outq);
+ selwakeup(&tp->t_wsel);
+ }
+ splx(s);
+}
+
+/*
+ * Copy in the default termios characters.
+ */
+void
+ttychars(tp)
+ struct tty *tp;
+{
+
+ bcopy(ttydefchars, tp->t_cc, sizeof(ttydefchars));
+}
+
+/*
+ * Send stop character on input overflow.
+ */
+static void
+ttyblock(tp)
+ register struct tty *tp;
+{
+ register int total;
+
+ total = tp->t_rawq.c_cc + tp->t_canq.c_cc;
+ if (tp->t_rawq.c_cc > TTYHOG) {
+ ttyflush(tp, FREAD | FWRITE);
+ CLR(tp->t_state, TS_TBLOCK);
+ }
+ /*
+ * Block further input iff: current input > threshold
+ * AND input is available to user program.
+ */
+ if (total >= TTYHOG / 2 &&
+ !ISSET(tp->t_state, TS_TBLOCK) &&
+ !ISSET(tp->t_lflag, ICANON) || tp->t_canq.c_cc > 0 &&
+ tp->t_cc[VSTOP] != _POSIX_VDISABLE) {
+ if (putc(tp->t_cc[VSTOP], &tp->t_outq) == 0) {
+ SET(tp->t_state, TS_TBLOCK);
+ ttstart(tp);
+ }
+ }
+}
+
+void
+ttrstrt(tp_arg)
+ void *tp_arg;
+{
+ struct tty *tp;
+ int s;
+
+#ifdef DIAGNOSTIC
+ if (tp_arg == NULL)
+ panic("ttrstrt");
+#endif
+ tp = tp_arg;
+ s = spltty();
+
+ CLR(tp->t_state, TS_TIMEOUT);
+ ttstart(tp);
+
+ splx(s);
+}
+
+int
+ttstart(tp)
+ struct tty *tp;
+{
+
+ if (tp->t_oproc != NULL) /* XXX: Kludge for pty. */
+ (*tp->t_oproc)(tp);
+ return (0);
+}
+
+/*
+ * "close" a line discipline
+ */
+int
+ttylclose(tp, flag)
+ struct tty *tp;
+ int flag;
+{
+
+ if (flag & IO_NDELAY)
+ ttyflush(tp, FREAD | FWRITE);
+ else
+ ttywflush(tp);
+ return (0);
+}
+
+/*
+ * Handle modem control transition on a tty.
+ * Flag indicates new state of carrier.
+ * Returns 0 if the line should be turned off, otherwise 1.
+ */
+int
+ttymodem(tp, flag)
+ register struct tty *tp;
+ int flag;
+{
+
+ if (!ISSET(tp->t_state, TS_WOPEN) && ISSET(tp->t_cflag, MDMBUF)) {
+ /*
+ * MDMBUF: do flow control according to carrier flag
+ */
+ if (flag) {
+ CLR(tp->t_state, TS_TTSTOP);
+ ttstart(tp);
+ } else if (!ISSET(tp->t_state, TS_TTSTOP)) {
+ SET(tp->t_state, TS_TTSTOP);
+#ifdef sun4c /* XXX */
+ (*tp->t_stop)(tp, 0);
+#else
+ (*cdevsw[major(tp->t_dev)].d_stop)(tp, 0);
+#endif
+ }
+ } else if (flag == 0) {
+ /*
+ * Lost carrier.
+ */
+ CLR(tp->t_state, TS_CARR_ON);
+ if (ISSET(tp->t_state, TS_ISOPEN) &&
+ !ISSET(tp->t_cflag, CLOCAL)) {
+ if (tp->t_session && tp->t_session->s_leader)
+ psignal(tp->t_session->s_leader, SIGHUP);
+ ttyflush(tp, FREAD | FWRITE);
+ return (0);
+ }
+ } else {
+ /*
+ * Carrier now on.
+ */
+ SET(tp->t_state, TS_CARR_ON);
+ ttwakeup(tp);
+ }
+ return (1);
+}
+
+/*
+ * Default modem control routine (for other line disciplines).
+ * Return argument flag, to turn off device on carrier drop.
+ */
+int
+nullmodem(tp, flag)
+ register struct tty *tp;
+ int flag;
+{
+
+ if (flag)
+ SET(tp->t_state, TS_CARR_ON);
+ else {
+ CLR(tp->t_state, TS_CARR_ON);
+ if (!ISSET(tp->t_cflag, CLOCAL)) {
+ if (tp->t_session && tp->t_session->s_leader)
+ psignal(tp->t_session->s_leader, SIGHUP);
+ return (0);
+ }
+ }
+ return (1);
+}
+
+/*
+ * Reinput pending characters after state switch
+ * call at spltty().
+ */
+void
+ttypend(tp)
+ register struct tty *tp;
+{
+ struct clist tq;
+ register c;
+
+ CLR(tp->t_lflag, PENDIN);
+ SET(tp->t_state, TS_TYPEN);
+ tq = tp->t_rawq;
+ tp->t_rawq.c_cc = 0;
+ tp->t_rawq.c_cf = tp->t_rawq.c_cl = 0;
+ while ((c = getc(&tq)) >= 0)
+ ttyinput(c, tp);
+ CLR(tp->t_state, TS_TYPEN);
+}
+
+/*
+ * Process a read call on a tty device.
+ */
+int
+ttread(tp, uio, flag)
+ register struct tty *tp;
+ struct uio *uio;
+ int flag;
+{
+ register struct clist *qp;
+ register int c;
+ register long lflag;
+ register u_char *cc = tp->t_cc;
+ register struct proc *p = curproc;
+ int s, first, error = 0;
+
+loop: lflag = tp->t_lflag;
+ s = spltty();
+ /*
+ * take pending input first
+ */
+ if (ISSET(lflag, PENDIN))
+ ttypend(tp);
+ splx(s);
+
+ /*
+ * Hang process if it's in the background.
+ */
+ if (isbackground(p, tp)) {
+ if ((p->p_sigignore & sigmask(SIGTTIN)) ||
+ (p->p_sigmask & sigmask(SIGTTIN)) ||
+ p->p_flag & P_PPWAIT || p->p_pgrp->pg_jobc == 0)
+ return (EIO);
+ pgsignal(p->p_pgrp, SIGTTIN, 1);
+ if (error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, ttybg, 0))
+ return (error);
+ goto loop;
+ }
+
+ /*
+ * If canonical, use the canonical queue,
+ * else use the raw queue.
+ *
+ * (should get rid of clists...)
+ */
+ qp = ISSET(lflag, ICANON) ? &tp->t_canq : &tp->t_rawq;
+
+ /*
+ * If there is no input, sleep on rawq
+ * awaiting hardware receipt and notification.
+ * If we have data, we don't need to check for carrier.
+ */
+ s = spltty();
+ if (qp->c_cc <= 0) {
+ int carrier;
+
+ carrier = ISSET(tp->t_state, TS_CARR_ON) ||
+ ISSET(tp->t_cflag, CLOCAL);
+ if (!carrier && ISSET(tp->t_state, TS_ISOPEN)) {
+ splx(s);
+ return (0); /* EOF */
+ }
+ if (flag & IO_NDELAY) {
+ splx(s);
+ return (EWOULDBLOCK);
+ }
+ error = ttysleep(tp, &tp->t_rawq, TTIPRI | PCATCH,
+ carrier ? ttyin : ttopen, 0);
+ splx(s);
+ if (error)
+ return (error);
+ goto loop;
+ }
+ splx(s);
+
+ /*
+ * Input present, check for input mapping and processing.
+ */
+ first = 1;
+ while ((c = getc(qp)) >= 0) {
+ /*
+ * delayed suspend (^Y)
+ */
+ if (CCEQ(cc[VDSUSP], c) && ISSET(lflag, ISIG)) {
+ pgsignal(tp->t_pgrp, SIGTSTP, 1);
+ if (first) {
+ if (error = ttysleep(tp,
+ &lbolt, TTIPRI | PCATCH, ttybg, 0))
+ break;
+ goto loop;
+ }
+ break;
+ }
+ /*
+ * Interpret EOF only in canonical mode.
+ */
+ if (CCEQ(cc[VEOF], c) && ISSET(lflag, ICANON))
+ break;
+ /*
+ * Give user character.
+ */
+ error = ureadc(c, uio);
+ if (error)
+ break;
+ if (uio->uio_resid == 0)
+ break;
+ /*
+ * In canonical mode check for a "break character"
+ * marking the end of a "line of input".
+ */
+ if (ISSET(lflag, ICANON) && TTBREAKC(c))
+ break;
+ first = 0;
+ }
+ /*
+ * Look to unblock output now that (presumably)
+ * the input queue has gone down.
+ */
+ s = spltty();
+ if (ISSET(tp->t_state, TS_TBLOCK) && tp->t_rawq.c_cc < TTYHOG/5) {
+ if (cc[VSTART] != _POSIX_VDISABLE &&
+ putc(cc[VSTART], &tp->t_outq) == 0) {
+ CLR(tp->t_state, TS_TBLOCK);
+ ttstart(tp);
+ }
+ }
+ splx(s);
+ return (error);
+}
+
+/*
+ * Check the output queue on tp for space for a kernel message (from uprintf
+ * or tprintf). Allow some space over the normal hiwater mark so we don't
+ * lose messages due to normal flow control, but don't let the tty run amok.
+ * Sleeps here are not interruptible, but we return prematurely if new signals
+ * arrive.
+ */
+int
+ttycheckoutq(tp, wait)
+ register struct tty *tp;
+ int wait;
+{
+ int hiwat, s, oldsig;
+
+ hiwat = tp->t_hiwat;
+ s = spltty();
+ oldsig = wait ? curproc->p_siglist : 0;
+ if (tp->t_outq.c_cc > hiwat + 200)
+ while (tp->t_outq.c_cc > hiwat) {
+ ttstart(tp);
+ if (wait == 0 || curproc->p_siglist != oldsig) {
+ splx(s);
+ return (0);
+ }
+ timeout((void (*)__P((void *)))wakeup,
+ (void *)&tp->t_outq, hz);
+ SET(tp->t_state, TS_ASLEEP);
+ sleep((caddr_t)&tp->t_outq, PZERO - 1);
+ }
+ splx(s);
+ return (1);
+}
+
+/*
+ * Process a write call on a tty device.
+ */
+int
+ttwrite(tp, uio, flag)
+ register struct tty *tp;
+ register struct uio *uio;
+ int flag;
+{
+ register char *cp;
+ register int cc, ce;
+ register struct proc *p;
+ int i, hiwat, cnt, error, s;
+ char obuf[OBUFSIZ];
+
+ hiwat = tp->t_hiwat;
+ cnt = uio->uio_resid;
+ error = 0;
+ cc = 0;
+loop:
+ s = spltty();
+ if (!ISSET(tp->t_state, TS_CARR_ON) &&
+ !ISSET(tp->t_cflag, CLOCAL)) {
+ if (ISSET(tp->t_state, TS_ISOPEN)) {
+ splx(s);
+ return (EIO);
+ } else if (flag & IO_NDELAY) {
+ splx(s);
+ error = EWOULDBLOCK;
+ goto out;
+ } else {
+ /* Sleep awaiting carrier. */
+ error = ttysleep(tp,
+ &tp->t_rawq, TTIPRI | PCATCH,ttopen, 0);
+ splx(s);
+ if (error)
+ goto out;
+ goto loop;
+ }
+ }
+ splx(s);
+ /*
+ * Hang the process if it's in the background.
+ */
+ p = curproc;
+ if (isbackground(p, tp) &&
+ ISSET(tp->t_lflag, TOSTOP) && (p->p_flag & P_PPWAIT) == 0 &&
+ (p->p_sigignore & sigmask(SIGTTOU)) == 0 &&
+ (p->p_sigmask & sigmask(SIGTTOU)) == 0 &&
+ p->p_pgrp->pg_jobc) {
+ pgsignal(p->p_pgrp, SIGTTOU, 1);
+ if (error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, ttybg, 0))
+ goto out;
+ goto loop;
+ }
+ /*
+ * Process the user's data in at most OBUFSIZ chunks. Perform any
+ * output translation. Keep track of high water mark, sleep on
+ * overflow awaiting device aid in acquiring new space.
+ */
+ while (uio->uio_resid > 0 || cc > 0) {
+ if (ISSET(tp->t_lflag, FLUSHO)) {
+ uio->uio_resid = 0;
+ return (0);
+ }
+ if (tp->t_outq.c_cc > hiwat)
+ goto ovhiwat;
+ /*
+ * Grab a hunk of data from the user, unless we have some
+ * leftover from last time.
+ */
+ if (cc == 0) {
+ cc = min(uio->uio_resid, OBUFSIZ);
+ cp = obuf;
+ error = uiomove(cp, cc, uio);
+ if (error) {
+ cc = 0;
+ break;
+ }
+ }
+ /*
+ * If nothing fancy need be done, grab those characters we
+ * can handle without any of ttyoutput's processing and
+ * just transfer them to the output q. For those chars
+ * which require special processing (as indicated by the
+ * bits in char_type), call ttyoutput. After processing
+ * a hunk of data, look for FLUSHO so ^O's will take effect
+ * immediately.
+ */
+ while (cc > 0) {
+ if (!ISSET(tp->t_oflag, OPOST))
+ ce = cc;
+ else {
+ ce = cc - scanc((u_int)cc, (u_char *)cp,
+ (u_char *)char_type, CCLASSMASK);
+ /*
+ * If ce is zero, then we're processing
+ * a special character through ttyoutput.
+ */
+ if (ce == 0) {
+ tp->t_rocount = 0;
+ if (ttyoutput(*cp, tp) >= 0) {
+ /* No Clists, wait a bit. */
+ ttstart(tp);
+ if (error = ttysleep(tp, &lbolt,
+ TTOPRI | PCATCH, ttybuf, 0))
+ break;
+ goto loop;
+ }
+ cp++;
+ cc--;
+ if (ISSET(tp->t_lflag, FLUSHO) ||
+ tp->t_outq.c_cc > hiwat)
+ goto ovhiwat;
+ continue;
+ }
+ }
+ /*
+ * A bunch of normal characters have been found.
+ * Transfer them en masse to the output queue and
+ * continue processing at the top of the loop.
+ * If there are any further characters in this
+ * <= OBUFSIZ chunk, the first should be a character
+ * requiring special handling by ttyoutput.
+ */
+ tp->t_rocount = 0;
+ i = b_to_q(cp, ce, &tp->t_outq);
+ ce -= i;
+ tp->t_column += ce;
+ cp += ce, cc -= ce, tk_nout += ce;
+ tp->t_outcc += ce;
+ if (i > 0) {
+ /* No Clists, wait a bit. */
+ ttstart(tp);
+ if (error = ttysleep(tp,
+ &lbolt, TTOPRI | PCATCH, ttybuf, 0))
+ break;
+ goto loop;
+ }
+ if (ISSET(tp->t_lflag, FLUSHO) ||
+ tp->t_outq.c_cc > hiwat)
+ break;
+ }
+ ttstart(tp);
+ }
+out:
+ /*
+ * If cc is nonzero, we leave the uio structure inconsistent, as the
+ * offset and iov pointers have moved forward, but it doesn't matter
+ * (the call will either return short or restart with a new uio).
+ */
+ uio->uio_resid += cc;
+ return (error);
+
+ovhiwat:
+ ttstart(tp);
+ s = spltty();
+ /*
+ * This can only occur if FLUSHO is set in t_lflag,
+ * or if ttstart/oproc is synchronous (or very fast).
+ */
+ if (tp->t_outq.c_cc <= hiwat) {
+ splx(s);
+ goto loop;
+ }
+ if (flag & IO_NDELAY) {
+ splx(s);
+ uio->uio_resid += cc;
+ return (uio->uio_resid == cnt ? EWOULDBLOCK : 0);
+ }
+ SET(tp->t_state, TS_ASLEEP);
+ error = ttysleep(tp, &tp->t_outq, TTOPRI | PCATCH, ttyout, 0);
+ splx(s);
+ if (error)
+ goto out;
+ goto loop;
+}
+
+/*
+ * Rubout one character from the rawq of tp
+ * as cleanly as possible.
+ */
+void
+ttyrub(c, tp)
+ register int c;
+ register struct tty *tp;
+{
+ register char *cp;
+ register int savecol;
+ int tabc, s;
+
+ if (!ISSET(tp->t_lflag, ECHO) || ISSET(tp->t_lflag, EXTPROC))
+ return;
+ CLR(tp->t_lflag, FLUSHO);
+ if (ISSET(tp->t_lflag, ECHOE)) {
+ if (tp->t_rocount == 0) {
+ /*
+ * Screwed by ttwrite; retype
+ */
+ ttyretype(tp);
+ return;
+ }
+ if (c == ('\t' | TTY_QUOTE) || c == ('\n' | TTY_QUOTE))
+ ttyrubo(tp, 2);
+ else {
+ CLR(c, ~TTY_CHARMASK);
+ switch (CCLASS(c)) {
+ case ORDINARY:
+ ttyrubo(tp, 1);
+ break;
+ case BACKSPACE:
+ case CONTROL:
+ case NEWLINE:
+ case RETURN:
+ case VTAB:
+ if (ISSET(tp->t_lflag, ECHOCTL))
+ ttyrubo(tp, 2);
+ break;
+ case TAB:
+ if (tp->t_rocount < tp->t_rawq.c_cc) {
+ ttyretype(tp);
+ return;
+ }
+ s = spltty();
+ savecol = tp->t_column;
+ SET(tp->t_state, TS_CNTTB);
+ SET(tp->t_lflag, FLUSHO);
+ tp->t_column = tp->t_rocol;
+ cp = tp->t_rawq.c_cf;
+ if (cp)
+ tabc = *cp; /* XXX FIX NEXTC */
+ for (; cp; cp = nextc(&tp->t_rawq, cp, &tabc))
+ ttyecho(tabc, tp);
+ CLR(tp->t_lflag, FLUSHO);
+ CLR(tp->t_state, TS_CNTTB);
+ splx(s);
+
+ /* savecol will now be length of the tab. */
+ savecol -= tp->t_column;
+ tp->t_column += savecol;
+ if (savecol > 8)
+ savecol = 8; /* overflow screw */
+ while (--savecol >= 0)
+ (void)ttyoutput('\b', tp);
+ break;
+ default: /* XXX */
+#define PANICSTR "ttyrub: would panic c = %d, val = %d\n"
+ (void)printf(PANICSTR, c, CCLASS(c));
+#ifdef notdef
+ panic(PANICSTR, c, CCLASS(c));
+#endif
+ }
+ }
+ } else if (ISSET(tp->t_lflag, ECHOPRT)) {
+ if (!ISSET(tp->t_state, TS_ERASE)) {
+ SET(tp->t_state, TS_ERASE);
+ (void)ttyoutput('\\', tp);
+ }
+ ttyecho(c, tp);
+ } else
+ ttyecho(tp->t_cc[VERASE], tp);
+ --tp->t_rocount;
+}
+
+/*
+ * Back over cnt characters, erasing them.
+ */
+static void
+ttyrubo(tp, cnt)
+ register struct tty *tp;
+ int cnt;
+{
+
+ while (cnt-- > 0) {
+ (void)ttyoutput('\b', tp);
+ (void)ttyoutput(' ', tp);
+ (void)ttyoutput('\b', tp);
+ }
+}
+
+/*
+ * ttyretype --
+ * Reprint the rawq line. Note, it is assumed that c_cc has already
+ * been checked.
+ */
+void
+ttyretype(tp)
+ register struct tty *tp;
+{
+ register char *cp;
+ int s, c;
+
+ /* Echo the reprint character. */
+ if (tp->t_cc[VREPRINT] != _POSIX_VDISABLE)
+ ttyecho(tp->t_cc[VREPRINT], tp);
+
+ (void)ttyoutput('\n', tp);
+
+ /*
+ * XXX
+ * FIX: NEXTC IS BROKEN - DOESN'T CHECK QUOTE
+ * BIT OF FIRST CHAR.
+ */
+ s = spltty();
+ for (cp = tp->t_canq.c_cf, c = (cp != NULL ? *cp : 0);
+ cp != NULL; cp = nextc(&tp->t_canq, cp, &c))
+ ttyecho(c, tp);
+ for (cp = tp->t_rawq.c_cf, c = (cp != NULL ? *cp : 0);
+ cp != NULL; cp = nextc(&tp->t_rawq, cp, &c))
+ ttyecho(c, tp);
+ CLR(tp->t_state, TS_ERASE);
+ splx(s);
+
+ tp->t_rocount = tp->t_rawq.c_cc;
+ tp->t_rocol = 0;
+}
+
+/*
+ * Echo a typed character to the terminal.
+ */
+static void
+ttyecho(c, tp)
+ register int c;
+ register struct tty *tp;
+{
+
+ if (!ISSET(tp->t_state, TS_CNTTB))
+ CLR(tp->t_lflag, FLUSHO);
+ if ((!ISSET(tp->t_lflag, ECHO) &&
+ (!ISSET(tp->t_lflag, ECHONL) || c == '\n')) ||
+ ISSET(tp->t_lflag, EXTPROC))
+ return;
+ if (ISSET(tp->t_lflag, ECHOCTL) &&
+ (ISSET(c, TTY_CHARMASK) <= 037 && c != '\t' && c != '\n' ||
+ ISSET(c, TTY_CHARMASK) == 0177)) {
+ (void)ttyoutput('^', tp);
+ CLR(c, ~TTY_CHARMASK);
+ if (c == 0177)
+ c = '?';
+ else
+ c += 'A' - 1;
+ }
+ (void)ttyoutput(c, tp);
+}
+
+/*
+ * Wake up any readers on a tty.
+ */
+void
+ttwakeup(tp)
+ register struct tty *tp;
+{
+
+ selwakeup(&tp->t_rsel);
+ if (ISSET(tp->t_state, TS_ASYNC))
+ pgsignal(tp->t_pgrp, SIGIO, 1);
+ wakeup((caddr_t)&tp->t_rawq);
+}
+
+/*
+ * Look up a code for a specified speed in a conversion table;
+ * used by drivers to map software speed values to hardware parameters.
+ */
+int
+ttspeedtab(speed, table)
+ int speed;
+ register struct speedtab *table;
+{
+
+ for ( ; table->sp_speed != -1; table++)
+ if (table->sp_speed == speed)
+ return (table->sp_code);
+ return (-1);
+}
+
+/*
+ * Set tty hi and low water marks.
+ *
+ * Try to arrange the dynamics so there's about one second
+ * from hi to low water.
+ *
+ */
+void
+ttsetwater(tp)
+ struct tty *tp;
+{
+ register int cps, x;
+
+#define CLAMP(x, h, l) ((x) > h ? h : ((x) < l) ? l : (x))
+
+ cps = tp->t_ospeed / 10;
+ tp->t_lowat = x = CLAMP(cps / 2, TTMAXLOWAT, TTMINLOWAT);
+ x += cps;
+ x = CLAMP(x, TTMAXHIWAT, TTMINHIWAT);
+ tp->t_hiwat = roundup(x, CBSIZE);
+#undef CLAMP
+}
+
+/*
+ * Report on state of foreground process group.
+ */
+void
+ttyinfo(tp)
+ register struct tty *tp;
+{
+ register struct proc *p, *pick;
+ struct timeval utime, stime;
+ int tmp;
+
+ if (ttycheckoutq(tp,0) == 0)
+ return;
+
+ /* Print load average. */
+ tmp = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT;
+ ttyprintf(tp, "load: %d.%02d ", tmp / 100, tmp % 100);
+
+ if (tp->t_session == NULL)
+ ttyprintf(tp, "not a controlling terminal\n");
+ else if (tp->t_pgrp == NULL)
+ ttyprintf(tp, "no foreground process group\n");
+ else if ((p = tp->t_pgrp->pg_mem) == NULL)
+ ttyprintf(tp, "empty foreground process group\n");
+ else {
+ /* Pick interesting process. */
+ for (pick = NULL; p != NULL; p = p->p_pgrpnxt)
+ if (proc_compare(pick, p))
+ pick = p;
+
+ ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid,
+ pick->p_stat == SRUN ? "running" :
+ pick->p_wmesg ? pick->p_wmesg : "iowait");
+
+ calcru(pick, &utime, &stime, NULL);
+
+ /* Print user time. */
+ ttyprintf(tp, "%d.%02du ",
+ utime.tv_sec, (utime.tv_usec + 5000) / 10000);
+
+ /* Print system time. */
+ ttyprintf(tp, "%d.%02ds ",
+ stime.tv_sec, (stime.tv_usec + 5000) / 10000);
+
+#define pgtok(a) (((a) * NBPG) / 1024)
+ /* Print percentage cpu, resident set size. */
+ tmp = pick->p_pctcpu * 10000 + FSCALE / 2 >> FSHIFT;
+ ttyprintf(tp, "%d%% %dk\n",
+ tmp / 100,
+ pick->p_stat == SIDL || pick->p_stat == SZOMB ? 0 :
+#ifdef pmap_resident_count
+ pgtok(pmap_resident_count(&pick->p_vmspace->vm_pmap))
+#else
+ pgtok(pick->p_vmspace->vm_rssize)
+#endif
+ );
+ }
+ tp->t_rocount = 0; /* so pending input will be retyped if BS */
+}
+
+/*
+ * Returns 1 if p2 is "better" than p1
+ *
+ * The algorithm for picking the "interesting" process is thus:
+ *
+ * 1) Only foreground processes are eligible - implied.
+ * 2) Runnable processes are favored over anything else. The runner
+ * with the highest cpu utilization is picked (p_estcpu). Ties are
+ * broken by picking the highest pid.
+ * 3) The sleeper with the shortest sleep time is next. With ties,
+ * we pick out just "short-term" sleepers (P_SINTR == 0).
+ * 4) Further ties are broken by picking the highest pid.
+ */
+#define ISRUN(p) (((p)->p_stat == SRUN) || ((p)->p_stat == SIDL))
+#define TESTAB(a, b) ((a)<<1 | (b))
+#define ONLYA 2
+#define ONLYB 1
+#define BOTH 3
+
+static int
+proc_compare(p1, p2)
+ register struct proc *p1, *p2;
+{
+
+ if (p1 == NULL)
+ return (1);
+ /*
+ * see if at least one of them is runnable
+ */
+ switch (TESTAB(ISRUN(p1), ISRUN(p2))) {
+ case ONLYA:
+ return (0);
+ case ONLYB:
+ return (1);
+ case BOTH:
+ /*
+ * tie - favor one with highest recent cpu utilization
+ */
+ if (p2->p_estcpu > p1->p_estcpu)
+ return (1);
+ if (p1->p_estcpu > p2->p_estcpu)
+ return (0);
+ return (p2->p_pid > p1->p_pid); /* tie - return highest pid */
+ }
+ /*
+ * weed out zombies
+ */
+ switch (TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB)) {
+ case ONLYA:
+ return (1);
+ case ONLYB:
+ return (0);
+ case BOTH:
+ return (p2->p_pid > p1->p_pid); /* tie - return highest pid */
+ }
+ /*
+ * pick the one with the smallest sleep time
+ */
+ if (p2->p_slptime > p1->p_slptime)
+ return (0);
+ if (p1->p_slptime > p2->p_slptime)
+ return (1);
+ /*
+ * favor one sleeping in a non-interruptible sleep
+ */
+ if (p1->p_flag & P_SINTR && (p2->p_flag & P_SINTR) == 0)
+ return (1);
+ if (p2->p_flag & P_SINTR && (p1->p_flag & P_SINTR) == 0)
+ return (0);
+ return (p2->p_pid > p1->p_pid); /* tie - return highest pid */
+}
+
+/*
+ * Output char to tty; console putchar style.
+ */
+int
+tputchar(c, tp)
+ int c;
+ struct tty *tp;
+{
+ register int s;
+
+ s = spltty();
+ if (ISSET(tp->t_state,
+ TS_CARR_ON | TS_ISOPEN) != (TS_CARR_ON | TS_ISOPEN)) {
+ splx(s);
+ return (-1);
+ }
+ if (c == '\n')
+ (void)ttyoutput('\r', tp);
+ (void)ttyoutput(c, tp);
+ ttstart(tp);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Sleep on chan, returning ERESTART if tty changed while we napped and
+ * returning any errors (e.g. EINTR/ETIMEDOUT) reported by tsleep. If
+ * the tty is revoked, restarting a pending call will redo validation done
+ * at the start of the call.
+ */
+int
+ttysleep(tp, chan, pri, wmesg, timo)
+ struct tty *tp;
+ void *chan;
+ int pri, timo;
+ char *wmesg;
+{
+ int error;
+ short gen;
+
+ gen = tp->t_gen;
+ if (error = tsleep(chan, pri, wmesg, timo))
+ return (error);
+ return (tp->t_gen == gen ? 0 : ERESTART);
+}
diff --git a/sys/kern/tty_compat.c b/sys/kern/tty_compat.c
new file mode 100644
index 0000000..a6a39d9
--- /dev/null
+++ b/sys/kern/tty_compat.c
@@ -0,0 +1,411 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_compat.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * mapping routines for old line discipline (yuck)
+ */
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/termios.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+int ttydebug = 0;
+
+static struct speedtab compatspeeds[] = {
+ { 38400, 15 },
+ { 19200, 14 },
+ { 9600, 13 },
+ { 4800, 12 },
+ { 2400, 11 },
+ { 1800, 10 },
+ { 1200, 9 },
+ { 600, 8 },
+ { 300, 7 },
+ { 200, 6 },
+ { 150, 5 },
+ { 134, 4 },
+ { 110, 3 },
+ { 75, 2 },
+ { 50, 1 },
+ { 0, 0 },
+ { -1, -1 },
+};
+static int compatspcodes[16] = {
+ 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200,
+ 1800, 2400, 4800, 9600, 19200, 38400,
+};
+
+/*ARGSUSED*/
+ttcompat(tp, com, data, flag)
+ register struct tty *tp;
+ int com;
+ caddr_t data;
+ int flag;
+{
+
+ switch (com) {
+ case TIOCGETP: {
+ register struct sgttyb *sg = (struct sgttyb *)data;
+ register u_char *cc = tp->t_cc;
+ register speed;
+
+ speed = ttspeedtab(tp->t_ospeed, compatspeeds);
+ sg->sg_ospeed = (speed == -1) ? 15 : speed;
+ if (tp->t_ispeed == 0)
+ sg->sg_ispeed = sg->sg_ospeed;
+ else {
+ speed = ttspeedtab(tp->t_ispeed, compatspeeds);
+ sg->sg_ispeed = (speed == -1) ? 15 : speed;
+ }
+ sg->sg_erase = cc[VERASE];
+ sg->sg_kill = cc[VKILL];
+ sg->sg_flags = ttcompatgetflags(tp);
+ break;
+ }
+
+ case TIOCSETP:
+ case TIOCSETN: {
+ register struct sgttyb *sg = (struct sgttyb *)data;
+ struct termios term;
+ int speed;
+
+ term = tp->t_termios;
+ if ((speed = sg->sg_ispeed) > 15 || speed < 0)
+ term.c_ispeed = speed;
+ else
+ term.c_ispeed = compatspcodes[speed];
+ if ((speed = sg->sg_ospeed) > 15 || speed < 0)
+ term.c_ospeed = speed;
+ else
+ term.c_ospeed = compatspcodes[speed];
+ term.c_cc[VERASE] = sg->sg_erase;
+ term.c_cc[VKILL] = sg->sg_kill;
+ tp->t_flags = tp->t_flags&0xffff0000 | sg->sg_flags&0xffff;
+ ttcompatsetflags(tp, &term);
+ return (ttioctl(tp, com == TIOCSETP ? TIOCSETAF : TIOCSETA,
+ &term, flag));
+ }
+
+ case TIOCGETC: {
+ struct tchars *tc = (struct tchars *)data;
+ register u_char *cc = tp->t_cc;
+
+ tc->t_intrc = cc[VINTR];
+ tc->t_quitc = cc[VQUIT];
+ tc->t_startc = cc[VSTART];
+ tc->t_stopc = cc[VSTOP];
+ tc->t_eofc = cc[VEOF];
+ tc->t_brkc = cc[VEOL];
+ break;
+ }
+ case TIOCSETC: {
+ struct tchars *tc = (struct tchars *)data;
+ register u_char *cc = tp->t_cc;
+
+ cc[VINTR] = tc->t_intrc;
+ cc[VQUIT] = tc->t_quitc;
+ cc[VSTART] = tc->t_startc;
+ cc[VSTOP] = tc->t_stopc;
+ cc[VEOF] = tc->t_eofc;
+ cc[VEOL] = tc->t_brkc;
+ if (tc->t_brkc == -1)
+ cc[VEOL2] = _POSIX_VDISABLE;
+ break;
+ }
+ case TIOCSLTC: {
+ struct ltchars *ltc = (struct ltchars *)data;
+ register u_char *cc = tp->t_cc;
+
+ cc[VSUSP] = ltc->t_suspc;
+ cc[VDSUSP] = ltc->t_dsuspc;
+ cc[VREPRINT] = ltc->t_rprntc;
+ cc[VDISCARD] = ltc->t_flushc;
+ cc[VWERASE] = ltc->t_werasc;
+ cc[VLNEXT] = ltc->t_lnextc;
+ break;
+ }
+ case TIOCGLTC: {
+ struct ltchars *ltc = (struct ltchars *)data;
+ register u_char *cc = tp->t_cc;
+
+ ltc->t_suspc = cc[VSUSP];
+ ltc->t_dsuspc = cc[VDSUSP];
+ ltc->t_rprntc = cc[VREPRINT];
+ ltc->t_flushc = cc[VDISCARD];
+ ltc->t_werasc = cc[VWERASE];
+ ltc->t_lnextc = cc[VLNEXT];
+ break;
+ }
+ case TIOCLBIS:
+ case TIOCLBIC:
+ case TIOCLSET: {
+ struct termios term;
+
+ term = tp->t_termios;
+ if (com == TIOCLSET)
+ tp->t_flags = (tp->t_flags&0xffff) | *(int *)data<<16;
+ else {
+ tp->t_flags =
+ (ttcompatgetflags(tp)&0xffff0000)|(tp->t_flags&0xffff);
+ if (com == TIOCLBIS)
+ tp->t_flags |= *(int *)data<<16;
+ else
+ tp->t_flags &= ~(*(int *)data<<16);
+ }
+ ttcompatsetlflags(tp, &term);
+ return (ttioctl(tp, TIOCSETA, &term, flag));
+ }
+ case TIOCLGET:
+ *(int *)data = ttcompatgetflags(tp)>>16;
+ if (ttydebug)
+ printf("CLGET: returning %x\n", *(int *)data);
+ break;
+
+ case OTIOCGETD:
+ *(int *)data = tp->t_line ? tp->t_line : 2;
+ break;
+
+ case OTIOCSETD: {
+ int ldisczero = 0;
+
+ return (ttioctl(tp, TIOCSETD,
+ *(int *)data == 2 ? (caddr_t)&ldisczero : data, flag));
+ }
+
+ case OTIOCCONS:
+ *(int *)data = 1;
+ return (ttioctl(tp, TIOCCONS, data, flag));
+
+ default:
+ return (-1);
+ }
+ return (0);
+}
+
+ttcompatgetflags(tp)
+ register struct tty *tp;
+{
+ register long iflag = tp->t_iflag;
+ register long lflag = tp->t_lflag;
+ register long oflag = tp->t_oflag;
+ register long cflag = tp->t_cflag;
+ register flags = 0;
+
+ if (iflag&IXOFF)
+ flags |= TANDEM;
+ if (iflag&ICRNL || oflag&ONLCR)
+ flags |= CRMOD;
+ if (cflag&PARENB) {
+ if (iflag&INPCK) {
+ if (cflag&PARODD)
+ flags |= ODDP;
+ else
+ flags |= EVENP;
+ } else
+ flags |= EVENP | ODDP;
+ } else {
+ if ((tp->t_flags&LITOUT) && !(oflag&OPOST))
+ flags |= LITOUT;
+ if (tp->t_flags&PASS8)
+ flags |= PASS8;
+ }
+
+ if ((lflag&ICANON) == 0) {
+ /* fudge */
+ if (iflag&IXON || lflag&ISIG || lflag&IEXTEN || cflag&PARENB)
+ flags |= CBREAK;
+ else
+ flags |= RAW;
+ }
+ if (cflag&MDMBUF)
+ flags |= MDMBUF;
+ if ((cflag&HUPCL) == 0)
+ flags |= NOHANG;
+ if (oflag&OXTABS)
+ flags |= XTABS;
+ if (lflag&ECHOE)
+ flags |= CRTERA|CRTBS;
+ if (lflag&ECHOKE)
+ flags |= CRTKIL|CRTBS;
+ if (lflag&ECHOPRT)
+ flags |= PRTERA;
+ if (lflag&ECHOCTL)
+ flags |= CTLECH;
+ if ((iflag&IXANY) == 0)
+ flags |= DECCTQ;
+ flags |= lflag&(ECHO|TOSTOP|FLUSHO|PENDIN|NOFLSH);
+if (ttydebug)
+ printf("getflags: %x\n", flags);
+ return (flags);
+}
+
+ttcompatsetflags(tp, t)
+ register struct tty *tp;
+ register struct termios *t;
+{
+ register flags = tp->t_flags;
+ register long iflag = t->c_iflag;
+ register long oflag = t->c_oflag;
+ register long lflag = t->c_lflag;
+ register long cflag = t->c_cflag;
+
+ if (flags & RAW) {
+ iflag &= IXOFF;
+ oflag &= ~OPOST;
+ lflag &= ~(ECHOCTL|ISIG|ICANON|IEXTEN);
+ } else {
+ iflag |= BRKINT|IXON|IMAXBEL;
+ oflag |= OPOST;
+ lflag |= ISIG|IEXTEN|ECHOCTL; /* XXX was echoctl on ? */
+ if (flags & XTABS)
+ oflag |= OXTABS;
+ else
+ oflag &= ~OXTABS;
+ if (flags & CBREAK)
+ lflag &= ~ICANON;
+ else
+ lflag |= ICANON;
+ if (flags&CRMOD) {
+ iflag |= ICRNL;
+ oflag |= ONLCR;
+ } else {
+ iflag &= ~ICRNL;
+ oflag &= ~ONLCR;
+ }
+ }
+ if (flags&ECHO)
+ lflag |= ECHO;
+ else
+ lflag &= ~ECHO;
+
+ if (flags&(RAW|LITOUT|PASS8)) {
+ cflag &= ~(CSIZE|PARENB);
+ cflag |= CS8;
+ if ((flags&(RAW|PASS8)) == 0)
+ iflag |= ISTRIP;
+ else
+ iflag &= ~ISTRIP;
+ } else {
+ cflag &= ~CSIZE;
+ cflag |= CS7|PARENB;
+ iflag |= ISTRIP;
+ }
+ if ((flags&(EVENP|ODDP)) == EVENP) {
+ iflag |= INPCK;
+ cflag &= ~PARODD;
+ } else if ((flags&(EVENP|ODDP)) == ODDP) {
+ iflag |= INPCK;
+ cflag |= PARODD;
+ } else
+ iflag &= ~INPCK;
+ if (flags&LITOUT)
+ oflag &= ~OPOST; /* move earlier ? */
+ if (flags&TANDEM)
+ iflag |= IXOFF;
+ else
+ iflag &= ~IXOFF;
+ t->c_iflag = iflag;
+ t->c_oflag = oflag;
+ t->c_lflag = lflag;
+ t->c_cflag = cflag;
+}
+
+ttcompatsetlflags(tp, t)
+ register struct tty *tp;
+ register struct termios *t;
+{
+ register flags = tp->t_flags;
+ register long iflag = t->c_iflag;
+ register long oflag = t->c_oflag;
+ register long lflag = t->c_lflag;
+ register long cflag = t->c_cflag;
+
+ if (flags&CRTERA)
+ lflag |= ECHOE;
+ else
+ lflag &= ~ECHOE;
+ if (flags&CRTKIL)
+ lflag |= ECHOKE;
+ else
+ lflag &= ~ECHOKE;
+ if (flags&PRTERA)
+ lflag |= ECHOPRT;
+ else
+ lflag &= ~ECHOPRT;
+ if (flags&CTLECH)
+ lflag |= ECHOCTL;
+ else
+ lflag &= ~ECHOCTL;
+ if ((flags&DECCTQ) == 0)
+ iflag |= IXANY;
+ else
+ iflag &= ~IXANY;
+ if (flags & MDMBUF)
+ cflag |= MDMBUF;
+ else
+ cflag &= ~MDMBUF;
+ if (flags&NOHANG)
+ cflag &= ~HUPCL;
+ else
+ cflag |= HUPCL;
+ lflag &= ~(TOSTOP|FLUSHO|PENDIN|NOFLSH);
+ lflag |= flags&(TOSTOP|FLUSHO|PENDIN|NOFLSH);
+ if (flags&(LITOUT|PASS8)) {
+ iflag &= ~ISTRIP;
+ cflag &= ~(CSIZE|PARENB);
+ cflag |= CS8;
+ if (flags&LITOUT)
+ oflag &= ~OPOST;
+ if ((flags&(PASS8|RAW)) == 0)
+ iflag |= ISTRIP;
+ } else if ((flags&RAW) == 0) {
+ cflag &= ~CSIZE;
+ cflag |= CS7|PARENB;
+ oflag |= OPOST;
+ }
+ t->c_iflag = iflag;
+ t->c_oflag = oflag;
+ t->c_lflag = lflag;
+ t->c_cflag = cflag;
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
diff --git a/sys/kern/tty_conf.c b/sys/kern/tty_conf.c
new file mode 100644
index 0000000..b53edb42
--- /dev/null
+++ b/sys/kern/tty_conf.c
@@ -0,0 +1,126 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_conf.c 8.4 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/conf.h>
+
+#define ttynodisc ((int (*) __P((dev_t, struct tty *)))enodev)
+#define ttyerrclose ((int (*) __P((struct tty *, int flags)))enodev)
+#define ttyerrio ((int (*) __P((struct tty *, struct uio *, int)))enodev)
+#define ttyerrinput ((int (*) __P((int c, struct tty *)))enodev)
+#define ttyerrstart ((int (*) __P((struct tty *)))enodev)
+
+int nullioctl __P((struct tty *tp, int cmd, caddr_t data,
+ int flag, struct proc *p));
+
+#include "tb.h"
+#if NTB > 0
+int tbopen __P((dev_t dev, struct tty *tp));
+int tbclose __P((struct tty *tp, int flags));
+int tbread __P((struct tty *, struct uio *, int flags));
+int tbioctl __P((struct tty *tp, int cmd, caddr_t data,
+ int flag, struct proc *p));
+int tbinput __P((int c, struct tty *tp));
+#endif
+
+#include "sl.h"
+#if NSL > 0
+int slopen __P((dev_t dev, struct tty *tp));
+int slclose __P((struct tty *tp, int flags));
+int sltioctl __P((struct tty *tp, int cmd, caddr_t data,
+ int flag, struct proc *p));
+int slinput __P((int c, struct tty *tp));
+int slstart __P((struct tty *tp));
+#endif
+
+
+struct linesw linesw[] =
+{
+ { ttyopen, ttylclose, ttread, ttwrite, nullioctl,
+ ttyinput, ttstart, ttymodem }, /* 0- termios */
+
+ { ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+ ttyerrinput, ttyerrstart, nullmodem }, /* 1- defunct */
+
+ { ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+ ttyerrinput, ttyerrstart, nullmodem }, /* 2- defunct */
+
+#if NTB > 0
+ { tbopen, tbclose, tbread, enodev, tbioctl,
+ tbinput, ttstart, nullmodem }, /* 3- TABLDISC */
+#else
+ { ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+ ttyerrinput, ttyerrstart, nullmodem },
+#endif
+
+#if NSL > 0
+ { slopen, slclose, ttyerrio, ttyerrio, sltioctl,
+ slinput, slstart, nullmodem }, /* 4- SLIPDISC */
+#else
+ { ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+ ttyerrinput, ttyerrstart, nullmodem },
+#endif
+};
+
+int nlinesw = sizeof (linesw) / sizeof (linesw[0]);
+
+/*
+ * Do nothing specific version of line
+ * discipline specific ioctl command.
+ */
+/*ARGSUSED*/
+nullioctl(tp, cmd, data, flags, p)
+ struct tty *tp;
+ int cmd;
+ char *data;
+ int flags;
+ struct proc *p;
+{
+
+#ifdef lint
+ tp = tp; data = data; flags = flags; p = p;
+#endif
+ return (-1);
+}
diff --git a/sys/kern/tty_pty.c b/sys/kern/tty_pty.c
new file mode 100644
index 0000000..0e6911b
--- /dev/null
+++ b/sys/kern/tty_pty.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_pty.c 8.2 (Berkeley) 9/23/93
+ */
+
+/*
+ * Pseudo-teletype Driver
+ * (Actually two drivers, requiring two entries in 'cdevsw')
+ */
+#include "pty.h" /* XXX */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/conf.h>
+#include <sys/file.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+
+#if NPTY == 1
+#undef NPTY
+#define NPTY 32 /* crude XXX */
+#endif
+
+#define BUFSIZ 100 /* Chunk size iomoved to/from user */
+
+/*
+ * pts == /dev/tty[pqrs]?
+ * ptc == /dev/pty[pqrs]?
+ */
+struct tty pt_tty[NPTY]; /* XXX */
+struct pt_ioctl {
+ int pt_flags;
+ struct selinfo pt_selr, pt_selw;
+ u_char pt_send;
+ u_char pt_ucntl;
+} pt_ioctl[NPTY]; /* XXX */
+int npty = NPTY; /* for pstat -t */
+
+#define PF_PKT 0x08 /* packet mode */
+#define PF_STOPPED 0x10 /* user told stopped */
+#define PF_REMOTE 0x20 /* remote and flow controlled input */
+#define PF_NOSTOP 0x40
+#define PF_UCNTL 0x80 /* user control mode */
+
+void ptsstop __P((struct tty *, int));
+
+/*
+ * Establish n (or default if n is 1) ptys in the system.
+ *
+ * XXX cdevsw & pstat require the array `pty[]' to be an array
+ */
+void
+ptyattach(n)
+ int n;
+{
+#ifdef notyet
+ char *mem;
+ register u_long ntb;
+#define DEFAULT_NPTY 32
+
+ /* maybe should allow 0 => none? */
+ if (n <= 1)
+ n = DEFAULT_NPTY;
+ ntb = n * sizeof(struct tty);
+ mem = malloc(ntb + ALIGNBYTES + n * sizeof(struct pt_ioctl),
+ M_DEVBUF, M_WAITOK);
+ pt_tty = (struct tty *)mem;
+ mem = (char *)ALIGN(mem + ntb);
+ pt_ioctl = (struct pt_ioctl *)mem;
+ npty = n;
+#endif
+}
+
+/*ARGSUSED*/
+ptsopen(dev, flag, devtype, p)
+ dev_t dev;
+ int flag, devtype;
+ struct proc *p;
+{
+ register struct tty *tp;
+ int error;
+
+ if (minor(dev) >= npty)
+ return (ENXIO);
+ tp = &pt_tty[minor(dev)];
+ if ((tp->t_state & TS_ISOPEN) == 0) {
+ tp->t_state |= TS_WOPEN;
+ ttychars(tp); /* Set up default chars */
+ tp->t_iflag = TTYDEF_IFLAG;
+ tp->t_oflag = TTYDEF_OFLAG;
+ tp->t_lflag = TTYDEF_LFLAG;
+ tp->t_cflag = TTYDEF_CFLAG;
+ tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
+ ttsetwater(tp); /* would be done in xxparam() */
+ } else if (tp->t_state&TS_XCLUDE && p->p_ucred->cr_uid != 0)
+ return (EBUSY);
+ if (tp->t_oproc) /* Ctrlr still around. */
+ tp->t_state |= TS_CARR_ON;
+ while ((tp->t_state & TS_CARR_ON) == 0) {
+ tp->t_state |= TS_WOPEN;
+ if (flag&FNONBLOCK)
+ break;
+ if (error = ttysleep(tp, (caddr_t)&tp->t_rawq, TTIPRI | PCATCH,
+ ttopen, 0))
+ return (error);
+ }
+ error = (*linesw[tp->t_line].l_open)(dev, tp);
+ ptcwakeup(tp, FREAD|FWRITE);
+ return (error);
+}
+
+ptsclose(dev, flag, mode, p)
+ dev_t dev;
+ int flag, mode;
+ struct proc *p;
+{
+ register struct tty *tp;
+ int err;
+
+ tp = &pt_tty[minor(dev)];
+ err = (*linesw[tp->t_line].l_close)(tp, flag);
+ err |= ttyclose(tp);
+ ptcwakeup(tp, FREAD|FWRITE);
+ return (err);
+}
+
+ptsread(dev, uio, flag)
+ dev_t dev;
+ struct uio *uio;
+ int flag;
+{
+ struct proc *p = curproc;
+ register struct tty *tp = &pt_tty[minor(dev)];
+ register struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+ int error = 0;
+
+again:
+ if (pti->pt_flags & PF_REMOTE) {
+ while (isbackground(p, tp)) {
+ if ((p->p_sigignore & sigmask(SIGTTIN)) ||
+ (p->p_sigmask & sigmask(SIGTTIN)) ||
+ p->p_pgrp->pg_jobc == 0 ||
+ p->p_flag & P_PPWAIT)
+ return (EIO);
+ pgsignal(p->p_pgrp, SIGTTIN, 1);
+ if (error = ttysleep(tp, (caddr_t)&lbolt,
+ TTIPRI | PCATCH, ttybg, 0))
+ return (error);
+ }
+ if (tp->t_canq.c_cc == 0) {
+ if (flag & IO_NDELAY)
+ return (EWOULDBLOCK);
+ if (error = ttysleep(tp, (caddr_t)&tp->t_canq,
+ TTIPRI | PCATCH, ttyin, 0))
+ return (error);
+ goto again;
+ }
+ while (tp->t_canq.c_cc > 1 && uio->uio_resid > 0)
+ if (ureadc(getc(&tp->t_canq), uio) < 0) {
+ error = EFAULT;
+ break;
+ }
+ if (tp->t_canq.c_cc == 1)
+ (void) getc(&tp->t_canq);
+ if (tp->t_canq.c_cc)
+ return (error);
+ } else
+ if (tp->t_oproc)
+ error = (*linesw[tp->t_line].l_read)(tp, uio, flag);
+ ptcwakeup(tp, FWRITE);
+ return (error);
+}
+
+/*
+ * Write to pseudo-tty.
+ * Wakeups of controlling tty will happen
+ * indirectly, when tty driver calls ptsstart.
+ */
+ptswrite(dev, uio, flag)
+ dev_t dev;
+ struct uio *uio;
+ int flag;
+{
+ register struct tty *tp;
+
+ tp = &pt_tty[minor(dev)];
+ if (tp->t_oproc == 0)
+ return (EIO);
+ return ((*linesw[tp->t_line].l_write)(tp, uio, flag));
+}
+
+/*
+ * Start output on pseudo-tty.
+ * Wake up process selecting or sleeping for input from controlling tty.
+ */
+void
+ptsstart(tp)
+ struct tty *tp;
+{
+ register struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];
+
+ if (tp->t_state & TS_TTSTOP)
+ return;
+ if (pti->pt_flags & PF_STOPPED) {
+ pti->pt_flags &= ~PF_STOPPED;
+ pti->pt_send = TIOCPKT_START;
+ }
+ ptcwakeup(tp, FREAD);
+}
+
+ptcwakeup(tp, flag)
+ struct tty *tp;
+ int flag;
+{
+ struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];
+
+ if (flag & FREAD) {
+ selwakeup(&pti->pt_selr);
+ wakeup((caddr_t)&tp->t_outq.c_cf);
+ }
+ if (flag & FWRITE) {
+ selwakeup(&pti->pt_selw);
+ wakeup((caddr_t)&tp->t_rawq.c_cf);
+ }
+}
+
+/*ARGSUSED*/
+#ifdef __STDC__
+ptcopen(dev_t dev, int flag, int devtype, struct proc *p)
+#else
+ptcopen(dev, flag, devtype, p)
+ dev_t dev;
+ int flag, devtype;
+ struct proc *p;
+#endif
+{
+ register struct tty *tp;
+ struct pt_ioctl *pti;
+
+ if (minor(dev) >= npty)
+ return (ENXIO);
+ tp = &pt_tty[minor(dev)];
+ if (tp->t_oproc)
+ return (EIO);
+ tp->t_oproc = ptsstart;
+#ifdef sun4c
+ tp->t_stop = ptsstop;
+#endif
+ (void)(*linesw[tp->t_line].l_modem)(tp, 1);
+ tp->t_lflag &= ~EXTPROC;
+ pti = &pt_ioctl[minor(dev)];
+ pti->pt_flags = 0;
+ pti->pt_send = 0;
+ pti->pt_ucntl = 0;
+ return (0);
+}
+
+ptcclose(dev)
+ dev_t dev;
+{
+ register struct tty *tp;
+
+ tp = &pt_tty[minor(dev)];
+ (void)(*linesw[tp->t_line].l_modem)(tp, 0);
+ tp->t_state &= ~TS_CARR_ON;
+ tp->t_oproc = 0; /* mark closed */
+ tp->t_session = 0;
+ return (0);
+}
+
+ptcread(dev, uio, flag)
+ dev_t dev;
+ struct uio *uio;
+ int flag;
+{
+ register struct tty *tp = &pt_tty[minor(dev)];
+ struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+ char buf[BUFSIZ];
+ int error = 0, cc;
+
+ /*
+ * We want to block until the slave
+ * is open, and there's something to read;
+ * but if we lost the slave or we're NBIO,
+ * then return the appropriate error instead.
+ */
+ for (;;) {
+ if (tp->t_state&TS_ISOPEN) {
+ if (pti->pt_flags&PF_PKT && pti->pt_send) {
+ error = ureadc((int)pti->pt_send, uio);
+ if (error)
+ return (error);
+ if (pti->pt_send & TIOCPKT_IOCTL) {
+ cc = min(uio->uio_resid,
+ sizeof(tp->t_termios));
+ uiomove(&tp->t_termios, cc, uio);
+ }
+ pti->pt_send = 0;
+ return (0);
+ }
+ if (pti->pt_flags&PF_UCNTL && pti->pt_ucntl) {
+ error = ureadc((int)pti->pt_ucntl, uio);
+ if (error)
+ return (error);
+ pti->pt_ucntl = 0;
+ return (0);
+ }
+ if (tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0)
+ break;
+ }
+ if ((tp->t_state&TS_CARR_ON) == 0)
+ return (0); /* EOF */
+ if (flag & IO_NDELAY)
+ return (EWOULDBLOCK);
+ if (error = tsleep((caddr_t)&tp->t_outq.c_cf, TTIPRI | PCATCH,
+ ttyin, 0))
+ return (error);
+ }
+ if (pti->pt_flags & (PF_PKT|PF_UCNTL))
+ error = ureadc(0, uio);
+ while (uio->uio_resid > 0 && error == 0) {
+ cc = q_to_b(&tp->t_outq, buf, min(uio->uio_resid, BUFSIZ));
+ if (cc <= 0)
+ break;
+ error = uiomove(buf, cc, uio);
+ }
+ if (tp->t_outq.c_cc <= tp->t_lowat) {
+ if (tp->t_state&TS_ASLEEP) {
+ tp->t_state &= ~TS_ASLEEP;
+ wakeup((caddr_t)&tp->t_outq);
+ }
+ selwakeup(&tp->t_wsel);
+ }
+ return (error);
+}
+
+void
+ptsstop(tp, flush)
+ register struct tty *tp;
+ int flush;
+{
+ struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];
+ int flag;
+
+ /* note: FLUSHREAD and FLUSHWRITE already ok */
+ if (flush == 0) {
+ flush = TIOCPKT_STOP;
+ pti->pt_flags |= PF_STOPPED;
+ } else
+ pti->pt_flags &= ~PF_STOPPED;
+ pti->pt_send |= flush;
+ /* change of perspective */
+ flag = 0;
+ if (flush & FREAD)
+ flag |= FWRITE;
+ if (flush & FWRITE)
+ flag |= FREAD;
+ ptcwakeup(tp, flag);
+}
+
+ptcselect(dev, rw, p)
+ dev_t dev;
+ int rw;
+ struct proc *p;
+{
+ register struct tty *tp = &pt_tty[minor(dev)];
+ struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+ int s;
+
+ if ((tp->t_state&TS_CARR_ON) == 0)
+ return (1);
+ switch (rw) {
+
+ case FREAD:
+ /*
+ * Need to block timeouts (ttrstart).
+ */
+ s = spltty();
+ if ((tp->t_state&TS_ISOPEN) &&
+ tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) {
+ splx(s);
+ return (1);
+ }
+ splx(s);
+ /* FALLTHROUGH */
+
+ case 0: /* exceptional */
+ if ((tp->t_state&TS_ISOPEN) &&
+ (pti->pt_flags&PF_PKT && pti->pt_send ||
+ pti->pt_flags&PF_UCNTL && pti->pt_ucntl))
+ return (1);
+ selrecord(p, &pti->pt_selr);
+ break;
+
+
+ case FWRITE:
+ if (tp->t_state&TS_ISOPEN) {
+ if (pti->pt_flags & PF_REMOTE) {
+ if (tp->t_canq.c_cc == 0)
+ return (1);
+ } else {
+ if (tp->t_rawq.c_cc + tp->t_canq.c_cc < TTYHOG-2)
+ return (1);
+ if (tp->t_canq.c_cc == 0 && (tp->t_iflag&ICANON))
+ return (1);
+ }
+ }
+ selrecord(p, &pti->pt_selw);
+ break;
+
+ }
+ return (0);
+}
+
+ptcwrite(dev, uio, flag)
+ dev_t dev;
+ register struct uio *uio;
+ int flag;
+{
+ register struct tty *tp = &pt_tty[minor(dev)];
+ register u_char *cp;
+ register int cc = 0;
+ u_char locbuf[BUFSIZ];
+ int cnt = 0;
+ struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+ int error = 0;
+
+again:
+ if ((tp->t_state&TS_ISOPEN) == 0)
+ goto block;
+ if (pti->pt_flags & PF_REMOTE) {
+ if (tp->t_canq.c_cc)
+ goto block;
+ while (uio->uio_resid > 0 && tp->t_canq.c_cc < TTYHOG - 1) {
+ if (cc == 0) {
+ cc = min(uio->uio_resid, BUFSIZ);
+ cc = min(cc, TTYHOG - 1 - tp->t_canq.c_cc);
+ cp = locbuf;
+ error = uiomove((caddr_t)cp, cc, uio);
+ if (error)
+ return (error);
+ /* check again for safety */
+ if ((tp->t_state&TS_ISOPEN) == 0)
+ return (EIO);
+ }
+ if (cc)
+ (void) b_to_q((char *)cp, cc, &tp->t_canq);
+ cc = 0;
+ }
+ (void) putc(0, &tp->t_canq);
+ ttwakeup(tp);
+ wakeup((caddr_t)&tp->t_canq);
+ return (0);
+ }
+ while (uio->uio_resid > 0) {
+ if (cc == 0) {
+ cc = min(uio->uio_resid, BUFSIZ);
+ cp = locbuf;
+ error = uiomove((caddr_t)cp, cc, uio);
+ if (error)
+ return (error);
+ /* check again for safety */
+ if ((tp->t_state&TS_ISOPEN) == 0)
+ return (EIO);
+ }
+ while (cc > 0) {
+ if ((tp->t_rawq.c_cc + tp->t_canq.c_cc) >= TTYHOG - 2 &&
+ (tp->t_canq.c_cc > 0 || !(tp->t_iflag&ICANON))) {
+ wakeup((caddr_t)&tp->t_rawq);
+ goto block;
+ }
+ (*linesw[tp->t_line].l_rint)(*cp++, tp);
+ cnt++;
+ cc--;
+ }
+ cc = 0;
+ }
+ return (0);
+block:
+ /*
+ * Come here to wait for slave to open, for space
+ * in outq, or space in rawq.
+ */
+ if ((tp->t_state&TS_CARR_ON) == 0)
+ return (EIO);
+ if (flag & IO_NDELAY) {
+ /* adjust for data copied in but not written */
+ uio->uio_resid += cc;
+ if (cnt == 0)
+ return (EWOULDBLOCK);
+ return (0);
+ }
+ if (error = tsleep((caddr_t)&tp->t_rawq.c_cf, TTOPRI | PCATCH,
+ ttyout, 0)) {
+ /* adjust for data copied in but not written */
+ uio->uio_resid += cc;
+ return (error);
+ }
+ goto again;
+}
+
+/*ARGSUSED*/
+ptyioctl(dev, cmd, data, flag, p)
+ dev_t dev;
+ int cmd;
+ caddr_t data;
+ int flag;
+ struct proc *p;
+{
+ register struct tty *tp = &pt_tty[minor(dev)];
+ register struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+ register u_char *cc = tp->t_cc;
+ int stop, error;
+
+ /*
+ * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG.
+ * ttywflush(tp) will hang if there are characters in the outq.
+ */
+ if (cmd == TIOCEXT) {
+ /*
+ * When the EXTPROC bit is being toggled, we need
+ * to send an TIOCPKT_IOCTL if the packet driver
+ * is turned on.
+ */
+ if (*(int *)data) {
+ if (pti->pt_flags & PF_PKT) {
+ pti->pt_send |= TIOCPKT_IOCTL;
+ ptcwakeup(tp, FREAD);
+ }
+ tp->t_lflag |= EXTPROC;
+ } else {
+ if ((tp->t_state & EXTPROC) &&
+ (pti->pt_flags & PF_PKT)) {
+ pti->pt_send |= TIOCPKT_IOCTL;
+ ptcwakeup(tp, FREAD);
+ }
+ tp->t_lflag &= ~EXTPROC;
+ }
+ return(0);
+ } else
+ if (cdevsw[major(dev)].d_open == ptcopen)
+ switch (cmd) {
+
+ case TIOCGPGRP:
+ /*
+ * We aviod calling ttioctl on the controller since,
+ * in that case, tp must be the controlling terminal.
+ */
+ *(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : 0;
+ return (0);
+
+ case TIOCPKT:
+ if (*(int *)data) {
+ if (pti->pt_flags & PF_UCNTL)
+ return (EINVAL);
+ pti->pt_flags |= PF_PKT;
+ } else
+ pti->pt_flags &= ~PF_PKT;
+ return (0);
+
+ case TIOCUCNTL:
+ if (*(int *)data) {
+ if (pti->pt_flags & PF_PKT)
+ return (EINVAL);
+ pti->pt_flags |= PF_UCNTL;
+ } else
+ pti->pt_flags &= ~PF_UCNTL;
+ return (0);
+
+ case TIOCREMOTE:
+ if (*(int *)data)
+ pti->pt_flags |= PF_REMOTE;
+ else
+ pti->pt_flags &= ~PF_REMOTE;
+ ttyflush(tp, FREAD|FWRITE);
+ return (0);
+
+#ifdef COMPAT_43
+ case TIOCSETP:
+ case TIOCSETN:
+#endif
+ case TIOCSETD:
+ case TIOCSETA:
+ case TIOCSETAW:
+ case TIOCSETAF:
+ ndflush(&tp->t_outq, tp->t_outq.c_cc);
+ break;
+
+ case TIOCSIG:
+ if (*(unsigned int *)data >= NSIG)
+ return(EINVAL);
+ if ((tp->t_lflag&NOFLSH) == 0)
+ ttyflush(tp, FREAD|FWRITE);
+ pgsignal(tp->t_pgrp, *(unsigned int *)data, 1);
+ if ((*(unsigned int *)data == SIGINFO) &&
+ ((tp->t_lflag&NOKERNINFO) == 0))
+ ttyinfo(tp);
+ return(0);
+ }
+ error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p);
+ if (error < 0)
+ error = ttioctl(tp, cmd, data, flag);
+ if (error < 0) {
+ if (pti->pt_flags & PF_UCNTL &&
+ (cmd & ~0xff) == UIOCCMD(0)) {
+ if (cmd & 0xff) {
+ pti->pt_ucntl = (u_char)cmd;
+ ptcwakeup(tp, FREAD);
+ }
+ return (0);
+ }
+ error = ENOTTY;
+ }
+ /*
+ * If external processing and packet mode send ioctl packet.
+ */
+ if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) {
+ switch(cmd) {
+ case TIOCSETA:
+ case TIOCSETAW:
+ case TIOCSETAF:
+#ifdef COMPAT_43
+ case TIOCSETP:
+ case TIOCSETN:
+#endif
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ case TIOCSETC:
+ case TIOCSLTC:
+ case TIOCLBIS:
+ case TIOCLBIC:
+ case TIOCLSET:
+#endif
+ pti->pt_send |= TIOCPKT_IOCTL;
+ ptcwakeup(tp, FREAD);
+ default:
+ break;
+ }
+ }
+ stop = (tp->t_iflag & IXON) && CCEQ(cc[VSTOP], CTRL('s'))
+ && CCEQ(cc[VSTART], CTRL('q'));
+ if (pti->pt_flags & PF_NOSTOP) {
+ if (stop) {
+ pti->pt_send &= ~TIOCPKT_NOSTOP;
+ pti->pt_send |= TIOCPKT_DOSTOP;
+ pti->pt_flags &= ~PF_NOSTOP;
+ ptcwakeup(tp, FREAD);
+ }
+ } else {
+ if (!stop) {
+ pti->pt_send &= ~TIOCPKT_DOSTOP;
+ pti->pt_send |= TIOCPKT_NOSTOP;
+ pti->pt_flags |= PF_NOSTOP;
+ ptcwakeup(tp, FREAD);
+ }
+ }
+ return (error);
+}
diff --git a/sys/kern/tty_subr.c b/sys/kern/tty_subr.c
new file mode 100644
index 0000000..fe8f000
--- /dev/null
+++ b/sys/kern/tty_subr.c
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)tty_subr.c 8.2 (Berkeley) 9/5/93
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+char cwaiting;
+struct cblock *cfree, *cfreelist;
+int cfreecount, nclist;
+
+void
+clist_init()
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+getc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((char)0);
+}
+
+q_to_b(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+ndqb(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+void
+ndflush(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+putc(a1, a2)
+ char a1;
+ struct clist *a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+b_to_q(a1, a2, a3)
+ char *a1;
+ int a2;
+ struct clist *a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+char *
+nextc(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int *a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((char *)0);
+}
+
+unputc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((char)0);
+}
+
+void
+catq(a1, a2)
+ struct clist *a1, *a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
diff --git a/sys/kern/tty_tb.c b/sys/kern/tty_tb.c
new file mode 100644
index 0000000..242301a
--- /dev/null
+++ b/sys/kern/tty_tb.c
@@ -0,0 +1,366 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_tb.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include "tb.h"
+#if NTB > 0
+
+/*
+ * Line discipline for RS232 tablets;
+ * supplies binary coordinate data.
+ */
+#include <sys/param.h>
+#include <sys/tablet.h>
+#include <sys/tty.h>
+
+/*
+ * Tablet configuration table.
+ */
+struct tbconf {
+ short tbc_recsize; /* input record size in bytes */
+ short tbc_uiosize; /* size of data record returned user */
+ int tbc_sync; /* mask for finding sync byte/bit */
+ int (*tbc_decode)();/* decoding routine */
+ char *tbc_run; /* enter run mode sequence */
+ char *tbc_point; /* enter point mode sequence */
+ char *tbc_stop; /* stop sequence */
+ char *tbc_start; /* start/restart sequence */
+ int tbc_flags;
+#define TBF_POL 0x1 /* polhemus hack */
+#define TBF_INPROX 0x2 /* tablet has proximity info */
+};
+
+static int tbdecode(), gtcodecode(), poldecode();
+static int tblresdecode(), tbhresdecode();
+
+struct tbconf tbconf[TBTYPE] = {
+{ 0 },
+{ 5, sizeof (struct tbpos), 0200, tbdecode, "6", "4" },
+{ 5, sizeof (struct tbpos), 0200, tbdecode, "\1CN", "\1RT", "\2", "\4" },
+{ 8, sizeof (struct gtcopos), 0200, gtcodecode },
+{17, sizeof (struct polpos), 0200, poldecode, 0, 0, "\21", "\5\22\2\23",
+ TBF_POL },
+{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CN", "\1PT", "\2", "\4",
+ TBF_INPROX },
+{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CN", "\1PT", "\2", "\4",
+ TBF_INPROX },
+{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CL\33", "\1PT\33", 0, 0},
+{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CL\33", "\1PT\33", 0, 0},
+};
+
+/*
+ * Tablet state
+ */
+struct tb {
+ int tbflags; /* mode & type bits */
+#define TBMAXREC 17 /* max input record size */
+ char cbuf[TBMAXREC]; /* input buffer */
+ union {
+ struct tbpos tbpos;
+ struct gtcopos gtcopos;
+ struct polpos polpos;
+ } rets; /* processed state */
+#define NTBS 16
+} tb[NTBS];
+
+/*
+ * Open as tablet discipline; called on discipline change.
+ */
+/*ARGSUSED*/
+tbopen(dev, tp)
+ dev_t dev;
+ register struct tty *tp;
+{
+ register struct tb *tbp;
+
+ if (tp->t_line == TABLDISC)
+ return (ENODEV);
+ ttywflush(tp);
+ for (tbp = tb; tbp < &tb[NTBS]; tbp++)
+ if (tbp->tbflags == 0)
+ break;
+ if (tbp >= &tb[NTBS])
+ return (EBUSY);
+ tbp->tbflags = TBTIGER|TBPOINT; /* default */
+ tp->t_cp = tbp->cbuf;
+ tp->t_inbuf = 0;
+ bzero((caddr_t)&tbp->rets, sizeof (tbp->rets));
+ tp->T_LINEP = (caddr_t)tbp;
+ tp->t_flags |= LITOUT;
+ return (0);
+}
+
+/*
+ * Line discipline change or last device close.
+ */
+tbclose(tp)
+ register struct tty *tp;
+{
+ register int s;
+ int modebits = TBPOINT|TBSTOP;
+
+ tbioctl(tp, BIOSMODE, &modebits, 0);
+ s = spltty();
+ ((struct tb *)tp->T_LINEP)->tbflags = 0;
+ tp->t_cp = 0;
+ tp->t_inbuf = 0;
+ tp->t_rawq.c_cc = 0; /* clear queues -- paranoid */
+ tp->t_canq.c_cc = 0;
+ tp->t_line = 0; /* paranoid: avoid races */
+ splx(s);
+}
+
+/*
+ * Read from a tablet line.
+ * Characters have been buffered in a buffer and decoded.
+ */
+tbread(tp, uio)
+ register struct tty *tp;
+ struct uio *uio;
+{
+ register struct tb *tbp = (struct tb *)tp->T_LINEP;
+ register struct tbconf *tc = &tbconf[tbp->tbflags & TBTYPE];
+ int ret;
+
+ if ((tp->t_state&TS_CARR_ON) == 0)
+ return (EIO);
+ ret = uiomove(&tbp->rets, tc->tbc_uiosize, uio);
+ if (tc->tbc_flags&TBF_POL)
+ tbp->rets.polpos.p_key = ' ';
+ return (ret);
+}
+
+/*
+ * Low level character input routine.
+ * Stuff the character in the buffer, and decode
+ * if all the chars are there.
+ *
+ * This routine could be expanded in-line in the receiver
+ * interrupt routine to make it run as fast as possible.
+ */
+tbinput(c, tp)
+ register int c;
+ register struct tty *tp;
+{
+ register struct tb *tbp = (struct tb *)tp->T_LINEP;
+ register struct tbconf *tc = &tbconf[tbp->tbflags & TBTYPE];
+
+ if (tc->tbc_recsize == 0 || tc->tbc_decode == 0) /* paranoid? */
+ return;
+ /*
+ * Locate sync bit/byte or reset input buffer.
+ */
+ if (c&tc->tbc_sync || tp->t_inbuf == tc->tbc_recsize) {
+ tp->t_cp = tbp->cbuf;
+ tp->t_inbuf = 0;
+ }
+ *tp->t_cp++ = c&0177;
+ /*
+ * Call decode routine only if a full record has been collected.
+ */
+ if (++tp->t_inbuf == tc->tbc_recsize)
+ (*tc->tbc_decode)(tc, tbp->cbuf, &tbp->rets);
+}
+
+/*
+ * Decode GTCO 8 byte format (high res, tilt, and pressure).
+ */
+static
+gtcodecode(tc, cp, tbpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct gtcopos *tbpos;
+{
+
+ tbpos->pressure = *cp >> 2;
+ tbpos->status = (tbpos->pressure > 16) | TBINPROX; /* half way down */
+ tbpos->xpos = (*cp++ & 03) << 14;
+ tbpos->xpos |= *cp++ << 7;
+ tbpos->xpos |= *cp++;
+ tbpos->ypos = (*cp++ & 03) << 14;
+ tbpos->ypos |= *cp++ << 7;
+ tbpos->ypos |= *cp++;
+ tbpos->xtilt = *cp++;
+ tbpos->ytilt = *cp++;
+ tbpos->scount++;
+}
+
+/*
+ * Decode old Hitachi 5 byte format (low res).
+ */
+static
+tbdecode(tc, cp, tbpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct tbpos *tbpos;
+{
+ register char byte;
+
+ byte = *cp++;
+ tbpos->status = (byte&0100) ? TBINPROX : 0;
+ byte &= ~0100;
+ if (byte > 036)
+ tbpos->status |= 1 << ((byte-040)/2);
+ tbpos->xpos = *cp++ << 7;
+ tbpos->xpos |= *cp++;
+ if (tbpos->xpos < 256) /* tablet wraps around at 256 */
+ tbpos->status &= ~TBINPROX; /* make it out of proximity */
+ tbpos->ypos = *cp++ << 7;
+ tbpos->ypos |= *cp++;
+ tbpos->scount++;
+}
+
+/*
+ * Decode new Hitach 5-byte format (low res).
+ */
+static
+tblresdecode(tc, cp, tbpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct tbpos *tbpos;
+{
+
+ *cp &= ~0100; /* mask sync bit */
+ tbpos->status = (*cp++ >> 2) | TBINPROX;
+ if (tc->tbc_flags&TBF_INPROX && tbpos->status&020)
+ tbpos->status &= ~(020|TBINPROX);
+ tbpos->xpos = *cp++;
+ tbpos->xpos |= *cp++ << 6;
+ tbpos->ypos = *cp++;
+ tbpos->ypos |= *cp++ << 6;
+ tbpos->scount++;
+}
+
+/*
+ * Decode new Hitach 6-byte format (high res).
+ */
+static
+tbhresdecode(tc, cp, tbpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct tbpos *tbpos;
+{
+ char byte;
+
+ byte = *cp++;
+ tbpos->xpos = (byte & 03) << 14;
+ tbpos->xpos |= *cp++ << 7;
+ tbpos->xpos |= *cp++;
+ tbpos->ypos = *cp++ << 14;
+ tbpos->ypos |= *cp++ << 7;
+ tbpos->ypos |= *cp++;
+ tbpos->status = (byte >> 2) | TBINPROX;
+ if (tc->tbc_flags&TBF_INPROX && tbpos->status&020)
+ tbpos->status &= ~(020|TBINPROX);
+ tbpos->scount++;
+}
+
+/*
+ * Polhemus decode.
+ */
+static
+poldecode(tc, cp, polpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct polpos *polpos;
+{
+
+ polpos->p_x = cp[4] | cp[3]<<7 | (cp[9] & 0x03) << 14;
+ polpos->p_y = cp[6] | cp[5]<<7 | (cp[9] & 0x0c) << 12;
+ polpos->p_z = cp[8] | cp[7]<<7 | (cp[9] & 0x30) << 10;
+ polpos->p_azi = cp[11] | cp[10]<<7 | (cp[16] & 0x03) << 14;
+ polpos->p_pit = cp[13] | cp[12]<<7 | (cp[16] & 0x0c) << 12;
+ polpos->p_rol = cp[15] | cp[14]<<7 | (cp[16] & 0x30) << 10;
+ polpos->p_stat = cp[1] | cp[0]<<7;
+ if (cp[2] != ' ')
+ polpos->p_key = cp[2];
+}
+
+/*ARGSUSED*/
+tbioctl(tp, cmd, data, flag)
+ struct tty *tp;
+ caddr_t data;
+{
+ register struct tb *tbp = (struct tb *)tp->T_LINEP;
+
+ switch (cmd) {
+
+ case BIOGMODE:
+ *(int *)data = tbp->tbflags & TBMODE;
+ break;
+
+ case BIOSTYPE:
+ if (tbconf[*(int *)data & TBTYPE].tbc_recsize == 0 ||
+ tbconf[*(int *)data & TBTYPE].tbc_decode == 0)
+ return (EINVAL);
+ tbp->tbflags &= ~TBTYPE;
+ tbp->tbflags |= *(int *)data & TBTYPE;
+ /* fall thru... to set mode bits */
+
+ case BIOSMODE: {
+ register struct tbconf *tc;
+
+ tbp->tbflags &= ~TBMODE;
+ tbp->tbflags |= *(int *)data & TBMODE;
+ tc = &tbconf[tbp->tbflags & TBTYPE];
+ if (tbp->tbflags&TBSTOP) {
+ if (tc->tbc_stop)
+ ttyout(tc->tbc_stop, tp);
+ } else if (tc->tbc_start)
+ ttyout(tc->tbc_start, tp);
+ if (tbp->tbflags&TBPOINT) {
+ if (tc->tbc_point)
+ ttyout(tc->tbc_point, tp);
+ } else if (tc->tbc_run)
+ ttyout(tc->tbc_run, tp);
+ ttstart(tp);
+ break;
+ }
+
+ case BIOGTYPE:
+ *(int *)data = tbp->tbflags & TBTYPE;
+ break;
+
+ case TIOCSETD:
+ case TIOCGETD:
+ case TIOCGETP:
+ case TIOCGETC:
+ return (-1); /* pass thru... */
+
+ default:
+ return (ENOTTY);
+ }
+ return (0);
+}
+#endif
diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c
new file mode 100644
index 0000000..964fc6f
--- /dev/null
+++ b/sys/kern/tty_tty.c
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_tty.c 8.2 (Berkeley) 9/23/93
+ */
+
+/*
+ * Indirect driver for controlling tty.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+
+#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL)
+
+/*ARGSUSED*/
+cttyopen(dev, flag, mode, p)
+ dev_t dev;
+ int flag, mode;
+ struct proc *p;
+{
+ struct vnode *ttyvp = cttyvp(p);
+ int error;
+
+ if (ttyvp == NULL)
+ return (ENXIO);
+ VOP_LOCK(ttyvp);
+#ifdef PARANOID
+ /*
+ * Since group is tty and mode is 620 on most terminal lines
+ * and since sessions protect terminals from processes outside
+ * your session, this check is probably no longer necessary.
+ * Since it inhibits setuid root programs that later switch
+ * to another user from accessing /dev/tty, we have decided
+ * to delete this test. (mckusick 5/93)
+ */
+ error = VOP_ACCESS(ttyvp,
+ (flag&FREAD ? VREAD : 0) | (flag&FWRITE ? VWRITE : 0), p->p_ucred, p);
+ if (!error)
+#endif /* PARANOID */
+ error = VOP_OPEN(ttyvp, flag, NOCRED, p);
+ VOP_UNLOCK(ttyvp);
+ return (error);
+}
+
+/*ARGSUSED*/
+cttyread(dev, uio, flag)
+ dev_t dev;
+ struct uio *uio;
+ int flag;
+{
+ register struct vnode *ttyvp = cttyvp(uio->uio_procp);
+ int error;
+
+ if (ttyvp == NULL)
+ return (EIO);
+ VOP_LOCK(ttyvp);
+ error = VOP_READ(ttyvp, uio, flag, NOCRED);
+ VOP_UNLOCK(ttyvp);
+ return (error);
+}
+
+/*ARGSUSED*/
+cttywrite(dev, uio, flag)
+ dev_t dev;
+ struct uio *uio;
+ int flag;
+{
+ register struct vnode *ttyvp = cttyvp(uio->uio_procp);
+ int error;
+
+ if (ttyvp == NULL)
+ return (EIO);
+ VOP_LOCK(ttyvp);
+ error = VOP_WRITE(ttyvp, uio, flag, NOCRED);
+ VOP_UNLOCK(ttyvp);
+ return (error);
+}
+
+/*ARGSUSED*/
+cttyioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ int cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ struct vnode *ttyvp = cttyvp(p);
+
+ if (ttyvp == NULL)
+ return (EIO);
+ if (cmd == TIOCNOTTY) {
+ if (!SESS_LEADER(p)) {
+ p->p_flag &= ~P_CONTROLT;
+ return (0);
+ } else
+ return (EINVAL);
+ }
+ return (VOP_IOCTL(ttyvp, cmd, addr, flag, NOCRED, p));
+}
+
+/*ARGSUSED*/
+cttyselect(dev, flag, p)
+ dev_t dev;
+ int flag;
+ struct proc *p;
+{
+ struct vnode *ttyvp = cttyvp(p);
+
+ if (ttyvp == NULL)
+ return (1); /* try operation to get EOF/failure */
+ return (VOP_SELECT(ttyvp, flag, FREAD|FWRITE, NOCRED, p));
+}
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
new file mode 100644
index 0000000..8834dbf
--- /dev/null
+++ b/sys/kern/uipc_domain.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_domain.c 8.2 (Berkeley) 10/18/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+void pffasttimo __P((void *));
+void pfslowtimo __P((void *));
+
+#define ADDDOMAIN(x) { \
+ extern struct domain __CONCAT(x,domain); \
+ __CONCAT(x,domain.dom_next) = domains; \
+ domains = &__CONCAT(x,domain); \
+}
+
+domaininit()
+{
+ register struct domain *dp;
+ register struct protosw *pr;
+
+#undef unix
+#ifndef lint
+ ADDDOMAIN(unix);
+ ADDDOMAIN(route);
+#ifdef INET
+ ADDDOMAIN(inet);
+#endif
+#ifdef NS
+ ADDDOMAIN(ns);
+#endif
+#ifdef ISO
+ ADDDOMAIN(iso);
+#endif
+#ifdef CCITT
+ ADDDOMAIN(ccitt);
+#endif
+#include "imp.h"
+#if NIMP > 0
+ ADDDOMAIN(imp);
+#endif
+#endif
+
+ for (dp = domains; dp; dp = dp->dom_next) {
+ if (dp->dom_init)
+ (*dp->dom_init)();
+ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+ if (pr->pr_init)
+ (*pr->pr_init)();
+ }
+
+if (max_linkhdr < 16) /* XXX */
+max_linkhdr = 16;
+ max_hdr = max_linkhdr + max_protohdr;
+ max_datalen = MHLEN - max_hdr;
+ timeout(pffasttimo, (void *)0, 1);
+ timeout(pfslowtimo, (void *)0, 1);
+}
+
+struct protosw *
+pffindtype(family, type)
+ int family, type;
+{
+ register struct domain *dp;
+ register struct protosw *pr;
+
+ for (dp = domains; dp; dp = dp->dom_next)
+ if (dp->dom_family == family)
+ goto found;
+ return (0);
+found:
+ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+ if (pr->pr_type && pr->pr_type == type)
+ return (pr);
+ return (0);
+}
+
+struct protosw *
+pffindproto(family, protocol, type)
+ int family, protocol, type;
+{
+ register struct domain *dp;
+ register struct protosw *pr;
+ struct protosw *maybe = 0;
+
+ if (family == 0)
+ return (0);
+ for (dp = domains; dp; dp = dp->dom_next)
+ if (dp->dom_family == family)
+ goto found;
+ return (0);
+found:
+ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
+ if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
+ return (pr);
+
+ if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
+ pr->pr_protocol == 0 && maybe == (struct protosw *)0)
+ maybe = pr;
+ }
+ return (maybe);
+}
+
+net_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ register struct domain *dp;
+ register struct protosw *pr;
+ int family, protocol;
+
+ /*
+ * All sysctl names at this level are nonterminal;
+ * next two components are protocol family and protocol number,
+ * then at least one addition component.
+ */
+ if (namelen < 3)
+ return (EISDIR); /* overloaded */
+ family = name[0];
+ protocol = name[1];
+
+ if (family == 0)
+ return (0);
+ for (dp = domains; dp; dp = dp->dom_next)
+ if (dp->dom_family == family)
+ goto found;
+ return (ENOPROTOOPT);
+found:
+ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+ if (pr->pr_protocol == protocol && pr->pr_sysctl)
+ return ((*pr->pr_sysctl)(name + 2, namelen - 2,
+ oldp, oldlenp, newp, newlen));
+ return (ENOPROTOOPT);
+}
+
+pfctlinput(cmd, sa)
+ int cmd;
+ struct sockaddr *sa;
+{
+ register struct domain *dp;
+ register struct protosw *pr;
+
+ for (dp = domains; dp; dp = dp->dom_next)
+ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+ if (pr->pr_ctlinput)
+ (*pr->pr_ctlinput)(cmd, sa, (caddr_t)0);
+}
+
+void
+pfslowtimo(arg)
+ void *arg;
+{
+ register struct domain *dp;
+ register struct protosw *pr;
+
+ for (dp = domains; dp; dp = dp->dom_next)
+ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+ if (pr->pr_slowtimo)
+ (*pr->pr_slowtimo)();
+ timeout(pfslowtimo, (void *)0, hz/2);
+}
+
+void
+pffasttimo(arg)
+ void *arg;
+{
+ register struct domain *dp;
+ register struct protosw *pr;
+
+ for (dp = domains; dp; dp = dp->dom_next)
+ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+ if (pr->pr_fasttimo)
+ (*pr->pr_fasttimo)();
+ timeout(pffasttimo, (void *)0, hz/5);
+}
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
new file mode 100644
index 0000000..b71c634
--- /dev/null
+++ b/sys/kern/uipc_mbuf.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/map.h>
+#define MBTYPES
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+
+#include <vm/vm.h>
+
+extern vm_map_t mb_map;
+struct mbuf *mbutl;
+char *mclrefcnt;
+
+mbinit()
+{
+ int s;
+
+#if CLBYTES < 4096
+#define NCL_INIT (4096/CLBYTES)
+#else
+#define NCL_INIT 1
+#endif
+ s = splimp();
+ if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
+ goto bad;
+ splx(s);
+ return;
+bad:
+ panic("mbinit");
+}
+
+/*
+ * Allocate some number of mbuf clusters
+ * and place on cluster free list.
+ * Must be called at splimp.
+ */
+/* ARGSUSED */
+m_clalloc(ncl, nowait)
+ register int ncl;
+ int nowait;
+{
+ static int logged;
+ register caddr_t p;
+ register int i;
+ int npg;
+
+ npg = ncl * CLSIZE;
+ p = (caddr_t)kmem_malloc(mb_map, ctob(npg), !nowait);
+ if (p == NULL) {
+ if (logged == 0) {
+ logged++;
+ log(LOG_ERR, "mb_map full\n");
+ }
+ return (0);
+ }
+ ncl = ncl * CLBYTES / MCLBYTES;
+ for (i = 0; i < ncl; i++) {
+ ((union mcluster *)p)->mcl_next = mclfree;
+ mclfree = (union mcluster *)p;
+ p += MCLBYTES;
+ mbstat.m_clfree++;
+ }
+ mbstat.m_clusters += ncl;
+ return (1);
+}
+
+/*
+ * When MGET failes, ask protocols to free space when short of memory,
+ * then re-attempt to allocate an mbuf.
+ */
+struct mbuf *
+m_retry(i, t)
+ int i, t;
+{
+ register struct mbuf *m;
+
+ m_reclaim();
+#define m_retry(i, t) (struct mbuf *)0
+ MGET(m, i, t);
+#undef m_retry
+ return (m);
+}
+
+/*
+ * As above; retry an MGETHDR.
+ */
+struct mbuf *
+m_retryhdr(i, t)
+ int i, t;
+{
+ register struct mbuf *m;
+
+ m_reclaim();
+#define m_retryhdr(i, t) (struct mbuf *)0
+ MGETHDR(m, i, t);
+#undef m_retryhdr
+ return (m);
+}
+
+m_reclaim()
+{
+ register struct domain *dp;
+ register struct protosw *pr;
+ int s = splimp();
+
+ for (dp = domains; dp; dp = dp->dom_next)
+ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+ if (pr->pr_drain)
+ (*pr->pr_drain)();
+ splx(s);
+ mbstat.m_drain++;
+}
+
+/*
+ * Space allocation routines.
+ * These are also available as macros
+ * for critical paths.
+ */
+struct mbuf *
+m_get(nowait, type)
+ int nowait, type;
+{
+ register struct mbuf *m;
+
+ MGET(m, nowait, type);
+ return (m);
+}
+
+struct mbuf *
+m_gethdr(nowait, type)
+ int nowait, type;
+{
+ register struct mbuf *m;
+
+ MGETHDR(m, nowait, type);
+ return (m);
+}
+
+struct mbuf *
+m_getclr(nowait, type)
+ int nowait, type;
+{
+ register struct mbuf *m;
+
+ MGET(m, nowait, type);
+ if (m == 0)
+ return (0);
+ bzero(mtod(m, caddr_t), MLEN);
+ return (m);
+}
+
+struct mbuf *
+m_free(m)
+ struct mbuf *m;
+{
+ register struct mbuf *n;
+
+ MFREE(m, n);
+ return (n);
+}
+
+void
+m_freem(m)
+ register struct mbuf *m;
+{
+ register struct mbuf *n;
+
+ if (m == NULL)
+ return;
+ do {
+ MFREE(m, n);
+ } while (m = n);
+}
+
+/*
+ * Mbuffer utility routines.
+ */
+
+/*
+ * Lesser-used path for M_PREPEND:
+ * allocate new mbuf to prepend to chain,
+ * copy junk along.
+ */
+struct mbuf *
+m_prepend(m, len, how)
+ register struct mbuf *m;
+ int len, how;
+{
+ struct mbuf *mn;
+
+ MGET(mn, how, m->m_type);
+ if (mn == (struct mbuf *)NULL) {
+ m_freem(m);
+ return ((struct mbuf *)NULL);
+ }
+ if (m->m_flags & M_PKTHDR) {
+ M_COPY_PKTHDR(mn, m);
+ m->m_flags &= ~M_PKTHDR;
+ }
+ mn->m_next = m;
+ m = mn;
+ if (len < MHLEN)
+ MH_ALIGN(m, len);
+ m->m_len = len;
+ return (m);
+}
+
+/*
+ * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
+ * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
+ * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
+ */
+int MCFail;
+
+struct mbuf *
+m_copym(m, off0, len, wait)
+ register struct mbuf *m;
+ int off0, wait;
+ register int len;
+{
+ register struct mbuf *n, **np;
+ register int off = off0;
+ struct mbuf *top;
+ int copyhdr = 0;
+
+ if (off < 0 || len < 0)
+ panic("m_copym");
+ if (off == 0 && m->m_flags & M_PKTHDR)
+ copyhdr = 1;
+ while (off > 0) {
+ if (m == 0)
+ panic("m_copym");
+ if (off < m->m_len)
+ break;
+ off -= m->m_len;
+ m = m->m_next;
+ }
+ np = &top;
+ top = 0;
+ while (len > 0) {
+ if (m == 0) {
+ if (len != M_COPYALL)
+ panic("m_copym");
+ break;
+ }
+ MGET(n, wait, m->m_type);
+ *np = n;
+ if (n == 0)
+ goto nospace;
+ if (copyhdr) {
+ M_COPY_PKTHDR(n, m);
+ if (len == M_COPYALL)
+ n->m_pkthdr.len -= off0;
+ else
+ n->m_pkthdr.len = len;
+ copyhdr = 0;
+ }
+ n->m_len = min(len, m->m_len - off);
+ if (m->m_flags & M_EXT) {
+ n->m_data = m->m_data + off;
+ mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
+ n->m_ext = m->m_ext;
+ n->m_flags |= M_EXT;
+ } else
+ bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
+ (unsigned)n->m_len);
+ if (len != M_COPYALL)
+ len -= n->m_len;
+ off = 0;
+ m = m->m_next;
+ np = &n->m_next;
+ }
+ if (top == 0)
+ MCFail++;
+ return (top);
+nospace:
+ m_freem(top);
+ MCFail++;
+ return (0);
+}
+
+/*
+ * Copy data from an mbuf chain starting "off" bytes from the beginning,
+ * continuing for "len" bytes, into the indicated buffer.
+ */
+m_copydata(m, off, len, cp)
+ register struct mbuf *m;
+ register int off;
+ register int len;
+ caddr_t cp;
+{
+ register unsigned count;
+
+ if (off < 0 || len < 0)
+ panic("m_copydata");
+ while (off > 0) {
+ if (m == 0)
+ panic("m_copydata");
+ if (off < m->m_len)
+ break;
+ off -= m->m_len;
+ m = m->m_next;
+ }
+ while (len > 0) {
+ if (m == 0)
+ panic("m_copydata");
+ count = min(m->m_len - off, len);
+ bcopy(mtod(m, caddr_t) + off, cp, count);
+ len -= count;
+ cp += count;
+ off = 0;
+ m = m->m_next;
+ }
+}
+
+/*
+ * Concatenate mbuf chain n to m.
+ * Both chains must be of the same type (e.g. MT_DATA).
+ * Any m_pkthdr is not updated.
+ */
+m_cat(m, n)
+ register struct mbuf *m, *n;
+{
+ while (m->m_next)
+ m = m->m_next;
+ while (n) {
+ if (m->m_flags & M_EXT ||
+ m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
+ /* just join the two chains */
+ m->m_next = n;
+ return;
+ }
+ /* splat the data from one into the other */
+ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
+ (u_int)n->m_len);
+ m->m_len += n->m_len;
+ n = m_free(n);
+ }
+}
+
+m_adj(mp, req_len)
+ struct mbuf *mp;
+ int req_len;
+{
+ register int len = req_len;
+ register struct mbuf *m;
+ register count;
+
+ if ((m = mp) == NULL)
+ return;
+ if (len >= 0) {
+ /*
+ * Trim from head.
+ */
+ while (m != NULL && len > 0) {
+ if (m->m_len <= len) {
+ len -= m->m_len;
+ m->m_len = 0;
+ m = m->m_next;
+ } else {
+ m->m_len -= len;
+ m->m_data += len;
+ len = 0;
+ }
+ }
+ m = mp;
+ if (mp->m_flags & M_PKTHDR)
+ m->m_pkthdr.len -= (req_len - len);
+ } else {
+ /*
+ * Trim from tail. Scan the mbuf chain,
+ * calculating its length and finding the last mbuf.
+ * If the adjustment only affects this mbuf, then just
+ * adjust and return. Otherwise, rescan and truncate
+ * after the remaining size.
+ */
+ len = -len;
+ count = 0;
+ for (;;) {
+ count += m->m_len;
+ if (m->m_next == (struct mbuf *)0)
+ break;
+ m = m->m_next;
+ }
+ if (m->m_len >= len) {
+ m->m_len -= len;
+ if (mp->m_flags & M_PKTHDR)
+ mp->m_pkthdr.len -= len;
+ return;
+ }
+ count -= len;
+ if (count < 0)
+ count = 0;
+ /*
+ * Correct length for chain is "count".
+ * Find the mbuf with last data, adjust its length,
+ * and toss data from remaining mbufs on chain.
+ */
+ m = mp;
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len = count;
+ for (; m; m = m->m_next) {
+ if (m->m_len >= count) {
+ m->m_len = count;
+ break;
+ }
+ count -= m->m_len;
+ }
+ while (m = m->m_next)
+ m->m_len = 0;
+ }
+}
+
+/*
+ * Rearange an mbuf chain so that len bytes are contiguous
+ * and in the data area of an mbuf (so that mtod and dtom
+ * will work for a structure of size len). Returns the resulting
+ * mbuf chain on success, frees it and returns null on failure.
+ * If there is room, it will add up to max_protohdr-len extra bytes to the
+ * contiguous region in an attempt to avoid being called next time.
+ */
+int MPFail;
+
+struct mbuf *
+m_pullup(n, len)
+ register struct mbuf *n;
+ int len;
+{
+ register struct mbuf *m;
+ register int count;
+ int space;
+
+ /*
+ * If first mbuf has no cluster, and has room for len bytes
+ * without shifting current data, pullup into it,
+ * otherwise allocate a new mbuf to prepend to the chain.
+ */
+ if ((n->m_flags & M_EXT) == 0 &&
+ n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
+ if (n->m_len >= len)
+ return (n);
+ m = n;
+ n = n->m_next;
+ len -= m->m_len;
+ } else {
+ if (len > MHLEN)
+ goto bad;
+ MGET(m, M_DONTWAIT, n->m_type);
+ if (m == 0)
+ goto bad;
+ m->m_len = 0;
+ if (n->m_flags & M_PKTHDR) {
+ M_COPY_PKTHDR(m, n);
+ n->m_flags &= ~M_PKTHDR;
+ }
+ }
+ space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
+ do {
+ count = min(min(max(len, max_protohdr), space), n->m_len);
+ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
+ (unsigned)count);
+ len -= count;
+ m->m_len += count;
+ n->m_len -= count;
+ space -= count;
+ if (n->m_len)
+ n->m_data += count;
+ else
+ n = m_free(n);
+ } while (len > 0 && n);
+ if (len > 0) {
+ (void) m_free(m);
+ goto bad;
+ }
+ m->m_next = n;
+ return (m);
+bad:
+ m_freem(n);
+ MPFail++;
+ return (0);
+}
+
+/*
+ * Partition an mbuf chain in two pieces, returning the tail --
+ * all but the first len0 bytes. In case of failure, it returns NULL and
+ * attempts to restore the chain to its original state.
+ */
+struct mbuf *
+m_split(m0, len0, wait)
+ register struct mbuf *m0;
+ int len0, wait;
+{
+ register struct mbuf *m, *n;
+ unsigned len = len0, remain;
+
+ for (m = m0; m && len > m->m_len; m = m->m_next)
+ len -= m->m_len;
+ if (m == 0)
+ return (0);
+ remain = m->m_len - len;
+ if (m0->m_flags & M_PKTHDR) {
+ MGETHDR(n, wait, m0->m_type);
+ if (n == 0)
+ return (0);
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ n->m_pkthdr.len = m0->m_pkthdr.len - len0;
+ m0->m_pkthdr.len = len0;
+ if (m->m_flags & M_EXT)
+ goto extpacket;
+ if (remain > MHLEN) {
+ /* m can't be the lead packet */
+ MH_ALIGN(n, 0);
+ n->m_next = m_split(m, len, wait);
+ if (n->m_next == 0) {
+ (void) m_free(n);
+ return (0);
+ } else
+ return (n);
+ } else
+ MH_ALIGN(n, remain);
+ } else if (remain == 0) {
+ n = m->m_next;
+ m->m_next = 0;
+ return (n);
+ } else {
+ MGET(n, wait, m->m_type);
+ if (n == 0)
+ return (0);
+ M_ALIGN(n, remain);
+ }
+extpacket:
+ if (m->m_flags & M_EXT) {
+ n->m_flags |= M_EXT;
+ n->m_ext = m->m_ext;
+ mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
+ m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
+ n->m_data = m->m_data + len;
+ } else {
+ bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
+ }
+ n->m_len = remain;
+ m->m_len = len;
+ n->m_next = m->m_next;
+ m->m_next = 0;
+ return (n);
+}
+/*
+ * Routine to copy from device local memory into mbufs.
+ */
+struct mbuf *
+m_devget(buf, totlen, off0, ifp, copy)
+ char *buf;
+ int totlen, off0;
+ struct ifnet *ifp;
+ void (*copy)();
+{
+ register struct mbuf *m;
+ struct mbuf *top = 0, **mp = &top;
+ register int off = off0, len;
+ register char *cp;
+ char *epkt;
+
+ cp = buf;
+ epkt = cp + totlen;
+ if (off) {
+ cp += off + 2 * sizeof(u_short);
+ totlen -= 2 * sizeof(u_short);
+ }
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == 0)
+ return (0);
+ m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.len = totlen;
+ m->m_len = MHLEN;
+
+ while (totlen > 0) {
+ if (top) {
+ MGET(m, M_DONTWAIT, MT_DATA);
+ if (m == 0) {
+ m_freem(top);
+ return (0);
+ }
+ m->m_len = MLEN;
+ }
+ len = min(totlen, epkt - cp);
+ if (len >= MINCLSIZE) {
+ MCLGET(m, M_DONTWAIT);
+ if (m->m_flags & M_EXT)
+ m->m_len = len = min(len, MCLBYTES);
+ else
+ len = m->m_len;
+ } else {
+ /*
+ * Place initial small packet/header at end of mbuf.
+ */
+ if (len < m->m_len) {
+ if (top == 0 && len + max_linkhdr <= m->m_len)
+ m->m_data += max_linkhdr;
+ m->m_len = len;
+ } else
+ len = m->m_len;
+ }
+ if (copy)
+ copy(cp, mtod(m, caddr_t), (unsigned)len);
+ else
+ bcopy(cp, mtod(m, caddr_t), (unsigned)len);
+ cp += len;
+ *mp = m;
+ mp = &m->m_next;
+ totlen -= len;
+ if (cp == epkt)
+ cp = buf;
+ }
+ return (top);
+}
diff --git a/sys/kern/uipc_proto.c b/sys/kern/uipc_proto.c
new file mode 100644
index 0000000..da9828a
--- /dev/null
+++ b/sys/kern/uipc_proto.c
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_proto.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+/*
+ * Definitions of protocols supported in the UNIX domain.
+ */
+
+int uipc_usrreq(), raw_usrreq();
+void raw_init(),raw_input(),raw_ctlinput();
+extern struct domain unixdomain; /* or at least forward */
+
+struct protosw unixsw[] = {
+{ SOCK_STREAM, &unixdomain, 0, PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
+ 0, 0, 0, 0,
+ uipc_usrreq,
+ 0, 0, 0, 0,
+},
+{ SOCK_DGRAM, &unixdomain, 0, PR_ATOMIC|PR_ADDR|PR_RIGHTS,
+ 0, 0, 0, 0,
+ uipc_usrreq,
+ 0, 0, 0, 0,
+},
+{ 0, 0, 0, 0,
+ raw_input, 0, raw_ctlinput, 0,
+ raw_usrreq,
+ raw_init, 0, 0, 0,
+}
+};
+
+int unp_externalize(), unp_dispose();
+
+struct domain unixdomain =
+ { AF_UNIX, "unix", 0, unp_externalize, unp_dispose,
+ unixsw, &unixsw[sizeof(unixsw)/sizeof(unixsw[0])] };
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
new file mode 100644
index 0000000..ed09ee6
--- /dev/null
+++ b/sys/kern/uipc_socket.c
@@ -0,0 +1,1024 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/kernel.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/resourcevar.h>
+
+/*
+ * Socket operation routines.
+ * These routines are called by the routines in
+ * sys_socket.c or from a system process, and
+ * implement the semantics of socket operations by
+ * switching out to the protocol specific routines.
+ */
+/*ARGSUSED*/
+socreate(dom, aso, type, proto)
+ int dom;
+ struct socket **aso;
+ register int type;
+ int proto;
+{
+ struct proc *p = curproc; /* XXX */
+ register struct protosw *prp;
+ register struct socket *so;
+ register int error;
+
+ if (proto)
+ prp = pffindproto(dom, proto, type);
+ else
+ prp = pffindtype(dom, type);
+ if (prp == 0 || prp->pr_usrreq == 0)
+ return (EPROTONOSUPPORT);
+ if (prp->pr_type != type)
+ return (EPROTOTYPE);
+ MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
+ bzero((caddr_t)so, sizeof(*so));
+ so->so_type = type;
+ if (p->p_ucred->cr_uid == 0)
+ so->so_state = SS_PRIV;
+ so->so_proto = prp;
+ error =
+ (*prp->pr_usrreq)(so, PRU_ATTACH,
+ (struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
+ if (error) {
+ so->so_state |= SS_NOFDREF;
+ sofree(so);
+ return (error);
+ }
+ *aso = so;
+ return (0);
+}
+
+sobind(so, nam)
+ struct socket *so;
+ struct mbuf *nam;
+{
+ int s = splnet();
+ int error;
+
+ error =
+ (*so->so_proto->pr_usrreq)(so, PRU_BIND,
+ (struct mbuf *)0, nam, (struct mbuf *)0);
+ splx(s);
+ return (error);
+}
+
+solisten(so, backlog)
+ register struct socket *so;
+ int backlog;
+{
+ int s = splnet(), error;
+
+ error =
+ (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+ if (error) {
+ splx(s);
+ return (error);
+ }
+ if (so->so_q == 0)
+ so->so_options |= SO_ACCEPTCONN;
+ if (backlog < 0)
+ backlog = 0;
+ so->so_qlimit = min(backlog, SOMAXCONN);
+ splx(s);
+ return (0);
+}
+
+sofree(so)
+ register struct socket *so;
+{
+
+ if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
+ return;
+ if (so->so_head) {
+ if (!soqremque(so, 0) && !soqremque(so, 1))
+ panic("sofree dq");
+ so->so_head = 0;
+ }
+ sbrelease(&so->so_snd);
+ sorflush(so);
+ FREE(so, M_SOCKET);
+}
+
+/*
+ * Close a socket on last file table reference removal.
+ * Initiate disconnect if connected.
+ * Free socket when disconnect complete.
+ */
+soclose(so)
+ register struct socket *so;
+{
+ int s = splnet(); /* conservative */
+ int error = 0;
+
+ if (so->so_options & SO_ACCEPTCONN) {
+ while (so->so_q0)
+ (void) soabort(so->so_q0);
+ while (so->so_q)
+ (void) soabort(so->so_q);
+ }
+ if (so->so_pcb == 0)
+ goto discard;
+ if (so->so_state & SS_ISCONNECTED) {
+ if ((so->so_state & SS_ISDISCONNECTING) == 0) {
+ error = sodisconnect(so);
+ if (error)
+ goto drop;
+ }
+ if (so->so_options & SO_LINGER) {
+ if ((so->so_state & SS_ISDISCONNECTING) &&
+ (so->so_state & SS_NBIO))
+ goto drop;
+ while (so->so_state & SS_ISCONNECTED)
+ if (error = tsleep((caddr_t)&so->so_timeo,
+ PSOCK | PCATCH, netcls, so->so_linger))
+ break;
+ }
+ }
+drop:
+ if (so->so_pcb) {
+ int error2 =
+ (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+ if (error == 0)
+ error = error2;
+ }
+discard:
+ if (so->so_state & SS_NOFDREF)
+ panic("soclose: NOFDREF");
+ so->so_state |= SS_NOFDREF;
+ sofree(so);
+ splx(s);
+ return (error);
+}
+
+/*
+ * Must be called at splnet...
+ */
+soabort(so)
+ struct socket *so;
+{
+
+ return (
+ (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
+}
+
+soaccept(so, nam)
+ register struct socket *so;
+ struct mbuf *nam;
+{
+ int s = splnet();
+ int error;
+
+ if ((so->so_state & SS_NOFDREF) == 0)
+ panic("soaccept: !NOFDREF");
+ so->so_state &= ~SS_NOFDREF;
+ error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
+ (struct mbuf *)0, nam, (struct mbuf *)0);
+ splx(s);
+ return (error);
+}
+
+soconnect(so, nam)
+ register struct socket *so;
+ struct mbuf *nam;
+{
+ int s;
+ int error;
+
+ if (so->so_options & SO_ACCEPTCONN)
+ return (EOPNOTSUPP);
+ s = splnet();
+ /*
+ * If protocol is connection-based, can only connect once.
+ * Otherwise, if connected, try to disconnect first.
+ * This allows user to disconnect by connecting to, e.g.,
+ * a null address.
+ */
+ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
+ ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
+ (error = sodisconnect(so))))
+ error = EISCONN;
+ else
+ error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
+ (struct mbuf *)0, nam, (struct mbuf *)0);
+ splx(s);
+ return (error);
+}
+
+soconnect2(so1, so2)
+ register struct socket *so1;
+ struct socket *so2;
+{
+ int s = splnet();
+ int error;
+
+ error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
+ (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
+ splx(s);
+ return (error);
+}
+
+sodisconnect(so)
+ register struct socket *so;
+{
+ int s = splnet();
+ int error;
+
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ error = ENOTCONN;
+ goto bad;
+ }
+ if (so->so_state & SS_ISDISCONNECTING) {
+ error = EALREADY;
+ goto bad;
+ }
+ error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+bad:
+ splx(s);
+ return (error);
+}
+
+#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
+/*
+ * Send on a socket.
+ * If send must go all at once and message is larger than
+ * send buffering, then hard error.
+ * Lock against other senders.
+ * If must go all at once and not enough room now, then
+ * inform user that this would block and do nothing.
+ * Otherwise, if nonblocking, send as much as possible.
+ * The data to be sent is described by "uio" if nonzero,
+ * otherwise by the mbuf chain "top" (which must be null
+ * if uio is not). Data provided in mbuf chain must be small
+ * enough to send all at once.
+ *
+ * Returns nonzero on error, timeout or signal; callers
+ * must check for short counts if EINTR/ERESTART are returned.
+ * Data and control buffers are freed on return.
+ */
+sosend(so, addr, uio, top, control, flags)
+ register struct socket *so;
+ struct mbuf *addr;
+ struct uio *uio;
+ struct mbuf *top;
+ struct mbuf *control;
+ int flags;
+{
+ struct proc *p = curproc; /* XXX */
+ struct mbuf **mp;
+ register struct mbuf *m;
+ register long space, len, resid;
+ int clen = 0, error, s, dontroute, mlen;
+ int atomic = sosendallatonce(so) || top;
+
+ if (uio)
+ resid = uio->uio_resid;
+ else
+ resid = top->m_pkthdr.len;
+ /*
+ * In theory resid should be unsigned.
+ * However, space must be signed, as it might be less than 0
+ * if we over-committed, and we must use a signed comparison
+ * of space and resid. On the other hand, a negative resid
+ * causes us to loop sending 0-length segments to the protocol.
+ */
+ if (resid < 0)
+ return (EINVAL);
+ dontroute =
+ (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
+ (so->so_proto->pr_flags & PR_ATOMIC);
+ p->p_stats->p_ru.ru_msgsnd++;
+ if (control)
+ clen = control->m_len;
+#define snderr(errno) { error = errno; splx(s); goto release; }
+
+restart:
+ if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
+ goto out;
+ do {
+ s = splnet();
+ if (so->so_state & SS_CANTSENDMORE)
+ snderr(EPIPE);
+ if (so->so_error)
+ snderr(so->so_error);
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ if ((so->so_state & SS_ISCONFIRMING) == 0 &&
+ !(resid == 0 && clen != 0))
+ snderr(ENOTCONN);
+ } else if (addr == 0)
+ snderr(EDESTADDRREQ);
+ }
+ space = sbspace(&so->so_snd);
+ if (flags & MSG_OOB)
+ space += 1024;
+ if (atomic && resid > so->so_snd.sb_hiwat ||
+ clen > so->so_snd.sb_hiwat)
+ snderr(EMSGSIZE);
+ if (space < resid + clen && uio &&
+ (atomic || space < so->so_snd.sb_lowat || space < clen)) {
+ if (so->so_state & SS_NBIO)
+ snderr(EWOULDBLOCK);
+ sbunlock(&so->so_snd);
+ error = sbwait(&so->so_snd);
+ splx(s);
+ if (error)
+ goto out;
+ goto restart;
+ }
+ splx(s);
+ mp = &top;
+ space -= clen;
+ do {
+ if (uio == NULL) {
+ /*
+ * Data is prepackaged in "top".
+ */
+ resid = 0;
+ if (flags & MSG_EOR)
+ top->m_flags |= M_EOR;
+ } else do {
+ if (top == 0) {
+ MGETHDR(m, M_WAIT, MT_DATA);
+ mlen = MHLEN;
+ m->m_pkthdr.len = 0;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ } else {
+ MGET(m, M_WAIT, MT_DATA);
+ mlen = MLEN;
+ }
+ if (resid >= MINCLSIZE && space >= MCLBYTES) {
+ MCLGET(m, M_WAIT);
+ if ((m->m_flags & M_EXT) == 0)
+ goto nopages;
+ mlen = MCLBYTES;
+#ifdef MAPPED_MBUFS
+ len = min(MCLBYTES, resid);
+#else
+ if (atomic && top == 0) {
+ len = min(MCLBYTES - max_hdr, resid);
+ m->m_data += max_hdr;
+ } else
+ len = min(MCLBYTES, resid);
+#endif
+ space -= MCLBYTES;
+ } else {
+nopages:
+ len = min(min(mlen, resid), space);
+ space -= len;
+ /*
+ * For datagram protocols, leave room
+ * for protocol headers in first mbuf.
+ */
+ if (atomic && top == 0 && len < mlen)
+ MH_ALIGN(m, len);
+ }
+ error = uiomove(mtod(m, caddr_t), (int)len, uio);
+ resid = uio->uio_resid;
+ m->m_len = len;
+ *mp = m;
+ top->m_pkthdr.len += len;
+ if (error)
+ goto release;
+ mp = &m->m_next;
+ if (resid <= 0) {
+ if (flags & MSG_EOR)
+ top->m_flags |= M_EOR;
+ break;
+ }
+ } while (space > 0 && atomic);
+ if (dontroute)
+ so->so_options |= SO_DONTROUTE;
+ s = splnet(); /* XXX */
+ error = (*so->so_proto->pr_usrreq)(so,
+ (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
+ top, addr, control);
+ splx(s);
+ if (dontroute)
+ so->so_options &= ~SO_DONTROUTE;
+ clen = 0;
+ control = 0;
+ top = 0;
+ mp = &top;
+ if (error)
+ goto release;
+ } while (resid && space > 0);
+ } while (resid);
+
+release:
+ sbunlock(&so->so_snd);
+out:
+ if (top)
+ m_freem(top);
+ if (control)
+ m_freem(control);
+ return (error);
+}
+
+/*
+ * Implement receive operations on a socket.
+ * We depend on the way that records are added to the sockbuf
+ * by sbappend*. In particular, each record (mbufs linked through m_next)
+ * must begin with an address if the protocol so specifies,
+ * followed by an optional mbuf or mbufs containing ancillary data,
+ * and then zero or more mbufs of data.
+ * In order to avoid blocking network interrupts for the entire time here,
+ * we splx() while doing the actual copy to user space.
+ * Although the sockbuf is locked, new data may still be appended,
+ * and thus we must maintain consistency of the sockbuf during that time.
+ *
+ * The caller may receive the data as a single mbuf chain by supplying
+ * an mbuf **mp0 for use in returning the chain. The uio is then used
+ * only for the count in uio_resid.
+ */
+soreceive(so, paddr, uio, mp0, controlp, flagsp)
+ register struct socket *so;
+ struct mbuf **paddr;
+ struct uio *uio;
+ struct mbuf **mp0;
+ struct mbuf **controlp;
+ int *flagsp;
+{
+ register struct mbuf *m, **mp;
+ register int flags, len, error, s, offset;
+ struct protosw *pr = so->so_proto;
+ struct mbuf *nextrecord;
+ int moff, type;
+ int orig_resid = uio->uio_resid;
+
+ mp = mp0;
+ if (paddr)
+ *paddr = 0;
+ if (controlp)
+ *controlp = 0;
+ if (flagsp)
+ flags = *flagsp &~ MSG_EOR;
+ else
+ flags = 0;
+ if (flags & MSG_OOB) {
+ m = m_get(M_WAIT, MT_DATA);
+ error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
+ m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
+ if (error)
+ goto bad;
+ do {
+ error = uiomove(mtod(m, caddr_t),
+ (int) min(uio->uio_resid, m->m_len), uio);
+ m = m_free(m);
+ } while (uio->uio_resid && error == 0 && m);
+bad:
+ if (m)
+ m_freem(m);
+ return (error);
+ }
+ if (mp)
+ *mp = (struct mbuf *)0;
+ if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
+ (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
+ (struct mbuf *)0, (struct mbuf *)0);
+
+restart:
+ if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
+ return (error);
+ s = splnet();
+
+ m = so->so_rcv.sb_mb;
+ /*
+ * If we have less data than requested, block awaiting more
+ * (subject to any timeout) if:
+ * 1. the current count is less than the low water mark, or
+ * 2. MSG_WAITALL is set, and it is possible to do the entire
+ * receive operation at once if we block (resid <= hiwat).
+ * 3. MSG_DONTWAIT is not set
+ * If MSG_WAITALL is set but resid is larger than the receive buffer,
+ * we have to do the receive in sections, and thus risk returning
+ * a short count if a timeout or signal occurs after we start.
+ */
+ if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
+ so->so_rcv.sb_cc < uio->uio_resid) &&
+ (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
+ ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
+ m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
+#ifdef DIAGNOSTIC
+ if (m == 0 && so->so_rcv.sb_cc)
+ panic("receive 1");
+#endif
+ if (so->so_error) {
+ if (m)
+ goto dontblock;
+ error = so->so_error;
+ if ((flags & MSG_PEEK) == 0)
+ so->so_error = 0;
+ goto release;
+ }
+ if (so->so_state & SS_CANTRCVMORE) {
+ if (m)
+ goto dontblock;
+ else
+ goto release;
+ }
+ for (; m; m = m->m_next)
+ if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
+ m = so->so_rcv.sb_mb;
+ goto dontblock;
+ }
+ if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
+ (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+ error = ENOTCONN;
+ goto release;
+ }
+ if (uio->uio_resid == 0)
+ goto release;
+ if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
+ error = EWOULDBLOCK;
+ goto release;
+ }
+ sbunlock(&so->so_rcv);
+ error = sbwait(&so->so_rcv);
+ splx(s);
+ if (error)
+ return (error);
+ goto restart;
+ }
+dontblock:
+ if (uio->uio_procp)
+ uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
+ nextrecord = m->m_nextpkt;
+ if (pr->pr_flags & PR_ADDR) {
+#ifdef DIAGNOSTIC
+ if (m->m_type != MT_SONAME)
+ panic("receive 1a");
+#endif
+ orig_resid = 0;
+ if (flags & MSG_PEEK) {
+ if (paddr)
+ *paddr = m_copy(m, 0, m->m_len);
+ m = m->m_next;
+ } else {
+ sbfree(&so->so_rcv, m);
+ if (paddr) {
+ *paddr = m;
+ so->so_rcv.sb_mb = m->m_next;
+ m->m_next = 0;
+ m = so->so_rcv.sb_mb;
+ } else {
+ MFREE(m, so->so_rcv.sb_mb);
+ m = so->so_rcv.sb_mb;
+ }
+ }
+ }
+ while (m && m->m_type == MT_CONTROL && error == 0) {
+ if (flags & MSG_PEEK) {
+ if (controlp)
+ *controlp = m_copy(m, 0, m->m_len);
+ m = m->m_next;
+ } else {
+ sbfree(&so->so_rcv, m);
+ if (controlp) {
+ if (pr->pr_domain->dom_externalize &&
+ mtod(m, struct cmsghdr *)->cmsg_type ==
+ SCM_RIGHTS)
+ error = (*pr->pr_domain->dom_externalize)(m);
+ *controlp = m;
+ so->so_rcv.sb_mb = m->m_next;
+ m->m_next = 0;
+ m = so->so_rcv.sb_mb;
+ } else {
+ MFREE(m, so->so_rcv.sb_mb);
+ m = so->so_rcv.sb_mb;
+ }
+ }
+ if (controlp) {
+ orig_resid = 0;
+ controlp = &(*controlp)->m_next;
+ }
+ }
+ if (m) {
+ if ((flags & MSG_PEEK) == 0)
+ m->m_nextpkt = nextrecord;
+ type = m->m_type;
+ if (type == MT_OOBDATA)
+ flags |= MSG_OOB;
+ }
+ moff = 0;
+ offset = 0;
+ while (m && uio->uio_resid > 0 && error == 0) {
+ if (m->m_type == MT_OOBDATA) {
+ if (type != MT_OOBDATA)
+ break;
+ } else if (type == MT_OOBDATA)
+ break;
+#ifdef DIAGNOSTIC
+ else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
+ panic("receive 3");
+#endif
+ so->so_state &= ~SS_RCVATMARK;
+ len = uio->uio_resid;
+ if (so->so_oobmark && len > so->so_oobmark - offset)
+ len = so->so_oobmark - offset;
+ if (len > m->m_len - moff)
+ len = m->m_len - moff;
+ /*
+ * If mp is set, just pass back the mbufs.
+ * Otherwise copy them out via the uio, then free.
+ * Sockbuf must be consistent here (points to current mbuf,
+ * it points to next record) when we drop priority;
+ * we must note any additions to the sockbuf when we
+ * block interrupts again.
+ */
+ if (mp == 0) {
+ splx(s);
+ error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
+ s = splnet();
+ } else
+ uio->uio_resid -= len;
+ if (len == m->m_len - moff) {
+ if (m->m_flags & M_EOR)
+ flags |= MSG_EOR;
+ if (flags & MSG_PEEK) {
+ m = m->m_next;
+ moff = 0;
+ } else {
+ nextrecord = m->m_nextpkt;
+ sbfree(&so->so_rcv, m);
+ if (mp) {
+ *mp = m;
+ mp = &m->m_next;
+ so->so_rcv.sb_mb = m = m->m_next;
+ *mp = (struct mbuf *)0;
+ } else {
+ MFREE(m, so->so_rcv.sb_mb);
+ m = so->so_rcv.sb_mb;
+ }
+ if (m)
+ m->m_nextpkt = nextrecord;
+ }
+ } else {
+ if (flags & MSG_PEEK)
+ moff += len;
+ else {
+ if (mp)
+ *mp = m_copym(m, 0, len, M_WAIT);
+ m->m_data += len;
+ m->m_len -= len;
+ so->so_rcv.sb_cc -= len;
+ }
+ }
+ if (so->so_oobmark) {
+ if ((flags & MSG_PEEK) == 0) {
+ so->so_oobmark -= len;
+ if (so->so_oobmark == 0) {
+ so->so_state |= SS_RCVATMARK;
+ break;
+ }
+ } else {
+ offset += len;
+ if (offset == so->so_oobmark)
+ break;
+ }
+ }
+ if (flags & MSG_EOR)
+ break;
+ /*
+ * If the MSG_WAITALL flag is set (for non-atomic socket),
+ * we must not quit until "uio->uio_resid == 0" or an error
+ * termination. If a signal/timeout occurs, return
+ * with a short count but without error.
+ * Keep sockbuf locked against other readers.
+ */
+ while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
+ !sosendallatonce(so) && !nextrecord) {
+ if (so->so_error || so->so_state & SS_CANTRCVMORE)
+ break;
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ sbunlock(&so->so_rcv);
+ splx(s);
+ return (0);
+ }
+ if (m = so->so_rcv.sb_mb)
+ nextrecord = m->m_nextpkt;
+ }
+ }
+
+ if (m && pr->pr_flags & PR_ATOMIC) {
+ flags |= MSG_TRUNC;
+ if ((flags & MSG_PEEK) == 0)
+ (void) sbdroprecord(&so->so_rcv);
+ }
+ if ((flags & MSG_PEEK) == 0) {
+ if (m == 0)
+ so->so_rcv.sb_mb = nextrecord;
+ if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+ (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
+ (struct mbuf *)flags, (struct mbuf *)0,
+ (struct mbuf *)0);
+ }
+ if (orig_resid == uio->uio_resid && orig_resid &&
+ (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
+ sbunlock(&so->so_rcv);
+ splx(s);
+ goto restart;
+ }
+
+ if (flagsp)
+ *flagsp |= flags;
+release:
+ sbunlock(&so->so_rcv);
+ splx(s);
+ return (error);
+}
+
+soshutdown(so, how)
+ register struct socket *so;
+ register int how;
+{
+ register struct protosw *pr = so->so_proto;
+
+ how++;
+ if (how & FREAD)
+ sorflush(so);
+ if (how & FWRITE)
+ return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
+ return (0);
+}
+
+sorflush(so)
+ register struct socket *so;
+{
+ register struct sockbuf *sb = &so->so_rcv;
+ register struct protosw *pr = so->so_proto;
+ register int s;
+ struct sockbuf asb;
+
+ sb->sb_flags |= SB_NOINTR;
+ (void) sblock(sb, M_WAITOK);
+ s = splimp();
+ socantrcvmore(so);
+ sbunlock(sb);
+ asb = *sb;
+ bzero((caddr_t)sb, sizeof (*sb));
+ splx(s);
+ if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
+ (*pr->pr_domain->dom_dispose)(asb.sb_mb);
+ sbrelease(&asb);
+}
+
+sosetopt(so, level, optname, m0)
+ register struct socket *so;
+ int level, optname;
+ struct mbuf *m0;
+{
+ int error = 0;
+ register struct mbuf *m = m0;
+
+ if (level != SOL_SOCKET) {
+ if (so->so_proto && so->so_proto->pr_ctloutput)
+ return ((*so->so_proto->pr_ctloutput)
+ (PRCO_SETOPT, so, level, optname, &m0));
+ error = ENOPROTOOPT;
+ } else {
+ switch (optname) {
+
+ case SO_LINGER:
+ if (m == NULL || m->m_len != sizeof (struct linger)) {
+ error = EINVAL;
+ goto bad;
+ }
+ so->so_linger = mtod(m, struct linger *)->l_linger;
+ /* fall thru... */
+
+ case SO_DEBUG:
+ case SO_KEEPALIVE:
+ case SO_DONTROUTE:
+ case SO_USELOOPBACK:
+ case SO_BROADCAST:
+ case SO_REUSEADDR:
+ case SO_REUSEPORT:
+ case SO_OOBINLINE:
+ if (m == NULL || m->m_len < sizeof (int)) {
+ error = EINVAL;
+ goto bad;
+ }
+ if (*mtod(m, int *))
+ so->so_options |= optname;
+ else
+ so->so_options &= ~optname;
+ break;
+
+ case SO_SNDBUF:
+ case SO_RCVBUF:
+ case SO_SNDLOWAT:
+ case SO_RCVLOWAT:
+ if (m == NULL || m->m_len < sizeof (int)) {
+ error = EINVAL;
+ goto bad;
+ }
+ switch (optname) {
+
+ case SO_SNDBUF:
+ case SO_RCVBUF:
+ if (sbreserve(optname == SO_SNDBUF ?
+ &so->so_snd : &so->so_rcv,
+ (u_long) *mtod(m, int *)) == 0) {
+ error = ENOBUFS;
+ goto bad;
+ }
+ break;
+
+ case SO_SNDLOWAT:
+ so->so_snd.sb_lowat = *mtod(m, int *);
+ break;
+ case SO_RCVLOWAT:
+ so->so_rcv.sb_lowat = *mtod(m, int *);
+ break;
+ }
+ break;
+
+ case SO_SNDTIMEO:
+ case SO_RCVTIMEO:
+ {
+ struct timeval *tv;
+ short val;
+
+ if (m == NULL || m->m_len < sizeof (*tv)) {
+ error = EINVAL;
+ goto bad;
+ }
+ tv = mtod(m, struct timeval *);
+ if (tv->tv_sec > SHRT_MAX / hz - hz) {
+ error = EDOM;
+ goto bad;
+ }
+ val = tv->tv_sec * hz + tv->tv_usec / tick;
+
+ switch (optname) {
+
+ case SO_SNDTIMEO:
+ so->so_snd.sb_timeo = val;
+ break;
+ case SO_RCVTIMEO:
+ so->so_rcv.sb_timeo = val;
+ break;
+ }
+ break;
+ }
+
+ default:
+ error = ENOPROTOOPT;
+ break;
+ }
+ if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
+ (void) ((*so->so_proto->pr_ctloutput)
+ (PRCO_SETOPT, so, level, optname, &m0));
+ m = NULL; /* freed by protocol */
+ }
+ }
+bad:
+ if (m)
+ (void) m_free(m);
+ return (error);
+}
+
+sogetopt(so, level, optname, mp)
+ register struct socket *so;
+ int level, optname;
+ struct mbuf **mp;
+{
+ register struct mbuf *m;
+
+ if (level != SOL_SOCKET) {
+ if (so->so_proto && so->so_proto->pr_ctloutput) {
+ return ((*so->so_proto->pr_ctloutput)
+ (PRCO_GETOPT, so, level, optname, mp));
+ } else
+ return (ENOPROTOOPT);
+ } else {
+ m = m_get(M_WAIT, MT_SOOPTS);
+ m->m_len = sizeof (int);
+
+ switch (optname) {
+
+ case SO_LINGER:
+ m->m_len = sizeof (struct linger);
+ mtod(m, struct linger *)->l_onoff =
+ so->so_options & SO_LINGER;
+ mtod(m, struct linger *)->l_linger = so->so_linger;
+ break;
+
+ case SO_USELOOPBACK:
+ case SO_DONTROUTE:
+ case SO_DEBUG:
+ case SO_KEEPALIVE:
+ case SO_REUSEADDR:
+ case SO_REUSEPORT:
+ case SO_BROADCAST:
+ case SO_OOBINLINE:
+ *mtod(m, int *) = so->so_options & optname;
+ break;
+
+ case SO_TYPE:
+ *mtod(m, int *) = so->so_type;
+ break;
+
+ case SO_ERROR:
+ *mtod(m, int *) = so->so_error;
+ so->so_error = 0;
+ break;
+
+ case SO_SNDBUF:
+ *mtod(m, int *) = so->so_snd.sb_hiwat;
+ break;
+
+ case SO_RCVBUF:
+ *mtod(m, int *) = so->so_rcv.sb_hiwat;
+ break;
+
+ case SO_SNDLOWAT:
+ *mtod(m, int *) = so->so_snd.sb_lowat;
+ break;
+
+ case SO_RCVLOWAT:
+ *mtod(m, int *) = so->so_rcv.sb_lowat;
+ break;
+
+ case SO_SNDTIMEO:
+ case SO_RCVTIMEO:
+ {
+ int val = (optname == SO_SNDTIMEO ?
+ so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
+
+ m->m_len = sizeof(struct timeval);
+ mtod(m, struct timeval *)->tv_sec = val / hz;
+ mtod(m, struct timeval *)->tv_usec =
+ (val % hz) / tick;
+ break;
+ }
+
+ default:
+ (void)m_free(m);
+ return (ENOPROTOOPT);
+ }
+ *mp = m;
+ return (0);
+ }
+}
+
+sohasoutofband(so)
+ register struct socket *so;
+{
+ struct proc *p;
+
+ if (so->so_pgid < 0)
+ gsignal(-so->so_pgid, SIGURG);
+ else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
+ psignal(p, SIGURG);
+ selwakeup(&so->so_rcv.sb_sel);
+}
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
new file mode 100644
index 0000000..d4af592
--- /dev/null
+++ b/sys/kern/uipc_socket2.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+/*
+ * Primitive routines for operating on sockets and socket buffers
+ */
+
+/* strings for sleep message: */
+char netio[] = "netio";
+char netcon[] = "netcon";
+char netcls[] = "netcls";
+
+u_long sb_max = SB_MAX; /* patchable */
+
+/*
+ * Procedures to manipulate state flags of socket
+ * and do appropriate wakeups. Normal sequence from the
+ * active (originating) side is that soisconnecting() is
+ * called during processing of connect() call,
+ * resulting in an eventual call to soisconnected() if/when the
+ * connection is established. When the connection is torn down
+ * soisdisconnecting() is called during processing of disconnect() call,
+ * and soisdisconnected() is called when the connection to the peer
+ * is totally severed. The semantics of these routines are such that
+ * connectionless protocols can call soisconnected() and soisdisconnected()
+ * only, bypassing the in-progress calls when setting up a ``connection''
+ * takes no time.
+ *
+ * From the passive side, a socket is created with
+ * two queues of sockets: so_q0 for connections in progress
+ * and so_q for connections already made and awaiting user acceptance.
+ * As a protocol is preparing incoming connections, it creates a socket
+ * structure queued on so_q0 by calling sonewconn(). When the connection
+ * is established, soisconnected() is called, and transfers the
+ * socket structure to so_q, making it available to accept().
+ *
+ * If a socket is closed with sockets on either
+ * so_q0 or so_q, these sockets are dropped.
+ *
+ * If higher level protocols are implemented in
+ * the kernel, the wakeups done here will sometimes
+ * cause software-interrupt process scheduling.
+ */
+
+soisconnecting(so)
+ register struct socket *so;
+{
+
+ so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
+ so->so_state |= SS_ISCONNECTING;
+}
+
+soisconnected(so)
+ register struct socket *so;
+{
+ register struct socket *head = so->so_head;
+
+ so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
+ so->so_state |= SS_ISCONNECTED;
+ if (head && soqremque(so, 0)) {
+ soqinsque(head, so, 1);
+ sorwakeup(head);
+ wakeup((caddr_t)&head->so_timeo);
+ } else {
+ wakeup((caddr_t)&so->so_timeo);
+ sorwakeup(so);
+ sowwakeup(so);
+ }
+}
+
+soisdisconnecting(so)
+ register struct socket *so;
+{
+
+ so->so_state &= ~SS_ISCONNECTING;
+ so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
+ wakeup((caddr_t)&so->so_timeo);
+ sowwakeup(so);
+ sorwakeup(so);
+}
+
+soisdisconnected(so)
+ register struct socket *so;
+{
+
+ so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
+ so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
+ wakeup((caddr_t)&so->so_timeo);
+ sowwakeup(so);
+ sorwakeup(so);
+}
+
+/*
+ * When an attempt at a new connection is noted on a socket
+ * which accepts connections, sonewconn is called. If the
+ * connection is possible (subject to space constraints, etc.)
+ * then we allocate a new structure, propoerly linked into the
+ * data structure of the original socket, and return this.
+ * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
+ *
+ * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
+ * to catch calls that are missing the (new) second parameter.
+ */
+struct socket *
+sonewconn1(head, connstatus)
+ register struct socket *head;
+ int connstatus;
+{
+ register struct socket *so;
+ int soqueue = connstatus ? 1 : 0;
+
+ if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
+ return ((struct socket *)0);
+ MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
+ if (so == NULL)
+ return ((struct socket *)0);
+ bzero((caddr_t)so, sizeof(*so));
+ so->so_type = head->so_type;
+ so->so_options = head->so_options &~ SO_ACCEPTCONN;
+ so->so_linger = head->so_linger;
+ so->so_state = head->so_state | SS_NOFDREF;
+ so->so_proto = head->so_proto;
+ so->so_timeo = head->so_timeo;
+ so->so_pgid = head->so_pgid;
+ (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
+ soqinsque(head, so, soqueue);
+ if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
+ (void) soqremque(so, soqueue);
+ (void) free((caddr_t)so, M_SOCKET);
+ return ((struct socket *)0);
+ }
+ if (connstatus) {
+ sorwakeup(head);
+ wakeup((caddr_t)&head->so_timeo);
+ so->so_state |= connstatus;
+ }
+ return (so);
+}
+
+soqinsque(head, so, q)
+ register struct socket *head, *so;
+ int q;
+{
+
+ register struct socket **prev;
+ so->so_head = head;
+ if (q == 0) {
+ head->so_q0len++;
+ so->so_q0 = 0;
+ for (prev = &(head->so_q0); *prev; )
+ prev = &((*prev)->so_q0);
+ } else {
+ head->so_qlen++;
+ so->so_q = 0;
+ for (prev = &(head->so_q); *prev; )
+ prev = &((*prev)->so_q);
+ }
+ *prev = so;
+}
+
+soqremque(so, q)
+ register struct socket *so;
+ int q;
+{
+ register struct socket *head, *prev, *next;
+
+ head = so->so_head;
+ prev = head;
+ for (;;) {
+ next = q ? prev->so_q : prev->so_q0;
+ if (next == so)
+ break;
+ if (next == 0)
+ return (0);
+ prev = next;
+ }
+ if (q == 0) {
+ prev->so_q0 = next->so_q0;
+ head->so_q0len--;
+ } else {
+ prev->so_q = next->so_q;
+ head->so_qlen--;
+ }
+ next->so_q0 = next->so_q = 0;
+ next->so_head = 0;
+ return (1);
+}
+
+/*
+ * Socantsendmore indicates that no more data will be sent on the
+ * socket; it would normally be applied to a socket when the user
+ * informs the system that no more data is to be sent, by the protocol
+ * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
+ * will be received, and will normally be applied to the socket by a
+ * protocol when it detects that the peer will send no more data.
+ * Data queued for reading in the socket may yet be read.
+ */
+
+socantsendmore(so)
+ struct socket *so;
+{
+
+ so->so_state |= SS_CANTSENDMORE;
+ sowwakeup(so);
+}
+
+socantrcvmore(so)
+ struct socket *so;
+{
+
+ so->so_state |= SS_CANTRCVMORE;
+ sorwakeup(so);
+}
+
+/*
+ * Wait for data to arrive at/drain from a socket buffer.
+ */
+sbwait(sb)
+ struct sockbuf *sb;
+{
+
+ sb->sb_flags |= SB_WAIT;
+ return (tsleep((caddr_t)&sb->sb_cc,
+ (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
+ sb->sb_timeo));
+}
+
+/*
+ * Lock a sockbuf already known to be locked;
+ * return any error returned from sleep (EINTR).
+ */
+sb_lock(sb)
+ register struct sockbuf *sb;
+{
+ int error;
+
+ while (sb->sb_flags & SB_LOCK) {
+ sb->sb_flags |= SB_WANT;
+ if (error = tsleep((caddr_t)&sb->sb_flags,
+ (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
+ netio, 0))
+ return (error);
+ }
+ sb->sb_flags |= SB_LOCK;
+ return (0);
+}
+
+/*
+ * Wakeup processes waiting on a socket buffer.
+ * Do asynchronous notification via SIGIO
+ * if the socket has the SS_ASYNC flag set.
+ */
+sowakeup(so, sb)
+ register struct socket *so;
+ register struct sockbuf *sb;
+{
+ struct proc *p;
+
+ selwakeup(&sb->sb_sel);
+ sb->sb_flags &= ~SB_SEL;
+ if (sb->sb_flags & SB_WAIT) {
+ sb->sb_flags &= ~SB_WAIT;
+ wakeup((caddr_t)&sb->sb_cc);
+ }
+ if (so->so_state & SS_ASYNC) {
+ if (so->so_pgid < 0)
+ gsignal(-so->so_pgid, SIGIO);
+ else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
+ psignal(p, SIGIO);
+ }
+}
+
+/*
+ * Socket buffer (struct sockbuf) utility routines.
+ *
+ * Each socket contains two socket buffers: one for sending data and
+ * one for receiving data. Each buffer contains a queue of mbufs,
+ * information about the number of mbufs and amount of data in the
+ * queue, and other fields allowing select() statements and notification
+ * on data availability to be implemented.
+ *
+ * Data stored in a socket buffer is maintained as a list of records.
+ * Each record is a list of mbufs chained together with the m_next
+ * field. Records are chained together with the m_nextpkt field. The upper
+ * level routine soreceive() expects the following conventions to be
+ * observed when placing information in the receive buffer:
+ *
+ * 1. If the protocol requires each message be preceded by the sender's
+ * name, then a record containing that name must be present before
+ * any associated data (mbuf's must be of type MT_SONAME).
+ * 2. If the protocol supports the exchange of ``access rights'' (really
+ * just additional data associated with the message), and there are
+ * ``rights'' to be received, then a record containing this data
+ * should be present (mbuf's must be of type MT_RIGHTS).
+ * 3. If a name or rights record exists, then it must be followed by
+ * a data record, perhaps of zero length.
+ *
+ * Before using a new socket structure it is first necessary to reserve
+ * buffer space to the socket, by calling sbreserve(). This should commit
+ * some of the available buffer space in the system buffer pool for the
+ * socket (currently, it does nothing but enforce limits). The space
+ * should be released by calling sbrelease() when the socket is destroyed.
+ */
+
+soreserve(so, sndcc, rcvcc)
+ register struct socket *so;
+ u_long sndcc, rcvcc;
+{
+
+ if (sbreserve(&so->so_snd, sndcc) == 0)
+ goto bad;
+ if (sbreserve(&so->so_rcv, rcvcc) == 0)
+ goto bad2;
+ if (so->so_rcv.sb_lowat == 0)
+ so->so_rcv.sb_lowat = 1;
+ if (so->so_snd.sb_lowat == 0)
+ so->so_snd.sb_lowat = MCLBYTES;
+ if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
+ so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
+ return (0);
+bad2:
+ sbrelease(&so->so_snd);
+bad:
+ return (ENOBUFS);
+}
+
+/*
+ * Allot mbufs to a sockbuf.
+ * Attempt to scale mbmax so that mbcnt doesn't become limiting
+ * if buffering efficiency is near the normal case.
+ */
+sbreserve(sb, cc)
+ struct sockbuf *sb;
+ u_long cc;
+{
+
+ if (cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES))
+ return (0);
+ sb->sb_hiwat = cc;
+ sb->sb_mbmax = min(cc * 2, sb_max);
+ if (sb->sb_lowat > sb->sb_hiwat)
+ sb->sb_lowat = sb->sb_hiwat;
+ return (1);
+}
+
+/*
+ * Free mbufs held by a socket, and reserved mbuf space.
+ */
+sbrelease(sb)
+ struct sockbuf *sb;
+{
+
+ sbflush(sb);
+ sb->sb_hiwat = sb->sb_mbmax = 0;
+}
+
+/*
+ * Routines to add and remove
+ * data from an mbuf queue.
+ *
+ * The routines sbappend() or sbappendrecord() are normally called to
+ * append new mbufs to a socket buffer, after checking that adequate
+ * space is available, comparing the function sbspace() with the amount
+ * of data to be added. sbappendrecord() differs from sbappend() in
+ * that data supplied is treated as the beginning of a new record.
+ * To place a sender's address, optional access rights, and data in a
+ * socket receive buffer, sbappendaddr() should be used. To place
+ * access rights and data in a socket receive buffer, sbappendrights()
+ * should be used. In either case, the new data begins a new record.
+ * Note that unlike sbappend() and sbappendrecord(), these routines check
+ * for the caller that there will be enough space to store the data.
+ * Each fails if there is not enough space, or if it cannot find mbufs
+ * to store additional information in.
+ *
+ * Reliable protocols may use the socket send buffer to hold data
+ * awaiting acknowledgement. Data is normally copied from a socket
+ * send buffer in a protocol with m_copy for output to a peer,
+ * and then removing the data from the socket buffer with sbdrop()
+ * or sbdroprecord() when the data is acknowledged by the peer.
+ */
+
+/*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb. The additional space associated
+ * the mbuf chain is recorded in sb. Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+sbappend(sb, m)
+ struct sockbuf *sb;
+ struct mbuf *m;
+{
+ register struct mbuf *n;
+
+ if (m == 0)
+ return;
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ do {
+ if (n->m_flags & M_EOR) {
+ sbappendrecord(sb, m); /* XXXXXX!!!! */
+ return;
+ }
+ } while (n->m_next && (n = n->m_next));
+ }
+ sbcompress(sb, m, n);
+}
+
+#ifdef SOCKBUF_DEBUG
+sbcheck(sb)
+ register struct sockbuf *sb;
+{
+ register struct mbuf *m;
+ register int len = 0, mbcnt = 0;
+
+ for (m = sb->sb_mb; m; m = m->m_next) {
+ len += m->m_len;
+ mbcnt += MSIZE;
+ if (m->m_flags & M_EXT)
+ mbcnt += m->m_ext.ext_size;
+ if (m->m_nextpkt)
+ panic("sbcheck nextpkt");
+ }
+ if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
+ printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
+ mbcnt, sb->sb_mbcnt);
+ panic("sbcheck");
+ }
+}
+#endif
+
+/*
+ * As above, except the mbuf chain
+ * begins a new record.
+ */
+sbappendrecord(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+ register struct mbuf *m;
+
+ if (m0 == 0)
+ return;
+ if (m = sb->sb_mb)
+ while (m->m_nextpkt)
+ m = m->m_nextpkt;
+ /*
+ * Put the first mbuf on the queue.
+ * Note this permits zero length records.
+ */
+ sballoc(sb, m0);
+ if (m)
+ m->m_nextpkt = m0;
+ else
+ sb->sb_mb = m0;
+ m = m0->m_next;
+ m0->m_next = 0;
+ if (m && (m0->m_flags & M_EOR)) {
+ m0->m_flags &= ~M_EOR;
+ m->m_flags |= M_EOR;
+ }
+ sbcompress(sb, m, m0);
+}
+
+/*
+ * As above except that OOB data
+ * is inserted at the beginning of the sockbuf,
+ * but after any other OOB data.
+ */
+sbinsertoob(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+ register struct mbuf *m;
+ register struct mbuf **mp;
+
+ if (m0 == 0)
+ return;
+ for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) {
+ again:
+ switch (m->m_type) {
+
+ case MT_OOBDATA:
+ continue; /* WANT next train */
+
+ case MT_CONTROL:
+ if (m = m->m_next)
+ goto again; /* inspect THIS train further */
+ }
+ break;
+ }
+ /*
+ * Put the first mbuf on the queue.
+ * Note this permits zero length records.
+ */
+ sballoc(sb, m0);
+ m0->m_nextpkt = *mp;
+ *mp = m0;
+ m = m0->m_next;
+ m0->m_next = 0;
+ if (m && (m0->m_flags & M_EOR)) {
+ m0->m_flags &= ~M_EOR;
+ m->m_flags |= M_EOR;
+ }
+ sbcompress(sb, m, m0);
+}
+
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket. If present,
+ * m0 must include a packet header with total length.
+ * Returns 0 if no space in sockbuf or insufficient mbufs.
+ */
+sbappendaddr(sb, asa, m0, control)
+ register struct sockbuf *sb;
+ struct sockaddr *asa;
+ struct mbuf *m0, *control;
+{
+ register struct mbuf *m, *n;
+ int space = asa->sa_len;
+
+if (m0 && (m0->m_flags & M_PKTHDR) == 0)
+panic("sbappendaddr");
+ if (m0)
+ space += m0->m_pkthdr.len;
+ for (n = control; n; n = n->m_next) {
+ space += n->m_len;
+ if (n->m_next == 0) /* keep pointer to last control buf */
+ break;
+ }
+ if (space > sbspace(sb))
+ return (0);
+ if (asa->sa_len > MLEN)
+ return (0);
+ MGET(m, M_DONTWAIT, MT_SONAME);
+ if (m == 0)
+ return (0);
+ m->m_len = asa->sa_len;
+ bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
+ if (n)
+ n->m_next = m0; /* concatenate data to control */
+ else
+ control = m0;
+ m->m_next = control;
+ for (n = m; n; n = n->m_next)
+ sballoc(sb, n);
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ n->m_nextpkt = m;
+ } else
+ sb->sb_mb = m;
+ return (1);
+}
+
+sbappendcontrol(sb, m0, control)
+ struct sockbuf *sb;
+ struct mbuf *control, *m0;
+{
+ register struct mbuf *m, *n;
+ int space = 0;
+
+ if (control == 0)
+ panic("sbappendcontrol");
+ for (m = control; ; m = m->m_next) {
+ space += m->m_len;
+ if (m->m_next == 0)
+ break;
+ }
+ n = m; /* save pointer to last control buffer */
+ for (m = m0; m; m = m->m_next)
+ space += m->m_len;
+ if (space > sbspace(sb))
+ return (0);
+ n->m_next = m0; /* concatenate data to control */
+ for (m = control; m; m = m->m_next)
+ sballoc(sb, m);
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ n->m_nextpkt = control;
+ } else
+ sb->sb_mb = control;
+ return (1);
+}
+
+/*
+ * Compress mbuf chain m into the socket
+ * buffer sb following mbuf n. If n
+ * is null, the buffer is presumed empty.
+ */
+sbcompress(sb, m, n)
+ register struct sockbuf *sb;
+ register struct mbuf *m, *n;
+{
+ register int eor = 0;
+ register struct mbuf *o;
+
+ while (m) {
+ eor |= m->m_flags & M_EOR;
+ if (m->m_len == 0 &&
+ (eor == 0 ||
+ (((o = m->m_next) || (o = n)) &&
+ o->m_type == m->m_type))) {
+ m = m_free(m);
+ continue;
+ }
+ if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
+ (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
+ n->m_type == m->m_type) {
+ bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
+ (unsigned)m->m_len);
+ n->m_len += m->m_len;
+ sb->sb_cc += m->m_len;
+ m = m_free(m);
+ continue;
+ }
+ if (n)
+ n->m_next = m;
+ else
+ sb->sb_mb = m;
+ sballoc(sb, m);
+ n = m;
+ m->m_flags &= ~M_EOR;
+ m = m->m_next;
+ n->m_next = 0;
+ }
+ if (eor) {
+ if (n)
+ n->m_flags |= eor;
+ else
+ printf("semi-panic: sbcompress\n");
+ }
+}
+
+/*
+ * Free all mbufs in a sockbuf.
+ * Check that all resources are reclaimed.
+ */
+sbflush(sb)
+ register struct sockbuf *sb;
+{
+
+ if (sb->sb_flags & SB_LOCK)
+ panic("sbflush");
+ while (sb->sb_mbcnt)
+ sbdrop(sb, (int)sb->sb_cc);
+ if (sb->sb_cc || sb->sb_mb)
+ panic("sbflush 2");
+}
+
+/*
+ * Drop data from (the front of) a sockbuf.
+ */
+sbdrop(sb, len)
+ register struct sockbuf *sb;
+ register int len;
+{
+ register struct mbuf *m, *mn;
+ struct mbuf *next;
+
+ next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+ while (len > 0) {
+ if (m == 0) {
+ if (next == 0)
+ panic("sbdrop");
+ m = next;
+ next = m->m_nextpkt;
+ continue;
+ }
+ if (m->m_len > len) {
+ m->m_len -= len;
+ m->m_data += len;
+ sb->sb_cc -= len;
+ break;
+ }
+ len -= m->m_len;
+ sbfree(sb, m);
+ MFREE(m, mn);
+ m = mn;
+ }
+ while (m && m->m_len == 0) {
+ sbfree(sb, m);
+ MFREE(m, mn);
+ m = mn;
+ }
+ if (m) {
+ sb->sb_mb = m;
+ m->m_nextpkt = next;
+ } else
+ sb->sb_mb = next;
+}
+
+/*
+ * Drop a record off the front of a sockbuf
+ * and move the next record to the front.
+ */
+sbdroprecord(sb)
+ register struct sockbuf *sb;
+{
+ register struct mbuf *m, *mn;
+
+ m = sb->sb_mb;
+ if (m) {
+ sb->sb_mb = m->m_nextpkt;
+ do {
+ sbfree(sb, m);
+ MFREE(m, mn);
+ } while (m = mn);
+ }
+}
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
new file mode 100644
index 0000000..89b7ffd
--- /dev/null
+++ b/sys/kern/uipc_syscalls.c
@@ -0,0 +1,1217 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/filedesc.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+/*
+ * System call interface to the socket abstraction.
+ */
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+#define COMPAT_OLDSOCK
+#endif
+
+extern struct fileops socketops;
+
+struct socket_args {
+ int domain;
+ int type;
+ int protocol;
+};
+socket(p, uap, retval)
+ struct proc *p;
+ register struct socket_args *uap;
+ int *retval;
+{
+ struct filedesc *fdp = p->p_fd;
+ struct socket *so;
+ struct file *fp;
+ int fd, error;
+
+ if (error = falloc(p, &fp, &fd))
+ return (error);
+ fp->f_flag = FREAD|FWRITE;
+ fp->f_type = DTYPE_SOCKET;
+ fp->f_ops = &socketops;
+ if (error = socreate(uap->domain, &so, uap->type, uap->protocol)) {
+ fdp->fd_ofiles[fd] = 0;
+ ffree(fp);
+ } else {
+ fp->f_data = (caddr_t)so;
+ *retval = fd;
+ }
+ return (error);
+}
+
+struct bind_args {
+ int s;
+ caddr_t name;
+ int namelen;
+};
+/* ARGSUSED */
+bind(p, uap, retval)
+ struct proc *p;
+ register struct bind_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ struct mbuf *nam;
+ int error;
+
+ if (error = getsock(p->p_fd, uap->s, &fp))
+ return (error);
+ if (error = sockargs(&nam, uap->name, uap->namelen, MT_SONAME))
+ return (error);
+ error = sobind((struct socket *)fp->f_data, nam);
+ m_freem(nam);
+ return (error);
+}
+
+struct listen_args {
+ int s;
+ int backlog;
+};
+/* ARGSUSED */
+listen(p, uap, retval)
+ struct proc *p;
+ register struct listen_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ int error;
+
+ if (error = getsock(p->p_fd, uap->s, &fp))
+ return (error);
+ return (solisten((struct socket *)fp->f_data, uap->backlog));
+}
+
+struct accept_args {
+ int s;
+ caddr_t name;
+ int *anamelen;
+#ifdef COMPAT_OLDSOCK
+ int compat_43; /* pseudo */
+#endif
+};
+
+#ifdef COMPAT_OLDSOCK
+accept(p, uap, retval)
+ struct proc *p;
+ struct accept_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 0;
+ return (accept1(p, uap, retval));
+}
+
+oaccept(p, uap, retval)
+ struct proc *p;
+ struct accept_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 1;
+ return (accept1(p, uap, retval));
+}
+#else /* COMPAT_OLDSOCK */
+
+#define accept1 accept
+#endif
+
+accept1(p, uap, retval)
+ struct proc *p;
+ register struct accept_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ struct mbuf *nam;
+ int namelen, error, s;
+ register struct socket *so;
+
+ if (uap->name && (error = copyin((caddr_t)uap->anamelen,
+ (caddr_t)&namelen, sizeof (namelen))))
+ return (error);
+ if (error = getsock(p->p_fd, uap->s, &fp))
+ return (error);
+ s = splnet();
+ so = (struct socket *)fp->f_data;
+ if ((so->so_options & SO_ACCEPTCONN) == 0) {
+ splx(s);
+ return (EINVAL);
+ }
+ if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
+ splx(s);
+ return (EWOULDBLOCK);
+ }
+ while (so->so_qlen == 0 && so->so_error == 0) {
+ if (so->so_state & SS_CANTRCVMORE) {
+ so->so_error = ECONNABORTED;
+ break;
+ }
+ if (error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
+ netcon, 0)) {
+ splx(s);
+ return (error);
+ }
+ }
+ if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
+ splx(s);
+ return (error);
+ }
+ if (error = falloc(p, &fp, retval)) {
+ splx(s);
+ return (error);
+ }
+ { struct socket *aso = so->so_q;
+ if (soqremque(aso, 1) == 0)
+ panic("accept");
+ so = aso;
+ }
+ fp->f_type = DTYPE_SOCKET;
+ fp->f_flag = FREAD|FWRITE;
+ fp->f_ops = &socketops;
+ fp->f_data = (caddr_t)so;
+ nam = m_get(M_WAIT, MT_SONAME);
+ (void) soaccept(so, nam);
+ if (uap->name) {
+#ifdef COMPAT_OLDSOCK
+ if (uap->compat_43)
+ mtod(nam, struct osockaddr *)->sa_family =
+ mtod(nam, struct sockaddr *)->sa_family;
+#endif
+ if (namelen > nam->m_len)
+ namelen = nam->m_len;
+ /* SHOULD COPY OUT A CHAIN HERE */
+ if ((error = copyout(mtod(nam, caddr_t), (caddr_t)uap->name,
+ (u_int)namelen)) == 0)
+ error = copyout((caddr_t)&namelen,
+ (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
+ }
+ m_freem(nam);
+ splx(s);
+ return (error);
+}
+
+struct connect_args {
+ int s;
+ caddr_t name;
+ int namelen;
+};
+/* ARGSUSED */
+connect(p, uap, retval)
+ struct proc *p;
+ register struct connect_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ register struct socket *so;
+ struct mbuf *nam;
+ int error, s;
+
+ if (error = getsock(p->p_fd, uap->s, &fp))
+ return (error);
+ so = (struct socket *)fp->f_data;
+ if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
+ return (EALREADY);
+ if (error = sockargs(&nam, uap->name, uap->namelen, MT_SONAME))
+ return (error);
+ error = soconnect(so, nam);
+ if (error)
+ goto bad;
+ if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
+ m_freem(nam);
+ return (EINPROGRESS);
+ }
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
+ if (error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
+ netcon, 0))
+ break;
+ if (error == 0) {
+ error = so->so_error;
+ so->so_error = 0;
+ }
+ splx(s);
+bad:
+ so->so_state &= ~SS_ISCONNECTING;
+ m_freem(nam);
+ if (error == ERESTART)
+ error = EINTR;
+ return (error);
+}
+
+struct socketpair_args {
+ int domain;
+ int type;
+ int protocol;
+ int *rsv;
+};
+socketpair(p, uap, retval)
+ struct proc *p;
+ register struct socketpair_args *uap;
+ int retval[];
+{
+ register struct filedesc *fdp = p->p_fd;
+ struct file *fp1, *fp2;
+ struct socket *so1, *so2;
+ int fd, error, sv[2];
+
+ if (error = socreate(uap->domain, &so1, uap->type, uap->protocol))
+ return (error);
+ if (error = socreate(uap->domain, &so2, uap->type, uap->protocol))
+ goto free1;
+ if (error = falloc(p, &fp1, &fd))
+ goto free2;
+ sv[0] = fd;
+ fp1->f_flag = FREAD|FWRITE;
+ fp1->f_type = DTYPE_SOCKET;
+ fp1->f_ops = &socketops;
+ fp1->f_data = (caddr_t)so1;
+ if (error = falloc(p, &fp2, &fd))
+ goto free3;
+ fp2->f_flag = FREAD|FWRITE;
+ fp2->f_type = DTYPE_SOCKET;
+ fp2->f_ops = &socketops;
+ fp2->f_data = (caddr_t)so2;
+ sv[1] = fd;
+ if (error = soconnect2(so1, so2))
+ goto free4;
+ if (uap->type == SOCK_DGRAM) {
+ /*
+ * Datagram socket connection is asymmetric.
+ */
+ if (error = soconnect2(so2, so1))
+ goto free4;
+ }
+ error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
+ retval[0] = sv[0]; /* XXX ??? */
+ retval[1] = sv[1]; /* XXX ??? */
+ return (error);
+free4:
+ ffree(fp2);
+ fdp->fd_ofiles[sv[1]] = 0;
+free3:
+ ffree(fp1);
+ fdp->fd_ofiles[sv[0]] = 0;
+free2:
+ (void)soclose(so2);
+free1:
+ (void)soclose(so1);
+ return (error);
+}
+
+struct sendto_args {
+ int s;
+ caddr_t buf;
+ size_t len;
+ int flags;
+ caddr_t to;
+ int tolen;
+};
+sendto(p, uap, retval)
+ struct proc *p;
+ register struct sendto_args *uap;
+ int *retval;
+{
+ struct msghdr msg;
+ struct iovec aiov;
+
+ msg.msg_name = uap->to;
+ msg.msg_namelen = uap->tolen;
+ msg.msg_iov = &aiov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = 0;
+#ifdef COMPAT_OLDSOCK
+ msg.msg_flags = 0;
+#endif
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->len;
+ return (sendit(p, uap->s, &msg, uap->flags, retval));
+}
+
+#ifdef COMPAT_OLDSOCK
+struct osend_args {
+ int s;
+ caddr_t buf;
+ int len;
+ int flags;
+};
+osend(p, uap, retval)
+ struct proc *p;
+ register struct osend_args *uap;
+ int *retval;
+{
+ struct msghdr msg;
+ struct iovec aiov;
+
+ msg.msg_name = 0;
+ msg.msg_namelen = 0;
+ msg.msg_iov = &aiov;
+ msg.msg_iovlen = 1;
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->len;
+ msg.msg_control = 0;
+ msg.msg_flags = 0;
+ return (sendit(p, uap->s, &msg, uap->flags, retval));
+}
+
+#define MSG_COMPAT 0x8000
+struct osendmsg_args {
+ int s;
+ caddr_t msg;
+ int flags;
+};
+osendmsg(p, uap, retval)
+ struct proc *p;
+ register struct osendmsg_args *uap;
+ int *retval;
+{
+ struct msghdr msg;
+ struct iovec aiov[UIO_SMALLIOV], *iov;
+ int error;
+
+ if (error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)))
+ return (error);
+ if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
+ if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+ return (EMSGSIZE);
+ MALLOC(iov, struct iovec *,
+ sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+ M_WAITOK);
+ } else
+ iov = aiov;
+ if (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
+ (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))
+ goto done;
+ msg.msg_flags = MSG_COMPAT;
+ msg.msg_iov = iov;
+ error = sendit(p, uap->s, &msg, uap->flags, retval);
+done:
+ if (iov != aiov)
+ FREE(iov, M_IOV);
+ return (error);
+}
+#endif
+
+struct sendmsg_args {
+ int s;
+ caddr_t msg;
+ int flags;
+};
+sendmsg(p, uap, retval)
+ struct proc *p;
+ register struct sendmsg_args *uap;
+ int *retval;
+{
+ struct msghdr msg;
+ struct iovec aiov[UIO_SMALLIOV], *iov;
+ int error;
+
+ if (error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)))
+ return (error);
+ if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
+ if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+ return (EMSGSIZE);
+ MALLOC(iov, struct iovec *,
+ sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+ M_WAITOK);
+ } else
+ iov = aiov;
+ if (msg.msg_iovlen &&
+ (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
+ (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
+ goto done;
+ msg.msg_iov = iov;
+#ifdef COMPAT_OLDSOCK
+ msg.msg_flags = 0;
+#endif
+ error = sendit(p, uap->s, &msg, uap->flags, retval);
+done:
+ if (iov != aiov)
+ FREE(iov, M_IOV);
+ return (error);
+}
+
+sendit(p, s, mp, flags, retsize)
+ register struct proc *p;
+ int s;
+ register struct msghdr *mp;
+ int flags, *retsize;
+{
+ struct file *fp;
+ struct uio auio;
+ register struct iovec *iov;
+ register int i;
+ struct mbuf *to, *control;
+ int len, error;
+#ifdef KTRACE
+ struct iovec *ktriov = NULL;
+#endif
+
+ if (error = getsock(p->p_fd, s, &fp))
+ return (error);
+ auio.uio_iov = mp->msg_iov;
+ auio.uio_iovcnt = mp->msg_iovlen;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_procp = p;
+ auio.uio_offset = 0; /* XXX */
+ auio.uio_resid = 0;
+ iov = mp->msg_iov;
+ for (i = 0; i < mp->msg_iovlen; i++, iov++) {
+ if (iov->iov_len < 0)
+ return (EINVAL);
+ if ((auio.uio_resid += iov->iov_len) < 0)
+ return (EINVAL);
+ }
+ if (mp->msg_name) {
+ if (error = sockargs(&to, mp->msg_name, mp->msg_namelen,
+ MT_SONAME))
+ return (error);
+ } else
+ to = 0;
+ if (mp->msg_control) {
+ if (mp->msg_controllen < sizeof(struct cmsghdr)
+#ifdef COMPAT_OLDSOCK
+ && mp->msg_flags != MSG_COMPAT
+#endif
+ ) {
+ error = EINVAL;
+ goto bad;
+ }
+ if (error = sockargs(&control, mp->msg_control,
+ mp->msg_controllen, MT_CONTROL))
+ goto bad;
+#ifdef COMPAT_OLDSOCK
+ if (mp->msg_flags == MSG_COMPAT) {
+ register struct cmsghdr *cm;
+
+ M_PREPEND(control, sizeof(*cm), M_WAIT);
+ if (control == 0) {
+ error = ENOBUFS;
+ goto bad;
+ } else {
+ cm = mtod(control, struct cmsghdr *);
+ cm->cmsg_len = control->m_len;
+ cm->cmsg_level = SOL_SOCKET;
+ cm->cmsg_type = SCM_RIGHTS;
+ }
+ }
+#endif
+ } else
+ control = 0;
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_GENIO)) {
+ int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
+
+ MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+ bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+ }
+#endif
+ len = auio.uio_resid;
+ if (error = sosend((struct socket *)fp->f_data, to, &auio,
+ (struct mbuf *)0, control, flags)) {
+ if (auio.uio_resid != len && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ if (error == EPIPE)
+ psignal(p, SIGPIPE);
+ }
+ if (error == 0)
+ *retsize = len - auio.uio_resid;
+#ifdef KTRACE
+ if (ktriov != NULL) {
+ if (error == 0)
+ ktrgenio(p->p_tracep, s, UIO_WRITE,
+ ktriov, *retsize, error);
+ FREE(ktriov, M_TEMP);
+ }
+#endif
+bad:
+ if (to)
+ m_freem(to);
+ return (error);
+}
+
+struct recvfrom_args {
+ int s;
+ caddr_t buf;
+ size_t len;
+ int flags;
+ caddr_t from;
+ int *fromlenaddr;
+};
+
+#ifdef COMPAT_OLDSOCK
+orecvfrom(p, uap, retval)
+ struct proc *p;
+ struct recvfrom_args *uap;
+ int *retval;
+{
+
+ uap->flags |= MSG_COMPAT;
+ return (recvfrom(p, uap, retval));
+}
+#endif
+
+recvfrom(p, uap, retval)
+ struct proc *p;
+ register struct recvfrom_args *uap;
+ int *retval;
+{
+ struct msghdr msg;
+ struct iovec aiov;
+ int error;
+
+ if (uap->fromlenaddr) {
+ if (error = copyin((caddr_t)uap->fromlenaddr,
+ (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)))
+ return (error);
+ } else
+ msg.msg_namelen = 0;
+ msg.msg_name = uap->from;
+ msg.msg_iov = &aiov;
+ msg.msg_iovlen = 1;
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->len;
+ msg.msg_control = 0;
+ msg.msg_flags = uap->flags;
+ return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval));
+}
+
+#ifdef COMPAT_OLDSOCK
+struct orecv_args {
+ int s;
+ caddr_t buf;
+ int len;
+ int flags;
+};
+orecv(p, uap, retval)
+ struct proc *p;
+ register struct orecv_args *uap;
+ int *retval;
+{
+ struct msghdr msg;
+ struct iovec aiov;
+
+ msg.msg_name = 0;
+ msg.msg_namelen = 0;
+ msg.msg_iov = &aiov;
+ msg.msg_iovlen = 1;
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->len;
+ msg.msg_control = 0;
+ msg.msg_flags = uap->flags;
+ return (recvit(p, uap->s, &msg, (caddr_t)0, retval));
+}
+
+/*
+ * Old recvmsg. This code takes advantage of the fact that the old msghdr
+ * overlays the new one, missing only the flags, and with the (old) access
+ * rights where the control fields are now.
+ */
+struct orecvmsg_args {
+ int s;
+ struct omsghdr *msg;
+ int flags;
+};
+orecvmsg(p, uap, retval)
+ struct proc *p;
+ register struct orecvmsg_args *uap;
+ int *retval;
+{
+ struct msghdr msg;
+ struct iovec aiov[UIO_SMALLIOV], *iov;
+ int error;
+
+ if (error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
+ sizeof (struct omsghdr)))
+ return (error);
+ if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
+ if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+ return (EMSGSIZE);
+ MALLOC(iov, struct iovec *,
+ sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+ M_WAITOK);
+ } else
+ iov = aiov;
+ msg.msg_flags = uap->flags | MSG_COMPAT;
+ if (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
+ (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))
+ goto done;
+ msg.msg_iov = iov;
+ error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen, retval);
+
+ if (msg.msg_controllen && error == 0)
+ error = copyout((caddr_t)&msg.msg_controllen,
+ (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
+done:
+ if (iov != aiov)
+ FREE(iov, M_IOV);
+ return (error);
+}
+#endif
+
+struct recvmsg_args {
+ int s;
+ struct msghdr *msg;
+ int flags;
+};
+recvmsg(p, uap, retval)
+ struct proc *p;
+ register struct recvmsg_args *uap;
+ int *retval;
+{
+ struct msghdr msg;
+ struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
+ register int error;
+
+ if (error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)))
+ return (error);
+ if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
+ if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+ return (EMSGSIZE);
+ MALLOC(iov, struct iovec *,
+ sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+ M_WAITOK);
+ } else
+ iov = aiov;
+#ifdef COMPAT_OLDSOCK
+ msg.msg_flags = uap->flags &~ MSG_COMPAT;
+#else
+ msg.msg_flags = uap->flags;
+#endif
+ uiov = msg.msg_iov;
+ msg.msg_iov = iov;
+ if (error = copyin((caddr_t)uiov, (caddr_t)iov,
+ (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))
+ goto done;
+ if ((error = recvit(p, uap->s, &msg, (caddr_t)0, retval)) == 0) {
+ msg.msg_iov = uiov;
+ error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
+ }
+done:
+ if (iov != aiov)
+ FREE(iov, M_IOV);
+ return (error);
+}
+
+recvit(p, s, mp, namelenp, retsize)
+ register struct proc *p;
+ int s;
+ register struct msghdr *mp;
+ caddr_t namelenp;
+ int *retsize;
+{
+ struct file *fp;
+ struct uio auio;
+ register struct iovec *iov;
+ register int i;
+ int len, error;
+ struct mbuf *from = 0, *control = 0;
+#ifdef KTRACE
+ struct iovec *ktriov = NULL;
+#endif
+
+ if (error = getsock(p->p_fd, s, &fp))
+ return (error);
+ auio.uio_iov = mp->msg_iov;
+ auio.uio_iovcnt = mp->msg_iovlen;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_procp = p;
+ auio.uio_offset = 0; /* XXX */
+ auio.uio_resid = 0;
+ iov = mp->msg_iov;
+ for (i = 0; i < mp->msg_iovlen; i++, iov++) {
+ if (iov->iov_len < 0)
+ return (EINVAL);
+ if ((auio.uio_resid += iov->iov_len) < 0)
+ return (EINVAL);
+ }
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_GENIO)) {
+ int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
+
+ MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+ bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+ }
+#endif
+ len = auio.uio_resid;
+ if (error = soreceive((struct socket *)fp->f_data, &from, &auio,
+ (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
+ &mp->msg_flags)) {
+ if (auio.uio_resid != len && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ }
+#ifdef KTRACE
+ if (ktriov != NULL) {
+ if (error == 0)
+ ktrgenio(p->p_tracep, s, UIO_READ,
+ ktriov, len - auio.uio_resid, error);
+ FREE(ktriov, M_TEMP);
+ }
+#endif
+ if (error)
+ goto out;
+ *retsize = len - auio.uio_resid;
+ if (mp->msg_name) {
+ len = mp->msg_namelen;
+ if (len <= 0 || from == 0)
+ len = 0;
+ else {
+#ifdef COMPAT_OLDSOCK
+ if (mp->msg_flags & MSG_COMPAT)
+ mtod(from, struct osockaddr *)->sa_family =
+ mtod(from, struct sockaddr *)->sa_family;
+#endif
+ if (len > from->m_len)
+ len = from->m_len;
+ /* else if len < from->m_len ??? */
+ if (error = copyout(mtod(from, caddr_t),
+ (caddr_t)mp->msg_name, (unsigned)len))
+ goto out;
+ }
+ mp->msg_namelen = len;
+ if (namelenp &&
+ (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
+#ifdef COMPAT_OLDSOCK
+ if (mp->msg_flags & MSG_COMPAT)
+ error = 0; /* old recvfrom didn't check */
+ else
+#endif
+ goto out;
+ }
+ }
+ if (mp->msg_control) {
+#ifdef COMPAT_OLDSOCK
+ /*
+ * We assume that old recvmsg calls won't receive access
+ * rights and other control info, esp. as control info
+ * is always optional and those options didn't exist in 4.3.
+ * If we receive rights, trim the cmsghdr; anything else
+ * is tossed.
+ */
+ if (control && mp->msg_flags & MSG_COMPAT) {
+ if (mtod(control, struct cmsghdr *)->cmsg_level !=
+ SOL_SOCKET ||
+ mtod(control, struct cmsghdr *)->cmsg_type !=
+ SCM_RIGHTS) {
+ mp->msg_controllen = 0;
+ goto out;
+ }
+ control->m_len -= sizeof (struct cmsghdr);
+ control->m_data += sizeof (struct cmsghdr);
+ }
+#endif
+ len = mp->msg_controllen;
+ if (len <= 0 || control == 0)
+ len = 0;
+ else {
+ if (len >= control->m_len)
+ len = control->m_len;
+ else
+ mp->msg_flags |= MSG_CTRUNC;
+ error = copyout((caddr_t)mtod(control, caddr_t),
+ (caddr_t)mp->msg_control, (unsigned)len);
+ }
+ mp->msg_controllen = len;
+ }
+out:
+ if (from)
+ m_freem(from);
+ if (control)
+ m_freem(control);
+ return (error);
+}
+
+struct shutdown_args {
+ int s;
+ int how;
+};
+/* ARGSUSED */
+shutdown(p, uap, retval)
+ struct proc *p;
+ register struct shutdown_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ int error;
+
+ if (error = getsock(p->p_fd, uap->s, &fp))
+ return (error);
+ return (soshutdown((struct socket *)fp->f_data, uap->how));
+}
+
+struct setsockopt_args {
+ int s;
+ int level;
+ int name;
+ caddr_t val;
+ int valsize;
+};
+/* ARGSUSED */
+setsockopt(p, uap, retval)
+ struct proc *p;
+ register struct setsockopt_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ struct mbuf *m = NULL;
+ int error;
+
+ if (error = getsock(p->p_fd, uap->s, &fp))
+ return (error);
+ if (uap->valsize > MLEN)
+ return (EINVAL);
+ if (uap->val) {
+ m = m_get(M_WAIT, MT_SOOPTS);
+ if (m == NULL)
+ return (ENOBUFS);
+ if (error = copyin(uap->val, mtod(m, caddr_t),
+ (u_int)uap->valsize)) {
+ (void) m_free(m);
+ return (error);
+ }
+ m->m_len = uap->valsize;
+ }
+ return (sosetopt((struct socket *)fp->f_data, uap->level,
+ uap->name, m));
+}
+
+struct getsockopt_args {
+ int s;
+ int level;
+ int name;
+ caddr_t val;
+ int *avalsize;
+};
+/* ARGSUSED */
+getsockopt(p, uap, retval)
+ struct proc *p;
+ register struct getsockopt_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ struct mbuf *m = NULL;
+ int valsize, error;
+
+ if (error = getsock(p->p_fd, uap->s, &fp))
+ return (error);
+ if (uap->val) {
+ if (error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
+ sizeof (valsize)))
+ return (error);
+ } else
+ valsize = 0;
+ if ((error = sogetopt((struct socket *)fp->f_data, uap->level,
+ uap->name, &m)) == 0 && uap->val && valsize && m != NULL) {
+ if (valsize > m->m_len)
+ valsize = m->m_len;
+ error = copyout(mtod(m, caddr_t), uap->val, (u_int)valsize);
+ if (error == 0)
+ error = copyout((caddr_t)&valsize,
+ (caddr_t)uap->avalsize, sizeof (valsize));
+ }
+ if (m != NULL)
+ (void) m_free(m);
+ return (error);
+}
+
+struct pipe_args {
+ int dummy;
+};
+/* ARGSUSED */
+pipe(p, uap, retval)
+ struct proc *p;
+ struct pipe_args *uap;
+ int retval[];
+{
+ register struct filedesc *fdp = p->p_fd;
+ struct file *rf, *wf;
+ struct socket *rso, *wso;
+ int fd, error;
+
+ if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0))
+ return (error);
+ if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0))
+ goto free1;
+ if (error = falloc(p, &rf, &fd))
+ goto free2;
+ retval[0] = fd;
+ rf->f_flag = FREAD;
+ rf->f_type = DTYPE_SOCKET;
+ rf->f_ops = &socketops;
+ rf->f_data = (caddr_t)rso;
+ if (error = falloc(p, &wf, &fd))
+ goto free3;
+ wf->f_flag = FWRITE;
+ wf->f_type = DTYPE_SOCKET;
+ wf->f_ops = &socketops;
+ wf->f_data = (caddr_t)wso;
+ retval[1] = fd;
+ if (error = unp_connect2(wso, rso))
+ goto free4;
+ return (0);
+free4:
+ ffree(wf);
+ fdp->fd_ofiles[retval[1]] = 0;
+free3:
+ ffree(rf);
+ fdp->fd_ofiles[retval[0]] = 0;
+free2:
+ (void)soclose(wso);
+free1:
+ (void)soclose(rso);
+ return (error);
+}
+
+/*
+ * Get socket name.
+ */
+struct getsockname_args {
+ int fdes;
+ caddr_t asa;
+ int *alen;
+#ifdef COMPAT_OLDSOCK
+ int compat_43; /* pseudo */
+#endif
+};
+#ifdef COMPAT_OLDSOCK
+getsockname(p, uap, retval)
+ struct proc *p;
+ struct getsockname_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 0;
+ return (getsockname1(p, uap, retval));
+}
+
+ogetsockname(p, uap, retval)
+ struct proc *p;
+ struct getsockname_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 1;
+ return (getsockname1(p, uap, retval));
+}
+#else /* COMPAT_OLDSOCK */
+
+#define getsockname1 getsockname
+#endif
+
+/* ARGSUSED */
+getsockname1(p, uap, retval)
+ struct proc *p;
+ register struct getsockname_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ register struct socket *so;
+ struct mbuf *m;
+ int len, error;
+
+ if (error = getsock(p->p_fd, uap->fdes, &fp))
+ return (error);
+ if (error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)))
+ return (error);
+ so = (struct socket *)fp->f_data;
+ m = m_getclr(M_WAIT, MT_SONAME);
+ if (m == NULL)
+ return (ENOBUFS);
+ if (error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0))
+ goto bad;
+ if (len > m->m_len)
+ len = m->m_len;
+#ifdef COMPAT_OLDSOCK
+ if (uap->compat_43)
+ mtod(m, struct osockaddr *)->sa_family =
+ mtod(m, struct sockaddr *)->sa_family;
+#endif
+ error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len);
+ if (error == 0)
+ error = copyout((caddr_t)&len, (caddr_t)uap->alen,
+ sizeof (len));
+bad:
+ m_freem(m);
+ return (error);
+}
+
+/*
+ * Get name of peer for connected socket.
+ */
+struct getpeername_args {
+ int fdes;
+ caddr_t asa;
+ int *alen;
+#ifdef COMPAT_OLDSOCK
+ int compat_43; /* pseudo */
+#endif
+};
+
+#ifdef COMPAT_OLDSOCK
+getpeername(p, uap, retval)
+ struct proc *p;
+ struct getpeername_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 0;
+ return (getpeername1(p, uap, retval));
+}
+
+ogetpeername(p, uap, retval)
+ struct proc *p;
+ struct getpeername_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 1;
+ return (getpeername1(p, uap, retval));
+}
+#else /* COMPAT_OLDSOCK */
+
+#define getpeername1 getpeername
+#endif
+
+/* ARGSUSED */
+getpeername1(p, uap, retval)
+ struct proc *p;
+ register struct getpeername_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ register struct socket *so;
+ struct mbuf *m;
+ int len, error;
+
+ if (error = getsock(p->p_fd, uap->fdes, &fp))
+ return (error);
+ so = (struct socket *)fp->f_data;
+ if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
+ return (ENOTCONN);
+ if (error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)))
+ return (error);
+ m = m_getclr(M_WAIT, MT_SONAME);
+ if (m == NULL)
+ return (ENOBUFS);
+ if (error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0))
+ goto bad;
+ if (len > m->m_len)
+ len = m->m_len;
+#ifdef COMPAT_OLDSOCK
+ if (uap->compat_43)
+ mtod(m, struct osockaddr *)->sa_family =
+ mtod(m, struct sockaddr *)->sa_family;
+#endif
+ if (error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len))
+ goto bad;
+ error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
+bad:
+ m_freem(m);
+ return (error);
+}
+
+sockargs(mp, buf, buflen, type)
+ struct mbuf **mp;
+ caddr_t buf;
+ int buflen, type;
+{
+ register struct sockaddr *sa;
+ register struct mbuf *m;
+ int error;
+
+ if ((u_int)buflen > MLEN) {
+#ifdef COMPAT_OLDSOCK
+ if (type == MT_SONAME && (u_int)buflen <= 112)
+ buflen = MLEN; /* unix domain compat. hack */
+ else
+#endif
+ return (EINVAL);
+ }
+ m = m_get(M_WAIT, type);
+ if (m == NULL)
+ return (ENOBUFS);
+ m->m_len = buflen;
+ error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
+ if (error)
+ (void) m_free(m);
+ else {
+ *mp = m;
+ if (type == MT_SONAME) {
+ sa = mtod(m, struct sockaddr *);
+
+#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
+ if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
+ sa->sa_family = sa->sa_len;
+#endif
+ sa->sa_len = buflen;
+ }
+ }
+ return (error);
+}
+
+getsock(fdp, fdes, fpp)
+ struct filedesc *fdp;
+ int fdes;
+ struct file **fpp;
+{
+ register struct file *fp;
+
+ if ((unsigned)fdes >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[fdes]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_SOCKET)
+ return (ENOTSOCK);
+ *fpp = fp;
+ return (0);
+}
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
new file mode 100644
index 0000000..94bf8f7
--- /dev/null
+++ b/sys/kern/uipc_usrreq.c
@@ -0,0 +1,823 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/unpcb.h>
+#include <sys/un.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mbuf.h>
+
+/*
+ * Unix communications domain.
+ *
+ * TODO:
+ * SEQPACKET, RDM
+ * rethink name space problems
+ * need a proper out-of-band
+ */
+struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
+ino_t unp_ino; /* prototype for fake inode numbers */
+
+/*ARGSUSED*/
+uipc_usrreq(so, req, m, nam, control)
+ struct socket *so;
+ int req;
+ struct mbuf *m, *nam, *control;
+{
+ struct unpcb *unp = sotounpcb(so);
+ register struct socket *so2;
+ register int error = 0;
+ struct proc *p = curproc; /* XXX */
+
+ if (req == PRU_CONTROL)
+ return (EOPNOTSUPP);
+ if (req != PRU_SEND && control && control->m_len) {
+ error = EOPNOTSUPP;
+ goto release;
+ }
+ if (unp == 0 && req != PRU_ATTACH) {
+ error = EINVAL;
+ goto release;
+ }
+ switch (req) {
+
+ case PRU_ATTACH:
+ if (unp) {
+ error = EISCONN;
+ break;
+ }
+ error = unp_attach(so);
+ break;
+
+ case PRU_DETACH:
+ unp_detach(unp);
+ break;
+
+ case PRU_BIND:
+ error = unp_bind(unp, nam, p);
+ break;
+
+ case PRU_LISTEN:
+ if (unp->unp_vnode == 0)
+ error = EINVAL;
+ break;
+
+ case PRU_CONNECT:
+ error = unp_connect(so, nam, p);
+ break;
+
+ case PRU_CONNECT2:
+ error = unp_connect2(so, (struct socket *)nam);
+ break;
+
+ case PRU_DISCONNECT:
+ unp_disconnect(unp);
+ break;
+
+ case PRU_ACCEPT:
+ /*
+ * Pass back name of connected socket,
+ * if it was bound and we are still connected
+ * (our peer may have closed already!).
+ */
+ if (unp->unp_conn && unp->unp_conn->unp_addr) {
+ nam->m_len = unp->unp_conn->unp_addr->m_len;
+ bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
+ mtod(nam, caddr_t), (unsigned)nam->m_len);
+ } else {
+ nam->m_len = sizeof(sun_noname);
+ *(mtod(nam, struct sockaddr *)) = sun_noname;
+ }
+ break;
+
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ unp_shutdown(unp);
+ break;
+
+ case PRU_RCVD:
+ switch (so->so_type) {
+
+ case SOCK_DGRAM:
+ panic("uipc 1");
+ /*NOTREACHED*/
+
+ case SOCK_STREAM:
+#define rcv (&so->so_rcv)
+#define snd (&so2->so_snd)
+ if (unp->unp_conn == 0)
+ break;
+ so2 = unp->unp_conn->unp_socket;
+ /*
+ * Adjust backpressure on sender
+ * and wakeup any waiting to write.
+ */
+ snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
+ unp->unp_mbcnt = rcv->sb_mbcnt;
+ snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
+ unp->unp_cc = rcv->sb_cc;
+ sowwakeup(so2);
+#undef snd
+#undef rcv
+ break;
+
+ default:
+ panic("uipc 2");
+ }
+ break;
+
+ case PRU_SEND:
+ if (control && (error = unp_internalize(control, p)))
+ break;
+ switch (so->so_type) {
+
+ case SOCK_DGRAM: {
+ struct sockaddr *from;
+
+ if (nam) {
+ if (unp->unp_conn) {
+ error = EISCONN;
+ break;
+ }
+ error = unp_connect(so, nam, p);
+ if (error)
+ break;
+ } else {
+ if (unp->unp_conn == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ }
+ so2 = unp->unp_conn->unp_socket;
+ if (unp->unp_addr)
+ from = mtod(unp->unp_addr, struct sockaddr *);
+ else
+ from = &sun_noname;
+ if (sbappendaddr(&so2->so_rcv, from, m, control)) {
+ sorwakeup(so2);
+ m = 0;
+ control = 0;
+ } else
+ error = ENOBUFS;
+ if (nam)
+ unp_disconnect(unp);
+ break;
+ }
+
+ case SOCK_STREAM:
+#define rcv (&so2->so_rcv)
+#define snd (&so->so_snd)
+ if (so->so_state & SS_CANTSENDMORE) {
+ error = EPIPE;
+ break;
+ }
+ if (unp->unp_conn == 0)
+ panic("uipc 3");
+ so2 = unp->unp_conn->unp_socket;
+ /*
+ * Send to paired receive port, and then reduce
+ * send buffer hiwater marks to maintain backpressure.
+ * Wake up readers.
+ */
+ if (control) {
+ if (sbappendcontrol(rcv, m, control))
+ control = 0;
+ } else
+ sbappend(rcv, m);
+ snd->sb_mbmax -=
+ rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
+ unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
+ snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
+ unp->unp_conn->unp_cc = rcv->sb_cc;
+ sorwakeup(so2);
+ m = 0;
+#undef snd
+#undef rcv
+ break;
+
+ default:
+ panic("uipc 4");
+ }
+ break;
+
+ case PRU_ABORT:
+ unp_drop(unp, ECONNABORTED);
+ break;
+
+ case PRU_SENSE:
+ ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
+ if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
+ so2 = unp->unp_conn->unp_socket;
+ ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
+ }
+ ((struct stat *) m)->st_dev = NODEV;
+ if (unp->unp_ino == 0)
+ unp->unp_ino = unp_ino++;
+ ((struct stat *) m)->st_ino = unp->unp_ino;
+ return (0);
+
+ case PRU_RCVOOB:
+ return (EOPNOTSUPP);
+
+ case PRU_SENDOOB:
+ error = EOPNOTSUPP;
+ break;
+
+ case PRU_SOCKADDR:
+ if (unp->unp_addr) {
+ nam->m_len = unp->unp_addr->m_len;
+ bcopy(mtod(unp->unp_addr, caddr_t),
+ mtod(nam, caddr_t), (unsigned)nam->m_len);
+ } else
+ nam->m_len = 0;
+ break;
+
+ case PRU_PEERADDR:
+ if (unp->unp_conn && unp->unp_conn->unp_addr) {
+ nam->m_len = unp->unp_conn->unp_addr->m_len;
+ bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
+ mtod(nam, caddr_t), (unsigned)nam->m_len);
+ } else
+ nam->m_len = 0;
+ break;
+
+ case PRU_SLOWTIMO:
+ break;
+
+ default:
+ panic("piusrreq");
+ }
+release:
+ if (control)
+ m_freem(control);
+ if (m)
+ m_freem(m);
+ return (error);
+}
+
+/*
+ * Both send and receive buffers are allocated PIPSIZ bytes of buffering
+ * for stream sockets, although the total for sender and receiver is
+ * actually only PIPSIZ.
+ * Datagram sockets really use the sendspace as the maximum datagram size,
+ * and don't really want to reserve the sendspace. Their recvspace should
+ * be large enough for at least one max-size datagram plus address.
+ */
+#define PIPSIZ 4096
+u_long unpst_sendspace = PIPSIZ;
+u_long unpst_recvspace = PIPSIZ;
+u_long unpdg_sendspace = 2*1024; /* really max datagram size */
+u_long unpdg_recvspace = 4*1024;
+
+int unp_rights; /* file descriptors in flight */
+
+unp_attach(so)
+ struct socket *so;
+{
+ register struct mbuf *m;
+ register struct unpcb *unp;
+ int error;
+
+ if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+ switch (so->so_type) {
+
+ case SOCK_STREAM:
+ error = soreserve(so, unpst_sendspace, unpst_recvspace);
+ break;
+
+ case SOCK_DGRAM:
+ error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
+ break;
+
+ default:
+ panic("unp_attach");
+ }
+ if (error)
+ return (error);
+ }
+ m = m_getclr(M_DONTWAIT, MT_PCB);
+ if (m == NULL)
+ return (ENOBUFS);
+ unp = mtod(m, struct unpcb *);
+ so->so_pcb = (caddr_t)unp;
+ unp->unp_socket = so;
+ return (0);
+}
+
+unp_detach(unp)
+ register struct unpcb *unp;
+{
+
+ if (unp->unp_vnode) {
+ unp->unp_vnode->v_socket = 0;
+ vrele(unp->unp_vnode);
+ unp->unp_vnode = 0;
+ }
+ if (unp->unp_conn)
+ unp_disconnect(unp);
+ while (unp->unp_refs)
+ unp_drop(unp->unp_refs, ECONNRESET);
+ soisdisconnected(unp->unp_socket);
+ unp->unp_socket->so_pcb = 0;
+ m_freem(unp->unp_addr);
+ (void) m_free(dtom(unp));
+ if (unp_rights) {
+ /*
+ * Normally the receive buffer is flushed later,
+ * in sofree, but if our receive buffer holds references
+ * to descriptors that are now garbage, we will dispose
+ * of those descriptor references after the garbage collector
+ * gets them (resulting in a "panic: closef: count < 0").
+ */
+ sorflush(unp->unp_socket);
+ unp_gc();
+ }
+}
+
+unp_bind(unp, nam, p)
+ struct unpcb *unp;
+ struct mbuf *nam;
+ struct proc *p;
+{
+ struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
+ soun->sun_path, p);
+ if (unp->unp_vnode != NULL)
+ return (EINVAL);
+ if (nam->m_len == MLEN) {
+ if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
+ return (EINVAL);
+ } else
+ *(mtod(nam, caddr_t) + nam->m_len) = 0;
+/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ return (EADDRINUSE);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VSOCK;
+ vattr.va_mode = ACCESSPERMS;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
+ return (error);
+ vp = nd.ni_vp;
+ vp->v_socket = unp->unp_socket;
+ unp->unp_vnode = vp;
+ unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
+ VOP_UNLOCK(vp);
+ return (0);
+}
+
+unp_connect(so, nam, p)
+ struct socket *so;
+ struct mbuf *nam;
+ struct proc *p;
+{
+ register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
+ register struct vnode *vp;
+ register struct socket *so2, *so3;
+ struct unpcb *unp2, *unp3;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
+ if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) { /* XXX */
+ if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
+ return (EMSGSIZE);
+ } else
+ *(mtod(nam, caddr_t) + nam->m_len) = 0;
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VSOCK) {
+ error = ENOTSOCK;
+ goto bad;
+ }
+ if (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p))
+ goto bad;
+ so2 = vp->v_socket;
+ if (so2 == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ if (so->so_type != so2->so_type) {
+ error = EPROTOTYPE;
+ goto bad;
+ }
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
+ (so3 = sonewconn(so2, 0)) == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ unp2 = sotounpcb(so2);
+ unp3 = sotounpcb(so3);
+ if (unp2->unp_addr)
+ unp3->unp_addr =
+ m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
+ so2 = so3;
+ }
+ error = unp_connect2(so, so2);
+bad:
+ vput(vp);
+ return (error);
+}
+
+unp_connect2(so, so2)
+ register struct socket *so;
+ register struct socket *so2;
+{
+ register struct unpcb *unp = sotounpcb(so);
+ register struct unpcb *unp2;
+
+ if (so2->so_type != so->so_type)
+ return (EPROTOTYPE);
+ unp2 = sotounpcb(so2);
+ unp->unp_conn = unp2;
+ switch (so->so_type) {
+
+ case SOCK_DGRAM:
+ unp->unp_nextref = unp2->unp_refs;
+ unp2->unp_refs = unp;
+ soisconnected(so);
+ break;
+
+ case SOCK_STREAM:
+ unp2->unp_conn = unp;
+ soisconnected(so);
+ soisconnected(so2);
+ break;
+
+ default:
+ panic("unp_connect2");
+ }
+ return (0);
+}
+
+unp_disconnect(unp)
+ struct unpcb *unp;
+{
+ register struct unpcb *unp2 = unp->unp_conn;
+
+ if (unp2 == 0)
+ return;
+ unp->unp_conn = 0;
+ switch (unp->unp_socket->so_type) {
+
+ case SOCK_DGRAM:
+ if (unp2->unp_refs == unp)
+ unp2->unp_refs = unp->unp_nextref;
+ else {
+ unp2 = unp2->unp_refs;
+ for (;;) {
+ if (unp2 == 0)
+ panic("unp_disconnect");
+ if (unp2->unp_nextref == unp)
+ break;
+ unp2 = unp2->unp_nextref;
+ }
+ unp2->unp_nextref = unp->unp_nextref;
+ }
+ unp->unp_nextref = 0;
+ unp->unp_socket->so_state &= ~SS_ISCONNECTED;
+ break;
+
+ case SOCK_STREAM:
+ soisdisconnected(unp->unp_socket);
+ unp2->unp_conn = 0;
+ soisdisconnected(unp2->unp_socket);
+ break;
+ }
+}
+
+#ifdef notdef
+unp_abort(unp)
+ struct unpcb *unp;
+{
+
+ unp_detach(unp);
+}
+#endif
+
+unp_shutdown(unp)
+ struct unpcb *unp;
+{
+ struct socket *so;
+
+ if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
+ (so = unp->unp_conn->unp_socket))
+ socantrcvmore(so);
+}
+
+unp_drop(unp, errno)
+ struct unpcb *unp;
+ int errno;
+{
+ struct socket *so = unp->unp_socket;
+
+ so->so_error = errno;
+ unp_disconnect(unp);
+ if (so->so_head) {
+ so->so_pcb = (caddr_t) 0;
+ m_freem(unp->unp_addr);
+ (void) m_free(dtom(unp));
+ sofree(so);
+ }
+}
+
+#ifdef notdef
+unp_drain()
+{
+
+}
+#endif
+
+unp_externalize(rights)
+ struct mbuf *rights;
+{
+ struct proc *p = curproc; /* XXX */
+ register int i;
+ register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
+ register struct file **rp = (struct file **)(cm + 1);
+ register struct file *fp;
+ int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
+ int f;
+
+ if (!fdavail(p, newfds)) {
+ for (i = 0; i < newfds; i++) {
+ fp = *rp;
+ unp_discard(fp);
+ *rp++ = 0;
+ }
+ return (EMSGSIZE);
+ }
+ for (i = 0; i < newfds; i++) {
+ if (fdalloc(p, 0, &f))
+ panic("unp_externalize");
+ fp = *rp;
+ p->p_fd->fd_ofiles[f] = fp;
+ fp->f_msgcount--;
+ unp_rights--;
+ *(int *)rp++ = f;
+ }
+ return (0);
+}
+
+unp_internalize(control, p)
+ struct mbuf *control;
+ struct proc *p;
+{
+ struct filedesc *fdp = p->p_fd;
+ register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
+ register struct file **rp;
+ register struct file *fp;
+ register int i, fd;
+ int oldfds;
+
+ if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
+ cm->cmsg_len != control->m_len)
+ return (EINVAL);
+ oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
+ rp = (struct file **)(cm + 1);
+ for (i = 0; i < oldfds; i++) {
+ fd = *(int *)rp++;
+ if ((unsigned)fd >= fdp->fd_nfiles ||
+ fdp->fd_ofiles[fd] == NULL)
+ return (EBADF);
+ }
+ rp = (struct file **)(cm + 1);
+ for (i = 0; i < oldfds; i++) {
+ fp = fdp->fd_ofiles[*(int *)rp];
+ *rp++ = fp;
+ fp->f_count++;
+ fp->f_msgcount++;
+ unp_rights++;
+ }
+ return (0);
+}
+
+int unp_defer, unp_gcing;
+int unp_mark();
+extern struct domain unixdomain;
+
+unp_gc()
+{
+ register struct file *fp, *nextfp;
+ register struct socket *so;
+ struct file **extra_ref, **fpp;
+ int nunref, i;
+
+ if (unp_gcing)
+ return;
+ unp_gcing = 1;
+ unp_defer = 0;
+ for (fp = filehead; fp; fp = fp->f_filef)
+ fp->f_flag &= ~(FMARK|FDEFER);
+ do {
+ for (fp = filehead; fp; fp = fp->f_filef) {
+ if (fp->f_count == 0)
+ continue;
+ if (fp->f_flag & FDEFER) {
+ fp->f_flag &= ~FDEFER;
+ unp_defer--;
+ } else {
+ if (fp->f_flag & FMARK)
+ continue;
+ if (fp->f_count == fp->f_msgcount)
+ continue;
+ fp->f_flag |= FMARK;
+ }
+ if (fp->f_type != DTYPE_SOCKET ||
+ (so = (struct socket *)fp->f_data) == 0)
+ continue;
+ if (so->so_proto->pr_domain != &unixdomain ||
+ (so->so_proto->pr_flags&PR_RIGHTS) == 0)
+ continue;
+#ifdef notdef
+ if (so->so_rcv.sb_flags & SB_LOCK) {
+ /*
+ * This is problematical; it's not clear
+ * we need to wait for the sockbuf to be
+ * unlocked (on a uniprocessor, at least),
+ * and it's also not clear what to do
+ * if sbwait returns an error due to receipt
+ * of a signal. If sbwait does return
+ * an error, we'll go into an infinite
+ * loop. Delete all of this for now.
+ */
+ (void) sbwait(&so->so_rcv);
+ goto restart;
+ }
+#endif
+ unp_scan(so->so_rcv.sb_mb, unp_mark);
+ }
+ } while (unp_defer);
+ /*
+ * We grab an extra reference to each of the file table entries
+ * that are not otherwise accessible and then free the rights
+ * that are stored in messages on them.
+ *
+ * The bug in the orginal code is a little tricky, so I'll describe
+ * what's wrong with it here.
+ *
+ * It is incorrect to simply unp_discard each entry for f_msgcount
+ * times -- consider the case of sockets A and B that contain
+ * references to each other. On a last close of some other socket,
+ * we trigger a gc since the number of outstanding rights (unp_rights)
+ * is non-zero. If during the sweep phase the gc code un_discards,
+ * we end up doing a (full) closef on the descriptor. A closef on A
+ * results in the following chain. Closef calls soo_close, which
+ * calls soclose. Soclose calls first (through the switch
+ * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
+ * returns because the previous instance had set unp_gcing, and
+ * we return all the way back to soclose, which marks the socket
+ * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
+ * to free up the rights that are queued in messages on the socket A,
+ * i.e., the reference on B. The sorflush calls via the dom_dispose
+ * switch unp_dispose, which unp_scans with unp_discard. This second
+ * instance of unp_discard just calls closef on B.
+ *
+ * Well, a similar chain occurs on B, resulting in a sorflush on B,
+ * which results in another closef on A. Unfortunately, A is already
+ * being closed, and the descriptor has already been marked with
+ * SS_NOFDREF, and soclose panics at this point.
+ *
+ * Here, we first take an extra reference to each inaccessible
+ * descriptor. Then, we call sorflush ourself, since we know
+ * it is a Unix domain socket anyhow. After we destroy all the
+ * rights carried in messages, we do a last closef to get rid
+ * of our extra reference. This is the last close, and the
+ * unp_detach etc will shut down the socket.
+ *
+ * 91/09/19, bsy@cs.cmu.edu
+ */
+ extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
+ for (nunref = 0, fp = filehead, fpp = extra_ref; fp; fp = nextfp) {
+ nextfp = fp->f_filef;
+ if (fp->f_count == 0)
+ continue;
+ if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
+ *fpp++ = fp;
+ nunref++;
+ fp->f_count++;
+ }
+ }
+ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
+ sorflush((struct socket *)(*fpp)->f_data);
+ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
+ closef(*fpp);
+ free((caddr_t)extra_ref, M_FILE);
+ unp_gcing = 0;
+}
+
+unp_dispose(m)
+ struct mbuf *m;
+{
+ int unp_discard();
+
+ if (m)
+ unp_scan(m, unp_discard);
+}
+
+unp_scan(m0, op)
+ register struct mbuf *m0;
+ int (*op)();
+{
+ register struct mbuf *m;
+ register struct file **rp;
+ register struct cmsghdr *cm;
+ register int i;
+ int qfds;
+
+ while (m0) {
+ for (m = m0; m; m = m->m_next)
+ if (m->m_type == MT_CONTROL &&
+ m->m_len >= sizeof(*cm)) {
+ cm = mtod(m, struct cmsghdr *);
+ if (cm->cmsg_level != SOL_SOCKET ||
+ cm->cmsg_type != SCM_RIGHTS)
+ continue;
+ qfds = (cm->cmsg_len - sizeof *cm)
+ / sizeof (struct file *);
+ rp = (struct file **)(cm + 1);
+ for (i = 0; i < qfds; i++)
+ (*op)(*rp++);
+ break; /* XXX, but saves time */
+ }
+ m0 = m0->m_act;
+ }
+}
+
+unp_mark(fp)
+ struct file *fp;
+{
+
+ if (fp->f_flag & FMARK)
+ return;
+ unp_defer++;
+ fp->f_flag |= (FMARK|FDEFER);
+}
+
+unp_discard(fp)
+ struct file *fp;
+{
+
+ fp->f_msgcount--;
+ unp_rights--;
+ (void) closef(fp, (struct proc *)NULL);
+}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
new file mode 100644
index 0000000..ec5c962
--- /dev/null
+++ b/sys/kern/vfs_bio.c
@@ -0,0 +1,339 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vfs_bio.c 8.6 (Berkeley) 1/11/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/trace.h>
+#include <sys/malloc.h>
+#include <sys/resourcevar.h>
+
+/*
+ * Definitions for the buffer hash lists.
+ */
+#define BUFHASH(dvp, lbn) \
+ (&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
+LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
+u_long bufhash;
+
+/*
+ * Insq/Remq for the buffer hash lists.
+ */
+#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash)
+#define bremhash(bp) LIST_REMOVE(bp, b_hash)
+
+/*
+ * Definitions for the buffer free lists.
+ */
+#define BQUEUES 4 /* number of free buffer queues */
+
+#define BQ_LOCKED 0 /* super-blocks &c */
+#define BQ_LRU 1 /* lru, useful buffers */
+#define BQ_AGE 2 /* rubbish */
+#define BQ_EMPTY 3 /* buffer headers with no memory */
+
+TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
+int needbuffer;
+
+/*
+ * Insq/Remq for the buffer free lists.
+ */
+#define binsheadfree(bp, dp) TAILQ_INSERT_HEAD(dp, bp, b_freelist)
+#define binstailfree(bp, dp) TAILQ_INSERT_TAIL(dp, bp, b_freelist)
+
+void
+bremfree(bp)
+ struct buf *bp;
+{
+ struct bqueues *dp = NULL;
+
+ /*
+ * We only calculate the head of the freelist when removing
+ * the last element of the list as that is the only time that
+ * it is needed (e.g. to reset the tail pointer).
+ *
+ * NB: This makes an assumption about how tailq's are implemented.
+ */
+ if (bp->b_freelist.tqe_next == NULL) {
+ for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
+ if (dp->tqh_last == &bp->b_freelist.tqe_next)
+ break;
+ if (dp == &bufqueues[BQUEUES])
+ panic("bremfree: lost tail");
+ }
+ TAILQ_REMOVE(dp, bp, b_freelist);
+}
+
+/*
+ * Initialize buffers and hash links for buffers.
+ */
+void
+bufinit()
+{
+ register struct buf *bp;
+ struct bqueues *dp;
+ register int i;
+ int base, residual;
+
+ for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
+ TAILQ_INIT(dp);
+ bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
+ base = bufpages / nbuf;
+ residual = bufpages % nbuf;
+ for (i = 0; i < nbuf; i++) {
+ bp = &buf[i];
+ bzero((char *)bp, sizeof *bp);
+ bp->b_dev = NODEV;
+ bp->b_rcred = NOCRED;
+ bp->b_wcred = NOCRED;
+ bp->b_vnbufs.le_next = NOLIST;
+ bp->b_data = buffers + i * MAXBSIZE;
+ if (i < residual)
+ bp->b_bufsize = (base + 1) * CLBYTES;
+ else
+ bp->b_bufsize = base * CLBYTES;
+ bp->b_flags = B_INVAL;
+ dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
+ binsheadfree(bp, dp);
+ binshash(bp, &invalhash);
+ }
+}
+
+bread(a1, a2, a3, a4, a5)
+ struct vnode *a1;
+ daddr_t a2;
+ int a3;
+ struct ucred *a4;
+ struct buf **a5;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (EIO);
+}
+
+breadn(a1, a2, a3, a4, a5, a6, a7, a8)
+ struct vnode *a1;
+ daddr_t a2; int a3;
+ daddr_t a4[]; int a5[];
+ int a6;
+ struct ucred *a7;
+ struct buf **a8;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (EIO);
+}
+
+bwrite(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (EIO);
+}
+
+int
+vn_bwrite(ap)
+ struct vop_bwrite_args *ap;
+{
+ return (bwrite(ap->a_bp));
+}
+
+bdwrite(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+bawrite(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+brelse(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+struct buf *
+incore(a1, a2)
+ struct vnode *a1;
+ daddr_t a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+struct buf *
+getblk(a1, a2, a3, a4, a5)
+ struct vnode *a1;
+ daddr_t a2;
+ int a3, a4, a5;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((struct buf *)0);
+}
+
+struct buf *
+geteblk(a1)
+ int a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((struct buf *)0);
+}
+
+allocbuf(a1, a2)
+ struct buf *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+struct buf *
+getnewbuf(a1, a2)
+ int a1, a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((struct buf *)0);
+}
+
+biowait(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (EIO);
+}
+
+void
+biodone(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+int
+count_lock_queue()
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+#ifdef DIAGNOSTIC
+/*
+ * Print out statistics on the current allocation of the buffer pool.
+ * Can be enabled to print out on every ``sync'' by setting "syncprt"
+ * in vfs_syscalls.c using sysctl.
+ */
+void
+vfs_bufstats()
+{
+ int s, i, j, count;
+ register struct buf *bp;
+ register struct bqueues *dp;
+ int counts[MAXBSIZE/CLBYTES+1];
+ static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };
+
+ for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
+ count = 0;
+ for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
+ counts[j] = 0;
+ s = splbio();
+ for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) {
+ counts[bp->b_bufsize/CLBYTES]++;
+ count++;
+ }
+ splx(s);
+ printf("%s: total-%d", bname[i], count);
+ for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
+ if (counts[j] != 0)
+ printf(", %d-%d", j * CLBYTES, counts[j]);
+ printf("\n");
+ }
+}
+#endif /* DIAGNOSTIC */
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
new file mode 100644
index 0000000..4ccfd72
--- /dev/null
+++ b/sys/kern/vfs_cache.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_cache.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+/*
+ * Name caching works as follows:
+ *
+ * Names found by directory scans are retained in a cache
+ * for future reference. It is managed LRU, so frequently
+ * used names will hang around. Cache is indexed by hash value
+ * obtained from (vp, name) where vp refers to the directory
+ * containing name.
+ *
+ * For simplicity (and economy of storage), names longer than
+ * a maximum length of NCHNAMLEN are not cached; they occur
+ * infrequently in any case, and are almost never of interest.
+ *
+ * Upon reaching the last segment of a path, if the reference
+ * is for DELETE, or NOCACHE is set (rewrite), and the
+ * name is located in the cache, it will be dropped.
+ */
+
+/*
+ * Structures associated with name cacheing.
+ */
+struct namecache **nchashtbl;
+u_long nchash; /* size of hash table - 1 */
+long numcache; /* number of cache entries allocated */
+struct namecache *nchhead, **nchtail; /* LRU chain pointers */
+struct nchstats nchstats; /* cache effectiveness statistics */
+
+int doingcache = 1; /* 1 => enable the cache */
+
+/*
+ * Look for a the name in the cache. We don't do this
+ * if the segment name is long, simply so the cache can avoid
+ * holding long names (which would either waste space, or
+ * add greatly to the complexity).
+ *
+ * Lookup is called with ni_dvp pointing to the directory to search,
+ * ni_ptr pointing to the name of the entry being sought, ni_namelen
+ * tells the length of the name, and ni_hash contains a hash of
+ * the name. If the lookup succeeds, the vnode is returned in ni_vp
+ * and a status of -1 is returned. If the lookup determines that
+ * the name does not exist (negative cacheing), a status of ENOENT
+ * is returned. If the lookup fails, a status of zero is returned.
+ */
+int
+cache_lookup(dvp, vpp, cnp)
+ struct vnode *dvp;
+ struct vnode **vpp;
+ struct componentname *cnp;
+{
+ register struct namecache *ncp, *ncq, **ncpp;
+
+ if (!doingcache)
+ return (0);
+ if (cnp->cn_namelen > NCHNAMLEN) {
+ nchstats.ncs_long++;
+ cnp->cn_flags &= ~MAKEENTRY;
+ return (0);
+ }
+ ncpp = &nchashtbl[cnp->cn_hash & nchash];
+ for (ncp = *ncpp; ncp; ncp = ncp->nc_forw) {
+ if (ncp->nc_dvp == dvp &&
+ ncp->nc_dvpid == dvp->v_id &&
+ ncp->nc_nlen == cnp->cn_namelen &&
+ !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
+ break;
+ }
+ if (ncp == NULL) {
+ nchstats.ncs_miss++;
+ return (0);
+ }
+ if (!(cnp->cn_flags & MAKEENTRY)) {
+ nchstats.ncs_badhits++;
+ } else if (ncp->nc_vp == NULL) {
+ if (cnp->cn_nameiop != CREATE) {
+ nchstats.ncs_neghits++;
+ /*
+ * Move this slot to end of LRU chain,
+ * if not already there.
+ */
+ if (ncp->nc_nxt) {
+ /* remove from LRU chain */
+ *ncp->nc_prev = ncp->nc_nxt;
+ ncp->nc_nxt->nc_prev = ncp->nc_prev;
+ /* and replace at end of it */
+ ncp->nc_nxt = NULL;
+ ncp->nc_prev = nchtail;
+ *nchtail = ncp;
+ nchtail = &ncp->nc_nxt;
+ }
+ return (ENOENT);
+ }
+ } else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
+ nchstats.ncs_falsehits++;
+ } else {
+ nchstats.ncs_goodhits++;
+ /*
+ * move this slot to end of LRU chain, if not already there
+ */
+ if (ncp->nc_nxt) {
+ /* remove from LRU chain */
+ *ncp->nc_prev = ncp->nc_nxt;
+ ncp->nc_nxt->nc_prev = ncp->nc_prev;
+ /* and replace at end of it */
+ ncp->nc_nxt = NULL;
+ ncp->nc_prev = nchtail;
+ *nchtail = ncp;
+ nchtail = &ncp->nc_nxt;
+ }
+ *vpp = ncp->nc_vp;
+ return (-1);
+ }
+
+ /*
+ * Last component and we are renaming or deleting,
+ * the cache entry is invalid, or otherwise don't
+ * want cache entry to exist.
+ */
+ /* remove from LRU chain */
+ if (ncq = ncp->nc_nxt)
+ ncq->nc_prev = ncp->nc_prev;
+ else
+ nchtail = ncp->nc_prev;
+ *ncp->nc_prev = ncq;
+ /* remove from hash chain */
+ if (ncq = ncp->nc_forw)
+ ncq->nc_back = ncp->nc_back;
+ *ncp->nc_back = ncq;
+ /* and make a dummy hash chain */
+ ncp->nc_forw = NULL;
+ ncp->nc_back = NULL;
+ /* insert at head of LRU list (first to grab) */
+ if (ncq = nchhead)
+ ncq->nc_prev = &ncp->nc_nxt;
+ else
+ nchtail = &ncp->nc_nxt;
+ nchhead = ncp;
+ ncp->nc_nxt = ncq;
+ ncp->nc_prev = &nchhead;
+ return (0);
+}
+
+/*
+ * Add an entry to the cache
+ */
+cache_enter(dvp, vp, cnp)
+ struct vnode *dvp;
+ struct vnode *vp;
+ struct componentname *cnp;
+{
+ register struct namecache *ncp, *ncq, **ncpp;
+
+#ifdef DIAGNOSTIC
+ if (cnp->cn_namelen > NCHNAMLEN)
+ panic("cache_enter: name too long");
+#endif
+ if (!doingcache)
+ return;
+ /*
+ * Free the cache slot at head of lru chain.
+ */
+ if (numcache < desiredvnodes) {
+ ncp = (struct namecache *)
+ malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
+ bzero((char *)ncp, sizeof *ncp);
+ numcache++;
+ } else if (ncp = nchhead) {
+ /* remove from lru chain */
+ if (ncq = ncp->nc_nxt)
+ ncq->nc_prev = ncp->nc_prev;
+ else
+ nchtail = ncp->nc_prev;
+ *ncp->nc_prev = ncq;
+ /* remove from old hash chain, if on one */
+ if (ncp->nc_back) {
+ if (ncq = ncp->nc_forw)
+ ncq->nc_back = ncp->nc_back;
+ *ncp->nc_back = ncq;
+ ncp->nc_forw = NULL;
+ ncp->nc_back = NULL;
+ }
+ } else
+ return;
+ /* grab the vnode we just found */
+ ncp->nc_vp = vp;
+ if (vp)
+ ncp->nc_vpid = vp->v_id;
+ else
+ ncp->nc_vpid = 0;
+ /* fill in cache info */
+ ncp->nc_dvp = dvp;
+ ncp->nc_dvpid = dvp->v_id;
+ ncp->nc_nlen = cnp->cn_namelen;
+ bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
+ /* link at end of lru chain */
+ ncp->nc_nxt = NULL;
+ ncp->nc_prev = nchtail;
+ *nchtail = ncp;
+ nchtail = &ncp->nc_nxt;
+ /* and insert on hash chain */
+ ncpp = &nchashtbl[cnp->cn_hash & nchash];
+ if (ncq = *ncpp)
+ ncq->nc_back = &ncp->nc_forw;
+ ncp->nc_forw = ncq;
+ ncp->nc_back = ncpp;
+ *ncpp = ncp;
+}
+
+/*
+ * Name cache initialization, from vfs_init() when we are booting
+ */
+nchinit()
+{
+
+ nchtail = &nchhead;
+ nchashtbl = hashinit(desiredvnodes, M_CACHE, &nchash);
+}
+
+/*
+ * Cache flush, a particular vnode; called when a vnode is renamed to
+ * hide entries that would now be invalid
+ */
+cache_purge(vp)
+ struct vnode *vp;
+{
+ struct namecache *ncp, **ncpp;
+
+ vp->v_id = ++nextvnodeid;
+ if (nextvnodeid != 0)
+ return;
+ for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
+ for (ncp = *ncpp; ncp; ncp = ncp->nc_forw) {
+ ncp->nc_vpid = 0;
+ ncp->nc_dvpid = 0;
+ }
+ }
+ vp->v_id = ++nextvnodeid;
+}
+
+/*
+ * Cache flush, a whole filesystem; called when filesys is umounted to
+ * remove entries that would now be invalid
+ *
+ * The line "nxtcp = nchhead" near the end is to avoid potential problems
+ * if the cache lru chain is modified while we are dumping the
+ * inode. This makes the algorithm O(n^2), but do you think I care?
+ */
+cache_purgevfs(mp)
+ struct mount *mp;
+{
+ register struct namecache *ncp, *nxtcp;
+
+ for (ncp = nchhead; ncp; ncp = nxtcp) {
+ if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
+ nxtcp = ncp->nc_nxt;
+ continue;
+ }
+ /* free the resources we had */
+ ncp->nc_vp = NULL;
+ ncp->nc_dvp = NULL;
+ /* remove from old hash chain, if on one */
+ if (ncp->nc_back) {
+ if (nxtcp = ncp->nc_forw)
+ nxtcp->nc_back = ncp->nc_back;
+ *ncp->nc_back = nxtcp;
+ ncp->nc_forw = NULL;
+ ncp->nc_back = NULL;
+ }
+ /* delete this entry from LRU chain */
+ if (nxtcp = ncp->nc_nxt)
+ nxtcp->nc_prev = ncp->nc_prev;
+ else
+ nchtail = ncp->nc_prev;
+ *ncp->nc_prev = nxtcp;
+ /* cause rescan of list, it may have altered */
+ /* also put the now-free entry at head of LRU */
+ if (nxtcp = nchhead)
+ nxtcp->nc_prev = &ncp->nc_nxt;
+ else
+ nchtail = &ncp->nc_nxt;
+ nchhead = ncp;
+ ncp->nc_nxt = nxtcp;
+ ncp->nc_prev = &nchhead;
+ }
+}
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
new file mode 100644
index 0000000..c34fbc3
--- /dev/null
+++ b/sys/kern/vfs_cluster.c
@@ -0,0 +1,746 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/trace.h>
+#include <sys/malloc.h>
+#include <sys/resourcevar.h>
+#include <libkern/libkern.h>
+
+#ifdef DEBUG
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+int doreallocblks = 1;
+struct ctldebug debug13 = { "doreallocblks", &doreallocblks };
+#else
+/* XXX for cluster_write */
+#define doreallocblks 1
+#endif
+
+/*
+ * Local declarations
+ */
+struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
+ daddr_t, long, int));
+struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
+ daddr_t, daddr_t, long, int, long));
+void cluster_wbuild __P((struct vnode *, struct buf *, long,
+ daddr_t, int, daddr_t));
+struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *));
+
+#ifdef DIAGNOSTIC
+/*
+ * Set to 1 if reads of block zero should cause readahead to be done.
+ * Set to 0 treats a read of block zero as a non-sequential read.
+ *
+ * Setting to one assumes that most reads of block zero of files are due to
+ * sequential passes over the files (e.g. cat, sum) where additional blocks
+ * will soon be needed. Setting to zero assumes that the majority are
+ * surgical strikes to get particular info (e.g. size, file) where readahead
+ * blocks will not be used and, in fact, push out other potentially useful
+ * blocks from the cache. The former seems intuitive, but some quick tests
+ * showed that the latter performed better from a system-wide point of view.
+ */
+int doclusterraz = 0;
+#define ISSEQREAD(vp, blk) \
+ (((blk) != 0 || doclusterraz) && \
+ ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
+#else
+#define ISSEQREAD(vp, blk) \
+ ((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
+#endif
+
+/*
+ * This replaces bread. If this is a bread at the beginning of a file and
+ * lastr is 0, we assume this is the first read and we'll read up to two
+ * blocks if they are sequential. After that, we'll do regular read ahead
+ * in clustered chunks.
+ *
+ * There are 4 or 5 cases depending on how you count:
+ * Desired block is in the cache:
+ * 1 Not sequential access (0 I/Os).
+ * 2 Access is sequential, do read-ahead (1 ASYNC).
+ * Desired block is not in cache:
+ * 3 Not sequential access (1 SYNC).
+ * 4 Sequential access, next block is contiguous (1 SYNC).
+ * 5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC)
+ *
+ * There are potentially two buffers that require I/O.
+ * bp is the block requested.
+ * rbp is the read-ahead block.
+ * If either is NULL, then you don't have to do the I/O.
+ */
+cluster_read(vp, filesize, lblkno, size, cred, bpp)
+ struct vnode *vp;
+ u_quad_t filesize;
+ daddr_t lblkno;
+ long size;
+ struct ucred *cred;
+ struct buf **bpp;
+{
+ struct buf *bp, *rbp;
+ daddr_t blkno, ioblkno;
+ long flags;
+ int error, num_ra, alreadyincore;
+
+#ifdef DIAGNOSTIC
+ if (size == 0)
+ panic("cluster_read: size = 0");
+#endif
+
+ error = 0;
+ flags = B_READ;
+ *bpp = bp = getblk(vp, lblkno, size, 0, 0);
+ if (bp->b_flags & B_CACHE) {
+ /*
+ * Desired block is in cache; do any readahead ASYNC.
+ * Case 1, 2.
+ */
+ trace(TR_BREADHIT, pack(vp, size), lblkno);
+ flags |= B_ASYNC;
+ ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1);
+ alreadyincore = (int)incore(vp, ioblkno);
+ bp = NULL;
+ } else {
+ /* Block wasn't in cache, case 3, 4, 5. */
+ trace(TR_BREADMISS, pack(vp, size), lblkno);
+ bp->b_flags |= B_READ;
+ ioblkno = lblkno;
+ alreadyincore = 0;
+ curproc->p_stats->p_ru.ru_inblock++; /* XXX */
+ }
+ /*
+ * XXX
+ * Replace 1 with a window size based on some permutation of
+ * maxcontig and rot_delay. This will let you figure out how
+ * many blocks you should read-ahead (case 2, 4, 5).
+ *
+ * If the access isn't sequential, reset the window to 1.
+ * Note that a read to the same block is considered sequential.
+ * This catches the case where the file is being read sequentially,
+ * but at smaller than the filesystem block size.
+ */
+ rbp = NULL;
+ if (!ISSEQREAD(vp, lblkno)) {
+ vp->v_ralen = 0;
+ vp->v_maxra = lblkno;
+ } else if ((ioblkno + 1) * size <= filesize && !alreadyincore &&
+ !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) &&
+ blkno != -1) {
+ /*
+ * Reading sequentially, and the next block is not in the
+ * cache. We are going to try reading ahead.
+ */
+ if (num_ra) {
+ /*
+ * If our desired readahead block had been read
+ * in a previous readahead but is no longer in
+ * core, then we may be reading ahead too far
+ * or are not using our readahead very rapidly.
+ * In this case we scale back the window.
+ */
+ if (!alreadyincore && ioblkno <= vp->v_maxra)
+ vp->v_ralen = max(vp->v_ralen >> 1, 1);
+ /*
+ * There are more sequential blocks than our current
+ * window allows, scale up. Ideally we want to get
+ * in sync with the filesystem maxcontig value.
+ */
+ else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr)
+ vp->v_ralen = vp->v_ralen ?
+ min(num_ra, vp->v_ralen << 1) : 1;
+
+ if (num_ra > vp->v_ralen)
+ num_ra = vp->v_ralen;
+ }
+
+ if (num_ra) /* case 2, 4 */
+ rbp = cluster_rbuild(vp, filesize,
+ bp, ioblkno, blkno, size, num_ra, flags);
+ else if (ioblkno == lblkno) {
+ bp->b_blkno = blkno;
+ /* Case 5: check how many blocks to read ahead */
+ ++ioblkno;
+ if ((ioblkno + 1) * size > filesize ||
+ incore(vp, ioblkno) || (error = VOP_BMAP(vp,
+ ioblkno, NULL, &blkno, &num_ra)) || blkno == -1)
+ goto skip_readahead;
+ /*
+ * Adjust readahead as above
+ */
+ if (num_ra) {
+ if (!alreadyincore && ioblkno <= vp->v_maxra)
+ vp->v_ralen = max(vp->v_ralen >> 1, 1);
+ else if (num_ra > vp->v_ralen &&
+ lblkno != vp->v_lastr)
+ vp->v_ralen = vp->v_ralen ?
+ min(num_ra,vp->v_ralen<<1) : 1;
+ if (num_ra > vp->v_ralen)
+ num_ra = vp->v_ralen;
+ }
+ flags |= B_ASYNC;
+ if (num_ra)
+ rbp = cluster_rbuild(vp, filesize,
+ NULL, ioblkno, blkno, size, num_ra, flags);
+ else {
+ rbp = getblk(vp, ioblkno, size, 0, 0);
+ rbp->b_flags |= flags;
+ rbp->b_blkno = blkno;
+ }
+ } else {
+ /* case 2; read ahead single block */
+ rbp = getblk(vp, ioblkno, size, 0, 0);
+ rbp->b_flags |= flags;
+ rbp->b_blkno = blkno;
+ }
+
+ if (rbp == bp) /* case 4 */
+ rbp = NULL;
+ else if (rbp) { /* case 2, 5 */
+ trace(TR_BREADMISSRA,
+ pack(vp, (num_ra + 1) * size), ioblkno);
+ curproc->p_stats->p_ru.ru_inblock++; /* XXX */
+ }
+ }
+
+ /* XXX Kirk, do we need to make sure the bp has creds? */
+skip_readahead:
+ if (bp)
+ if (bp->b_flags & (B_DONE | B_DELWRI))
+ panic("cluster_read: DONE bp");
+ else
+ error = VOP_STRATEGY(bp);
+
+ if (rbp)
+ if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
+ rbp->b_flags &= ~(B_ASYNC | B_READ);
+ brelse(rbp);
+ } else
+ (void) VOP_STRATEGY(rbp);
+
+ /*
+ * Recalculate our maximum readahead
+ */
+ if (rbp == NULL)
+ rbp = bp;
+ if (rbp)
+ vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1;
+
+ if (bp)
+ return(biowait(bp));
+ return(error);
+}
+
+/*
+ * If blocks are contiguous on disk, use this to provide clustered
+ * read ahead. We will read as many blocks as possible sequentially
+ * and then parcel them up into logical blocks in the buffer hash table.
+ */
+struct buf *
+cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
+ struct vnode *vp;
+ u_quad_t filesize;
+ struct buf *bp;
+ daddr_t lbn;
+ daddr_t blkno;
+ long size;
+ int run;
+ long flags;
+{
+ struct cluster_save *b_save;
+ struct buf *tbp;
+ daddr_t bn;
+ int i, inc;
+
+#ifdef DIAGNOSTIC
+ if (size != vp->v_mount->mnt_stat.f_iosize)
+ panic("cluster_rbuild: size %d != filesize %d\n",
+ size, vp->v_mount->mnt_stat.f_iosize);
+#endif
+ if (size * (lbn + run + 1) > filesize)
+ --run;
+ if (run == 0) {
+ if (!bp) {
+ bp = getblk(vp, lbn, size, 0, 0);
+ bp->b_blkno = blkno;
+ bp->b_flags |= flags;
+ }
+ return(bp);
+ }
+
+ bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
+ if (bp->b_flags & (B_DONE | B_DELWRI))
+ return (bp);
+
+ b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
+ M_SEGMENT, M_WAITOK);
+ b_save->bs_bufsize = b_save->bs_bcount = size;
+ b_save->bs_nchildren = 0;
+ b_save->bs_children = (struct buf **)(b_save + 1);
+ b_save->bs_saveaddr = bp->b_saveaddr;
+ bp->b_saveaddr = (caddr_t) b_save;
+
+ inc = btodb(size);
+ for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
+ if (incore(vp, lbn + i)) {
+ if (i == 1) {
+ bp->b_saveaddr = b_save->bs_saveaddr;
+ bp->b_flags &= ~B_CALL;
+ bp->b_iodone = NULL;
+ allocbuf(bp, size);
+ free(b_save, M_SEGMENT);
+ } else
+ allocbuf(bp, size * i);
+ break;
+ }
+ tbp = getblk(vp, lbn + i, 0, 0, 0);
+ /*
+ * getblk may return some memory in the buffer if there were
+ * no empty buffers to shed it to. If there is currently
+ * memory in the buffer, we move it down size bytes to make
+ * room for the valid pages that cluster_callback will insert.
+ * We do this now so we don't have to do it at interrupt time
+ * in the callback routine.
+ */
+ if (tbp->b_bufsize != 0) {
+ caddr_t bdata = (char *)tbp->b_data;
+
+ if (tbp->b_bufsize + size > MAXBSIZE)
+ panic("cluster_rbuild: too much memory");
+ if (tbp->b_bufsize > size) {
+ /*
+ * XXX if the source and destination regions
+ * overlap we have to copy backward to avoid
+ * clobbering any valid pages (i.e. pagemove
+ * implementations typically can't handle
+ * overlap).
+ */
+ bdata += tbp->b_bufsize;
+ while (bdata > (char *)tbp->b_data) {
+ bdata -= CLBYTES;
+ pagemove(bdata, bdata + size, CLBYTES);
+ }
+ } else
+ pagemove(bdata, bdata + size, tbp->b_bufsize);
+ }
+ tbp->b_blkno = bn;
+ tbp->b_flags |= flags | B_READ | B_ASYNC;
+ ++b_save->bs_nchildren;
+ b_save->bs_children[i - 1] = tbp;
+ }
+ return(bp);
+}
+
+/*
+ * Either get a new buffer or grow the existing one.
+ */
+struct buf *
+cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
+ struct vnode *vp;
+ struct buf *bp;
+ long flags;
+ daddr_t blkno;
+ daddr_t lblkno;
+ long size;
+ int run;
+{
+ if (!bp) {
+ bp = getblk(vp, lblkno, size, 0, 0);
+ if (bp->b_flags & (B_DONE | B_DELWRI)) {
+ bp->b_blkno = blkno;
+ return(bp);
+ }
+ }
+ allocbuf(bp, run * size);
+ bp->b_blkno = blkno;
+ bp->b_iodone = cluster_callback;
+ bp->b_flags |= flags | B_CALL;
+ return(bp);
+}
+
+/*
+ * Cleanup after a clustered read or write.
+ * This is complicated by the fact that any of the buffers might have
+ * extra memory (if there were no empty buffer headers at allocbuf time)
+ * that we will need to shift around.
+ */
+void
+cluster_callback(bp)
+ struct buf *bp;
+{
+ struct cluster_save *b_save;
+ struct buf **bpp, *tbp;
+ long bsize;
+ caddr_t cp;
+ int error = 0;
+
+ /*
+ * Must propogate errors to all the components.
+ */
+ if (bp->b_flags & B_ERROR)
+ error = bp->b_error;
+
+ b_save = (struct cluster_save *)(bp->b_saveaddr);
+ bp->b_saveaddr = b_save->bs_saveaddr;
+
+ bsize = b_save->bs_bufsize;
+ cp = (char *)bp->b_data + bsize;
+ /*
+ * Move memory from the large cluster buffer into the component
+ * buffers and mark IO as done on these.
+ */
+ for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) {
+ tbp = *bpp;
+ pagemove(cp, tbp->b_data, bsize);
+ tbp->b_bufsize += bsize;
+ tbp->b_bcount = bsize;
+ if (error) {
+ tbp->b_flags |= B_ERROR;
+ tbp->b_error = error;
+ }
+ biodone(tbp);
+ bp->b_bufsize -= bsize;
+ cp += bsize;
+ }
+ /*
+ * If there was excess memory in the cluster buffer,
+ * slide it up adjacent to the remaining valid data.
+ */
+ if (bp->b_bufsize != bsize) {
+ if (bp->b_bufsize < bsize)
+ panic("cluster_callback: too little memory");
+ pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize);
+ }
+ bp->b_bcount = bsize;
+ bp->b_iodone = NULL;
+ free(b_save, M_SEGMENT);
+ if (bp->b_flags & B_ASYNC)
+ brelse(bp);
+ else {
+ bp->b_flags &= ~B_WANTED;
+ wakeup((caddr_t)bp);
+ }
+}
+
+/*
+ * Do clustered write for FFS.
+ *
+ * Three cases:
+ * 1. Write is not sequential (write asynchronously)
+ * Write is sequential:
+ * 2. beginning of cluster - begin cluster
+ * 3. middle of a cluster - add to cluster
+ * 4. end of a cluster - asynchronously write cluster
+ */
+void
+cluster_write(bp, filesize)
+ struct buf *bp;
+ u_quad_t filesize;
+{
+ struct vnode *vp;
+ daddr_t lbn;
+ int maxclen, cursize;
+
+ vp = bp->b_vp;
+ lbn = bp->b_lblkno;
+
+ /* Initialize vnode to beginning of file. */
+ if (lbn == 0)
+ vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
+
+ if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
+ (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) {
+ maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
+ if (vp->v_clen != 0) {
+ /*
+ * Next block is not sequential.
+ *
+ * If we are not writing at end of file, the process
+ * seeked to another point in the file since its
+ * last write, or we have reached our maximum
+ * cluster size, then push the previous cluster.
+ * Otherwise try reallocating to make it sequential.
+ */
+ cursize = vp->v_lastw - vp->v_cstart + 1;
+ if (!doreallocblks ||
+ (lbn + 1) * bp->b_bcount != filesize ||
+ lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
+ cluster_wbuild(vp, NULL, bp->b_bcount,
+ vp->v_cstart, cursize, lbn);
+ } else {
+ struct buf **bpp, **endbp;
+ struct cluster_save *buflist;
+
+ buflist = cluster_collectbufs(vp, bp);
+ endbp = &buflist->bs_children
+ [buflist->bs_nchildren - 1];
+ if (VOP_REALLOCBLKS(vp, buflist)) {
+ /*
+ * Failed, push the previous cluster.
+ */
+ for (bpp = buflist->bs_children;
+ bpp < endbp; bpp++)
+ brelse(*bpp);
+ free(buflist, M_SEGMENT);
+ cluster_wbuild(vp, NULL, bp->b_bcount,
+ vp->v_cstart, cursize, lbn);
+ } else {
+ /*
+ * Succeeded, keep building cluster.
+ */
+ for (bpp = buflist->bs_children;
+ bpp <= endbp; bpp++)
+ bdwrite(*bpp);
+ free(buflist, M_SEGMENT);
+ vp->v_lastw = lbn;
+ vp->v_lasta = bp->b_blkno;
+ return;
+ }
+ }
+ }
+ /*
+ * Consider beginning a cluster.
+ * If at end of file, make cluster as large as possible,
+ * otherwise find size of existing cluster.
+ */
+ if ((lbn + 1) * bp->b_bcount != filesize &&
+ (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
+ bp->b_blkno == -1)) {
+ bawrite(bp);
+ vp->v_clen = 0;
+ vp->v_lasta = bp->b_blkno;
+ vp->v_cstart = lbn + 1;
+ vp->v_lastw = lbn;
+ return;
+ }
+ vp->v_clen = maxclen;
+ if (maxclen == 0) { /* I/O not contiguous */
+ vp->v_cstart = lbn + 1;
+ bawrite(bp);
+ } else { /* Wait for rest of cluster */
+ vp->v_cstart = lbn;
+ bdwrite(bp);
+ }
+ } else if (lbn == vp->v_cstart + vp->v_clen) {
+ /*
+ * At end of cluster, write it out.
+ */
+ cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
+ vp->v_clen + 1, lbn);
+ vp->v_clen = 0;
+ vp->v_cstart = lbn + 1;
+ } else
+ /*
+ * In the middle of a cluster, so just delay the
+ * I/O for now.
+ */
+ bdwrite(bp);
+ vp->v_lastw = lbn;
+ vp->v_lasta = bp->b_blkno;
+}
+
+
+/*
+ * This is an awful lot like cluster_rbuild...wish they could be combined.
+ * The last lbn argument is the current block on which I/O is being
+ * performed. Check to see that it doesn't fall in the middle of
+ * the current block (if last_bp == NULL).
+ */
+void
+cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
+ struct vnode *vp;
+ struct buf *last_bp;
+ long size;
+ daddr_t start_lbn;
+ int len;
+ daddr_t lbn;
+{
+ struct cluster_save *b_save;
+ struct buf *bp, *tbp;
+ caddr_t cp;
+ int i, s;
+
+#ifdef DIAGNOSTIC
+ if (size != vp->v_mount->mnt_stat.f_iosize)
+ panic("cluster_wbuild: size %d != filesize %d\n",
+ size, vp->v_mount->mnt_stat.f_iosize);
+#endif
+redo:
+ while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) {
+ ++start_lbn;
+ --len;
+ }
+
+ /* Get more memory for current buffer */
+ if (len <= 1) {
+ if (last_bp) {
+ bawrite(last_bp);
+ } else if (len) {
+ bp = getblk(vp, start_lbn, size, 0, 0);
+ bawrite(bp);
+ }
+ return;
+ }
+
+ bp = getblk(vp, start_lbn, size, 0, 0);
+ if (!(bp->b_flags & B_DELWRI)) {
+ ++start_lbn;
+ --len;
+ brelse(bp);
+ goto redo;
+ }
+
+ /*
+ * Extra memory in the buffer, punt on this buffer.
+ * XXX we could handle this in most cases, but we would have to
+ * push the extra memory down to after our max possible cluster
+ * size and then potentially pull it back up if the cluster was
+ * terminated prematurely--too much hassle.
+ */
+ if (bp->b_bcount != bp->b_bufsize) {
+ ++start_lbn;
+ --len;
+ bawrite(bp);
+ goto redo;
+ }
+
+ --len;
+ b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
+ M_SEGMENT, M_WAITOK);
+ b_save->bs_bcount = bp->b_bcount;
+ b_save->bs_bufsize = bp->b_bufsize;
+ b_save->bs_nchildren = 0;
+ b_save->bs_children = (struct buf **)(b_save + 1);
+ b_save->bs_saveaddr = bp->b_saveaddr;
+ bp->b_saveaddr = (caddr_t) b_save;
+
+ bp->b_flags |= B_CALL;
+ bp->b_iodone = cluster_callback;
+ cp = (char *)bp->b_data + size;
+ for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
+ /*
+ * Block is not in core or the non-sequential block
+ * ending our cluster was part of the cluster (in which
+ * case we don't want to write it twice).
+ */
+ if (!incore(vp, start_lbn) ||
+ last_bp == NULL && start_lbn == lbn)
+ break;
+
+ /*
+ * Get the desired block buffer (unless it is the final
+ * sequential block whose buffer was passed in explictly
+ * as last_bp).
+ */
+ if (last_bp == NULL || start_lbn != lbn) {
+ tbp = getblk(vp, start_lbn, size, 0, 0);
+ if (!(tbp->b_flags & B_DELWRI)) {
+ brelse(tbp);
+ break;
+ }
+ } else
+ tbp = last_bp;
+
+ ++b_save->bs_nchildren;
+
+ /* Move memory from children to parent */
+ if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) {
+ printf("Clustered Block: %d addr %x bufsize: %d\n",
+ bp->b_lblkno, bp->b_blkno, bp->b_bufsize);
+ printf("Child Block: %d addr: %x\n", tbp->b_lblkno,
+ tbp->b_blkno);
+ panic("Clustered write to wrong blocks");
+ }
+
+ pagemove(tbp->b_data, cp, size);
+ bp->b_bcount += size;
+ bp->b_bufsize += size;
+
+ tbp->b_bufsize -= size;
+ tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
+ tbp->b_flags |= (B_ASYNC | B_AGE);
+ s = splbio();
+ reassignbuf(tbp, tbp->b_vp); /* put on clean list */
+ ++tbp->b_vp->v_numoutput;
+ splx(s);
+ b_save->bs_children[i] = tbp;
+
+ cp += size;
+ }
+
+ if (i == 0) {
+ /* None to cluster */
+ bp->b_saveaddr = b_save->bs_saveaddr;
+ bp->b_flags &= ~B_CALL;
+ bp->b_iodone = NULL;
+ free(b_save, M_SEGMENT);
+ }
+ bawrite(bp);
+ if (i < len) {
+ len -= i + 1;
+ start_lbn += 1;
+ goto redo;
+ }
+}
+
+/*
+ * Collect together all the buffers in a cluster.
+ * Plus add one additional buffer.
+ */
+struct cluster_save *
+cluster_collectbufs(vp, last_bp)
+ struct vnode *vp;
+ struct buf *last_bp;
+{
+ struct cluster_save *buflist;
+ daddr_t lbn;
+ int i, len;
+
+ len = vp->v_lastw - vp->v_cstart + 1;
+ buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist),
+ M_SEGMENT, M_WAITOK);
+ buflist->bs_nchildren = 0;
+ buflist->bs_children = (struct buf **)(buflist + 1);
+ for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++)
+ (void)bread(vp, lbn, last_bp->b_bcount, NOCRED,
+ &buflist->bs_children[i]);
+ buflist->bs_children[i] = last_bp;
+ buflist->bs_nchildren = i + 1;
+ return (buflist);
+}
diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c
new file mode 100644
index 0000000..2fe39eb
--- /dev/null
+++ b/sys/kern/vfs_conf.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_conf.c 8.8 (Berkeley) 3/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#ifdef FFS
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * This specifies the filesystem used to mount the root.
+ * This specification should be done by /etc/config.
+ */
+int (*mountroot)() = ffs_mountroot;
+#endif
+
+/*
+ * These define the root filesystem and device.
+ */
+struct mount *rootfs;
+struct vnode *rootvnode;
+
+/*
+ * Set up the filesystem operations for vnodes.
+ * The types are defined in mount.h.
+ */
+#ifdef FFS
+extern struct vfsops ufs_vfsops;
+#define UFS_VFSOPS &ufs_vfsops
+#else
+#define UFS_VFSOPS NULL
+#endif
+
+#ifdef LFS
+extern struct vfsops lfs_vfsops;
+#define LFS_VFSOPS &lfs_vfsops
+#else
+#define LFS_VFSOPS NULL
+#endif
+
+#ifdef MFS
+extern struct vfsops mfs_vfsops;
+#define MFS_VFSOPS &mfs_vfsops
+#else
+#define MFS_VFSOPS NULL
+#endif
+
+#ifdef NFS
+extern struct vfsops nfs_vfsops;
+#define NFS_VFSOPS &nfs_vfsops
+#else
+#define NFS_VFSOPS NULL
+#endif
+
+#ifdef FDESC
+extern struct vfsops fdesc_vfsops;
+#define FDESC_VFSOPS &fdesc_vfsops
+#else
+#define FDESC_VFSOPS NULL
+#endif
+
+#ifdef PORTAL
+extern struct vfsops portal_vfsops;
+#define PORTAL_VFSOPS &portal_vfsops
+#else
+#define PORTAL_VFSOPS NULL
+#endif
+
+#ifdef NULLFS
+extern struct vfsops null_vfsops;
+#define NULL_VFSOPS &null_vfsops
+#else
+#define NULL_VFSOPS NULL
+#endif
+
+#ifdef UMAPFS
+extern struct vfsops umap_vfsops;
+#define UMAP_VFSOPS &umap_vfsops
+#else
+#define UMAP_VFSOPS NULL
+#endif
+
+#ifdef KERNFS
+extern struct vfsops kernfs_vfsops;
+#define KERNFS_VFSOPS &kernfs_vfsops
+#else
+#define KERNFS_VFSOPS NULL
+#endif
+
+#ifdef PROCFS
+extern struct vfsops procfs_vfsops;
+#define PROCFS_VFSOPS &procfs_vfsops
+#else
+#define PROCFS_VFSOPS NULL
+#endif
+
+#ifdef AFS
+extern struct vfsops afs_vfsops;
+#define AFS_VFSOPS &afs_vfsops
+#else
+#define AFS_VFSOPS NULL
+#endif
+
+#ifdef CD9660
+extern struct vfsops cd9660_vfsops;
+#define CD9660_VFSOPS &cd9660_vfsops
+#else
+#define CD9660_VFSOPS NULL
+#endif
+
+#ifdef UNION
+extern struct vfsops union_vfsops;
+#define UNION_VFSOPS &union_vfsops
+#else
+#define UNION_VFSOPS NULL
+#endif
+
+struct vfsops *vfssw[] = {
+ NULL, /* 0 = MOUNT_NONE */
+ UFS_VFSOPS, /* 1 = MOUNT_UFS */
+ NFS_VFSOPS, /* 2 = MOUNT_NFS */
+ MFS_VFSOPS, /* 3 = MOUNT_MFS */
+ NULL, /* 4 = MOUNT_PC */
+ LFS_VFSOPS, /* 5 = MOUNT_LFS */
+ NULL, /* 6 = MOUNT_LOFS */
+ FDESC_VFSOPS, /* 7 = MOUNT_FDESC */
+ PORTAL_VFSOPS, /* 8 = MOUNT_PORTAL */
+ NULL_VFSOPS, /* 9 = MOUNT_NULL */
+ UMAP_VFSOPS, /* 10 = MOUNT_UMAP */
+ KERNFS_VFSOPS, /* 11 = MOUNT_KERNFS */
+ PROCFS_VFSOPS, /* 12 = MOUNT_PROCFS */
+ AFS_VFSOPS, /* 13 = MOUNT_AFS */
+ CD9660_VFSOPS, /* 14 = MOUNT_CD9660 */
+ UNION_VFSOPS, /* 15 = MOUNT_UNION */
+ 0
+};
+
+
+/*
+ *
+ * vfs_opv_descs enumerates the list of vnode classes, each with it's own
+ * vnode operation vector. It is consulted at system boot to build operation
+ * vectors. It is NULL terminated.
+ *
+ */
+extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
+extern struct vnodeopv_desc ffs_specop_opv_desc;
+extern struct vnodeopv_desc ffs_fifoop_opv_desc;
+extern struct vnodeopv_desc lfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc lfs_specop_opv_desc;
+extern struct vnodeopv_desc lfs_fifoop_opv_desc;
+extern struct vnodeopv_desc mfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc dead_vnodeop_opv_desc;
+extern struct vnodeopv_desc fifo_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_vnodeop_opv_desc;
+extern struct vnodeopv_desc nfsv2_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fdesc_vnodeop_opv_desc;
+extern struct vnodeopv_desc portal_vnodeop_opv_desc;
+extern struct vnodeopv_desc null_vnodeop_opv_desc;
+extern struct vnodeopv_desc umap_vnodeop_opv_desc;
+extern struct vnodeopv_desc kernfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc procfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_specop_opv_desc;
+extern struct vnodeopv_desc cd9660_fifoop_opv_desc;
+extern struct vnodeopv_desc union_vnodeop_opv_desc;
+
+struct vnodeopv_desc *vfs_opv_descs[] = {
+ &ffs_vnodeop_opv_desc,
+ &ffs_specop_opv_desc,
+#ifdef FIFO
+ &ffs_fifoop_opv_desc,
+#endif
+ &dead_vnodeop_opv_desc,
+#ifdef FIFO
+ &fifo_vnodeop_opv_desc,
+#endif
+ &spec_vnodeop_opv_desc,
+#ifdef LFS
+ &lfs_vnodeop_opv_desc,
+ &lfs_specop_opv_desc,
+#ifdef FIFO
+ &lfs_fifoop_opv_desc,
+#endif
+#endif
+#ifdef MFS
+ &mfs_vnodeop_opv_desc,
+#endif
+#ifdef NFS
+ &nfsv2_vnodeop_opv_desc,
+ &spec_nfsv2nodeop_opv_desc,
+#ifdef FIFO
+ &fifo_nfsv2nodeop_opv_desc,
+#endif
+#endif
+#ifdef FDESC
+ &fdesc_vnodeop_opv_desc,
+#endif
+#ifdef PORTAL
+ &portal_vnodeop_opv_desc,
+#endif
+#ifdef NULLFS
+ &null_vnodeop_opv_desc,
+#endif
+#ifdef UMAPFS
+ &umap_vnodeop_opv_desc,
+#endif
+#ifdef KERNFS
+ &kernfs_vnodeop_opv_desc,
+#endif
+#ifdef PROCFS
+ &procfs_vnodeop_opv_desc,
+#endif
+#ifdef CD9660
+ &cd9660_vnodeop_opv_desc,
+ &cd9660_specop_opv_desc,
+#ifdef FIFO
+ &cd9660_fifoop_opv_desc,
+#endif
+#endif
+#ifdef UNION
+ &union_vnodeop_opv_desc,
+#endif
+ NULL
+};
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
new file mode 100644
index 0000000..1ce7347
--- /dev/null
+++ b/sys/kern/vfs_init.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed
+ * to Berkeley by John Heidemann of the UCLA Ficus project.
+ *
+ * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_init.c 8.3 (Berkeley) 1/4/94
+ */
+
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+/*
+ * Sigh, such primitive tools are these...
+ */
+#if 0
+#define DODEBUG(A) A
+#else
+#define DODEBUG(A)
+#endif
+
+extern struct vnodeopv_desc *vfs_opv_descs[];
+ /* a list of lists of vnodeops defns */
+extern struct vnodeop_desc *vfs_op_descs[];
+ /* and the operations they perform */
+/*
+ * This code doesn't work if the defn is **vnodop_defns with cc.
+ * The problem is because of the compiler sometimes putting in an
+ * extra level of indirection for arrays. It's an interesting
+ * "feature" of C.
+ */
+int vfs_opv_numops;
+
+typedef (*PFI)(); /* the standard Pointer to a Function returning an Int */
+
+/*
+ * A miscellaneous routine.
+ * A generic "default" routine that just returns an error.
+ */
+int
+vn_default_error()
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * vfs_init.c
+ *
+ * Allocate and fill in operations vectors.
+ *
+ * An undocumented feature of this approach to defining operations is that
+ * there can be multiple entries in vfs_opv_descs for the same operations
+ * vector. This allows third parties to extend the set of operations
+ * supported by another layer in a binary compatibile way. For example,
+ * assume that NFS needed to be modified to support Ficus. NFS has an entry
+ * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by
+ * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_entensions)
+ * listing those new operations Ficus adds to NFS, all without modifying the
+ * NFS code. (Of couse, the OTW NFS protocol still needs to be munged, but
+ * that is a(whole)nother story.) This is a feature.
+ */
+void
+vfs_opv_init()
+{
+ int i, j, k;
+ int (***opv_desc_vector_p)();
+ int (**opv_desc_vector)();
+ struct vnodeopv_entry_desc *opve_descp;
+
+ /*
+ * Allocate the dynamic vectors and fill them in.
+ */
+ for (i=0; vfs_opv_descs[i]; i++) {
+ opv_desc_vector_p = vfs_opv_descs[i]->opv_desc_vector_p;
+ /*
+ * Allocate and init the vector, if it needs it.
+ * Also handle backwards compatibility.
+ */
+ if (*opv_desc_vector_p == NULL) {
+ /* XXX - shouldn't be M_VNODE */
+ MALLOC(*opv_desc_vector_p, PFI*,
+ vfs_opv_numops*sizeof(PFI), M_VNODE, M_WAITOK);
+ bzero (*opv_desc_vector_p, vfs_opv_numops*sizeof(PFI));
+ DODEBUG(printf("vector at %x allocated\n",
+ opv_desc_vector_p));
+ }
+ opv_desc_vector = *opv_desc_vector_p;
+ for (j=0; vfs_opv_descs[i]->opv_desc_ops[j].opve_op; j++) {
+ opve_descp = &(vfs_opv_descs[i]->opv_desc_ops[j]);
+
+ /*
+ * Sanity check: is this operation listed
+ * in the list of operations? We check this
+ * by seeing if its offest is zero. Since
+ * the default routine should always be listed
+ * first, it should be the only one with a zero
+ * offset. Any other operation with a zero
+ * offset is probably not listed in
+ * vfs_op_descs, and so is probably an error.
+ *
+ * A panic here means the layer programmer
+ * has committed the all-too common bug
+ * of adding a new operation to the layer's
+ * list of vnode operations but
+ * not adding the operation to the system-wide
+ * list of supported operations.
+ */
+ if (opve_descp->opve_op->vdesc_offset == 0 &&
+ opve_descp->opve_op->vdesc_offset !=
+ VOFFSET(vop_default)) {
+ printf("operation %s not listed in %s.\n",
+ opve_descp->opve_op->vdesc_name,
+ "vfs_op_descs");
+ panic ("vfs_opv_init: bad operation");
+ }
+ /*
+ * Fill in this entry.
+ */
+ opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
+ opve_descp->opve_impl;
+ }
+ }
+ /*
+ * Finally, go back and replace unfilled routines
+ * with their default. (Sigh, an O(n^3) algorithm. I
+ * could make it better, but that'd be work, and n is small.)
+ */
+ for (i = 0; vfs_opv_descs[i]; i++) {
+ opv_desc_vector = *(vfs_opv_descs[i]->opv_desc_vector_p);
+ /*
+ * Force every operations vector to have a default routine.
+ */
+ if (opv_desc_vector[VOFFSET(vop_default)]==NULL) {
+ panic("vfs_opv_init: operation vector without default routine.");
+ }
+ for (k = 0; k<vfs_opv_numops; k++)
+ if (opv_desc_vector[k] == NULL)
+ opv_desc_vector[k] =
+ opv_desc_vector[VOFFSET(vop_default)];
+ }
+}
+
+/*
+ * Initialize known vnode operations vectors.
+ */
+void
+vfs_op_init()
+{
+ int i;
+
+ DODEBUG(printf("Vnode_interface_init.\n"));
+ /*
+ * Set all vnode vectors to a well known value.
+ */
+ for (i = 0; vfs_opv_descs[i]; i++)
+ *(vfs_opv_descs[i]->opv_desc_vector_p) = NULL;
+ /*
+ * Figure out how many ops there are by counting the table,
+ * and assign each its offset.
+ */
+ for (vfs_opv_numops = 0, i = 0; vfs_op_descs[i]; i++) {
+ vfs_op_descs[i]->vdesc_offset = vfs_opv_numops;
+ vfs_opv_numops++;
+ }
+ DODEBUG(printf ("vfs_opv_numops=%d\n", vfs_opv_numops));
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern struct vnodeops dead_vnodeops;
+extern struct vnodeops spec_vnodeops;
+extern void vclean();
+struct vattr va_null;
+
+/*
+ * Initialize the vnode structures and initialize each file system type.
+ */
+vfsinit()
+{
+ struct vfsops **vfsp;
+
+ /*
+ * Initialize the vnode table
+ */
+ vntblinit();
+ /*
+ * Initialize the vnode name cache
+ */
+ nchinit();
+ /*
+ * Build vnode operation vectors.
+ */
+ vfs_op_init();
+ vfs_opv_init(); /* finish the job */
+ /*
+ * Initialize each file system type.
+ */
+ vattr_null(&va_null);
+ for (vfsp = &vfssw[0]; vfsp <= &vfssw[MOUNT_MAXTYPE]; vfsp++) {
+ if (*vfsp == NULL)
+ continue;
+ (*(*vfsp)->vfs_init)();
+ }
+}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
new file mode 100644
index 0000000..0fa5aa1
--- /dev/null
+++ b/sys/kern/vfs_lookup.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94
+ */
+
+#include <sys/param.h>
+#include <sys/syslimits.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/proc.h>
+
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+/*
+ * Convert a pathname into a pointer to a locked inode.
+ *
+ * The FOLLOW flag is set when symbolic links are to be followed
+ * when they occur at the end of the name translation process.
+ * Symbolic links are always followed for all other pathname
+ * components other than the last.
+ *
+ * The segflg defines whether the name is to be copied from user
+ * space or kernel space.
+ *
+ * Overall outline of namei:
+ *
+ * copy in name
+ * get starting directory
+ * while (!done && !error) {
+ * call lookup to search path.
+ * if symbolic link, massage name in buffer and continue
+ * }
+ */
+int
+namei(ndp)
+ register struct nameidata *ndp;
+{
+ register struct filedesc *fdp; /* pointer to file descriptor state */
+ register char *cp; /* pointer into pathname argument */
+ register struct vnode *dp; /* the directory we are searching */
+ struct iovec aiov; /* uio for reading symbolic links */
+ struct uio auio;
+ int error, linklen;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred;
+#ifdef DIAGNOSTIC
+ if (!cnp->cn_cred || !cnp->cn_proc)
+ panic ("namei: bad cred/proc");
+ if (cnp->cn_nameiop & (~OPMASK))
+ panic ("namei: nameiop contaminated with flags");
+ if (cnp->cn_flags & OPMASK)
+ panic ("namei: flags contaminated with nameiops");
+#endif
+ fdp = cnp->cn_proc->p_fd;
+
+ /*
+ * Get a buffer for the name to be translated, and copy the
+ * name into the buffer.
+ */
+ if ((cnp->cn_flags & HASBUF) == 0)
+ MALLOC(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ if (ndp->ni_segflg == UIO_SYSSPACE)
+ error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
+ MAXPATHLEN, &ndp->ni_pathlen);
+ else
+ error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
+ MAXPATHLEN, &ndp->ni_pathlen);
+ if (error) {
+ free(cnp->cn_pnbuf, M_NAMEI);
+ ndp->ni_vp = NULL;
+ return (error);
+ }
+ ndp->ni_loopcnt = 0;
+#ifdef KTRACE
+ if (KTRPOINT(cnp->cn_proc, KTR_NAMEI))
+ ktrnamei(cnp->cn_proc->p_tracep, cnp->cn_pnbuf);
+#endif
+
+ /*
+ * Get starting point for the translation.
+ */
+ if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL)
+ ndp->ni_rootdir = rootvnode;
+ dp = fdp->fd_cdir;
+ VREF(dp);
+ for (;;) {
+ /*
+ * Check if root directory should replace current directory.
+ * Done at start of translation and after symbolic link.
+ */
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ if (*(cnp->cn_nameptr) == '/') {
+ vrele(dp);
+ while (*(cnp->cn_nameptr) == '/') {
+ cnp->cn_nameptr++;
+ ndp->ni_pathlen--;
+ }
+ dp = ndp->ni_rootdir;
+ VREF(dp);
+ }
+ ndp->ni_startdir = dp;
+ if (error = lookup(ndp)) {
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ return (error);
+ }
+ /*
+ * Check for symbolic link
+ */
+ if ((cnp->cn_flags & ISSYMLINK) == 0) {
+ if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ else
+ cnp->cn_flags |= HASBUF;
+ return (0);
+ }
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ VOP_UNLOCK(ndp->ni_dvp);
+ if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
+ error = ELOOP;
+ break;
+ }
+ if (ndp->ni_pathlen > 1)
+ MALLOC(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ else
+ cp = cnp->cn_pnbuf;
+ aiov.iov_base = cp;
+ aiov.iov_len = MAXPATHLEN;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = 0;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = (struct proc *)0;
+ auio.uio_resid = MAXPATHLEN;
+ if (error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred)) {
+ if (ndp->ni_pathlen > 1)
+ free(cp, M_NAMEI);
+ break;
+ }
+ linklen = MAXPATHLEN - auio.uio_resid;
+ if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
+ if (ndp->ni_pathlen > 1)
+ free(cp, M_NAMEI);
+ error = ENAMETOOLONG;
+ break;
+ }
+ if (ndp->ni_pathlen > 1) {
+ bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ cnp->cn_pnbuf = cp;
+ } else
+ cnp->cn_pnbuf[linklen] = '\0';
+ ndp->ni_pathlen += linklen;
+ vput(ndp->ni_vp);
+ dp = ndp->ni_dvp;
+ }
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+ return (error);
+}
+
+/*
+ * Search a pathname.
+ * This is a very central and rather complicated routine.
+ *
+ * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
+ * The starting directory is taken from ni_startdir. The pathname is
+ * descended until done, or a symbolic link is encountered. The variable
+ * ni_more is clear if the path is completed; it is set to one if a
+ * symbolic link needing interpretation is encountered.
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it, the parent directory is returned
+ * locked. If flag has WANTPARENT or'ed into it, the parent directory is
+ * returned unlocked. Otherwise the parent directory is not returned. If
+ * the target of the pathname exists and LOCKLEAF is or'ed into the flag
+ * the target is returned locked, otherwise it is returned unlocked.
+ * When creating or renaming and LOCKPARENT is specified, the target may not
+ * be ".". When deleting and LOCKPARENT is specified, the target may be ".".
+ *
+ * Overall outline of lookup:
+ *
+ * dirloop:
+ * identify next component of name at ndp->ni_ptr
+ * handle degenerate case where name is null string
+ * if .. and crossing mount points and on mounted filesys, find parent
+ * call VOP_LOOKUP routine for next component name
+ * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
+ * component vnode returned in ni_vp (if it exists), locked.
+ * if result vnode is mounted on and crossing mount points,
+ * find mounted on vnode
+ * if more components of name, do next level at dirloop
+ * return the answer in ni_vp, locked if LOCKLEAF set
+ * if LOCKPARENT set, return locked parent in ni_dvp
+ * if WANTPARENT set, return unlocked parent in ni_dvp
+ */
+int
+lookup(ndp)
+ register struct nameidata *ndp;
+{
+ register char *cp; /* pointer into pathname argument */
+ register struct vnode *dp = 0; /* the directory we are searching */
+ struct vnode *tdp; /* saved dp */
+ struct mount *mp; /* mount table entry */
+ int docache; /* == 0 do not cache last component */
+ int wantparent; /* 1 => wantparent or lockparent flag */
+ int rdonly; /* lookup read-only flag bit */
+ int error = 0;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ /*
+ * Setup: break out flag bits into variables.
+ */
+ wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
+ docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
+ if (cnp->cn_nameiop == DELETE ||
+ (wantparent && cnp->cn_nameiop != CREATE))
+ docache = 0;
+ rdonly = cnp->cn_flags & RDONLY;
+ ndp->ni_dvp = NULL;
+ cnp->cn_flags &= ~ISSYMLINK;
+ dp = ndp->ni_startdir;
+ ndp->ni_startdir = NULLVP;
+ VOP_LOCK(dp);
+
+dirloop:
+ /*
+ * Search a new directory.
+ *
+ * The cn_hash value is for use by vfs_cache.
+ * The last component of the filename is left accessible via
+ * cnp->cn_nameptr for callers that need the name. Callers needing
+ * the name set the SAVENAME flag. When done, they assume
+ * responsibility for freeing the pathname buffer.
+ */
+ cnp->cn_consume = 0;
+ cnp->cn_hash = 0;
+ for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
+ cnp->cn_hash += (unsigned char)*cp;
+ cnp->cn_namelen = cp - cnp->cn_nameptr;
+ if (cnp->cn_namelen > NAME_MAX) {
+ error = ENAMETOOLONG;
+ goto bad;
+ }
+#ifdef NAMEI_DIAGNOSTIC
+ { char c = *cp;
+ *cp = '\0';
+ printf("{%s}: ", cnp->cn_nameptr);
+ *cp = c; }
+#endif
+ ndp->ni_pathlen -= cnp->cn_namelen;
+ ndp->ni_next = cp;
+ cnp->cn_flags |= MAKEENTRY;
+ if (*cp == '\0' && docache == 0)
+ cnp->cn_flags &= ~MAKEENTRY;
+ if (cnp->cn_namelen == 2 &&
+ cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
+ cnp->cn_flags |= ISDOTDOT;
+ else
+ cnp->cn_flags &= ~ISDOTDOT;
+ if (*ndp->ni_next == 0)
+ cnp->cn_flags |= ISLASTCN;
+ else
+ cnp->cn_flags &= ~ISLASTCN;
+
+
+ /*
+ * Check for degenerate name (e.g. / or "")
+ * which is a way of talking about a directory,
+ * e.g. like "/." or ".".
+ */
+ if (cnp->cn_nameptr[0] == '\0') {
+ if (cnp->cn_nameiop != LOOKUP) {
+ error = EISDIR;
+ goto bad;
+ }
+ if (dp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto bad;
+ }
+ if (wantparent) {
+ ndp->ni_dvp = dp;
+ VREF(dp);
+ }
+ ndp->ni_vp = dp;
+ if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
+ VOP_UNLOCK(dp);
+ if (cnp->cn_flags & SAVESTART)
+ panic("lookup: SAVESTART");
+ return (0);
+ }
+
+ /*
+ * Handle "..": two special cases.
+ * 1. If at root directory (e.g. after chroot)
+ * or at absolute root directory
+ * then ignore it so can't get out.
+ * 2. If this vnode is the root of a mounted
+ * filesystem, then replace it with the
+ * vnode which was mounted on so we take the
+ * .. in the other file system.
+ */
+ if (cnp->cn_flags & ISDOTDOT) {
+ for (;;) {
+ if (dp == ndp->ni_rootdir || dp == rootvnode) {
+ ndp->ni_dvp = dp;
+ ndp->ni_vp = dp;
+ VREF(dp);
+ goto nextname;
+ }
+ if ((dp->v_flag & VROOT) == 0 ||
+ (cnp->cn_flags & NOCROSSMOUNT))
+ break;
+ tdp = dp;
+ dp = dp->v_mount->mnt_vnodecovered;
+ vput(tdp);
+ VREF(dp);
+ VOP_LOCK(dp);
+ }
+ }
+
+ /*
+ * We now have a segment name to search for, and a directory to search.
+ */
+unionlookup:
+ ndp->ni_dvp = dp;
+ if (error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) {
+#ifdef DIAGNOSTIC
+ if (ndp->ni_vp != NULL)
+ panic("leaf should be empty");
+#endif
+#ifdef NAMEI_DIAGNOSTIC
+ printf("not found\n");
+#endif
+ if ((error == ENOENT) &&
+ (dp->v_flag & VROOT) &&
+ (dp->v_mount->mnt_flag & MNT_UNION)) {
+ tdp = dp;
+ dp = dp->v_mount->mnt_vnodecovered;
+ vput(tdp);
+ VREF(dp);
+ VOP_LOCK(dp);
+ goto unionlookup;
+ }
+
+ if (error != EJUSTRETURN)
+ goto bad;
+ /*
+ * If creating and at end of pathname, then can consider
+ * allowing file to be created.
+ */
+ if (rdonly || (ndp->ni_dvp->v_mount->mnt_flag & MNT_RDONLY)) {
+ error = EROFS;
+ goto bad;
+ }
+ /*
+ * We return with ni_vp NULL to indicate that the entry
+ * doesn't currently exist, leaving a pointer to the
+ * (possibly locked) directory inode in ndp->ni_dvp.
+ */
+ if (cnp->cn_flags & SAVESTART) {
+ ndp->ni_startdir = ndp->ni_dvp;
+ VREF(ndp->ni_startdir);
+ }
+ return (0);
+ }
+#ifdef NAMEI_DIAGNOSTIC
+ printf("found\n");
+#endif
+
+ /*
+ * Take into account any additional components consumed by
+ * the underlying filesystem.
+ */
+ if (cnp->cn_consume > 0) {
+ cnp->cn_nameptr += cnp->cn_consume;
+ ndp->ni_next += cnp->cn_consume;
+ ndp->ni_pathlen -= cnp->cn_consume;
+ cnp->cn_consume = 0;
+ }
+
+ dp = ndp->ni_vp;
+ /*
+ * Check for symbolic link
+ */
+ if ((dp->v_type == VLNK) &&
+ ((cnp->cn_flags & FOLLOW) || *ndp->ni_next == '/')) {
+ cnp->cn_flags |= ISSYMLINK;
+ return (0);
+ }
+
+ /*
+ * Check to see if the vnode has been mounted on;
+ * if so find the root of the mounted file system.
+ */
+ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
+ (cnp->cn_flags & NOCROSSMOUNT) == 0) {
+ if (mp->mnt_flag & MNT_MLOCK) {
+ mp->mnt_flag |= MNT_MWAIT;
+ sleep((caddr_t)mp, PVFS);
+ continue;
+ }
+ if (error = VFS_ROOT(dp->v_mountedhere, &tdp))
+ goto bad2;
+ vput(dp);
+ ndp->ni_vp = dp = tdp;
+ }
+
+nextname:
+ /*
+ * Not a symbolic link. If more pathname,
+ * continue at next component, else return.
+ */
+ if (*ndp->ni_next == '/') {
+ cnp->cn_nameptr = ndp->ni_next;
+ while (*cnp->cn_nameptr == '/') {
+ cnp->cn_nameptr++;
+ ndp->ni_pathlen--;
+ }
+ vrele(ndp->ni_dvp);
+ goto dirloop;
+ }
+ /*
+ * Check for read-only file systems.
+ */
+ if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) {
+ /*
+ * Disallow directory write attempts on read-only
+ * file systems.
+ */
+ if (rdonly || (dp->v_mount->mnt_flag & MNT_RDONLY) ||
+ (wantparent &&
+ (ndp->ni_dvp->v_mount->mnt_flag & MNT_RDONLY))) {
+ error = EROFS;
+ goto bad2;
+ }
+ }
+ if (cnp->cn_flags & SAVESTART) {
+ ndp->ni_startdir = ndp->ni_dvp;
+ VREF(ndp->ni_startdir);
+ }
+ if (!wantparent)
+ vrele(ndp->ni_dvp);
+ if ((cnp->cn_flags & LOCKLEAF) == 0)
+ VOP_UNLOCK(dp);
+ return (0);
+
+bad2:
+ if ((cnp->cn_flags & LOCKPARENT) && *ndp->ni_next == '\0')
+ VOP_UNLOCK(ndp->ni_dvp);
+ vrele(ndp->ni_dvp);
+bad:
+ vput(dp);
+ ndp->ni_vp = NULL;
+ return (error);
+}
+
+
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
new file mode 100644
index 0000000..9891fe6
--- /dev/null
+++ b/sys/kern/vfs_subr.c
@@ -0,0 +1,1322 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
+ */
+
+/*
+ * External virtual filesystem routines
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+#include <miscfs/specfs/specdev.h>
+
+enum vtype iftovt_tab[16] = {
+ VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+ VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
+};
+int vttoif_tab[9] = {
+ 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
+ S_IFSOCK, S_IFIFO, S_IFMT,
+};
+
+/*
+ * Insq/Remq for the vnode usage lists.
+ */
+#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
+#define bufremvn(bp) { \
+ LIST_REMOVE(bp, b_vnbufs); \
+ (bp)->b_vnbufs.le_next = NOLIST; \
+}
+
+TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
+struct mntlist mountlist; /* mounted filesystem list */
+
+/*
+ * Initialize the vnode management data structures.
+ */
+vntblinit()
+{
+
+ TAILQ_INIT(&vnode_free_list);
+ TAILQ_INIT(&mountlist);
+}
+
+/*
+ * Lock a filesystem.
+ * Used to prevent access to it while mounting and unmounting.
+ */
+vfs_lock(mp)
+ register struct mount *mp;
+{
+
+ while(mp->mnt_flag & MNT_MLOCK) {
+ mp->mnt_flag |= MNT_MWAIT;
+ sleep((caddr_t)mp, PVFS);
+ }
+ mp->mnt_flag |= MNT_MLOCK;
+ return (0);
+}
+
+/*
+ * Unlock a locked filesystem.
+ * Panic if filesystem is not locked.
+ */
+void
+vfs_unlock(mp)
+ register struct mount *mp;
+{
+
+ if ((mp->mnt_flag & MNT_MLOCK) == 0)
+ panic("vfs_unlock: not locked");
+ mp->mnt_flag &= ~MNT_MLOCK;
+ if (mp->mnt_flag & MNT_MWAIT) {
+ mp->mnt_flag &= ~MNT_MWAIT;
+ wakeup((caddr_t)mp);
+ }
+}
+
+/*
+ * Mark a mount point as busy.
+ * Used to synchronize access and to delay unmounting.
+ */
+vfs_busy(mp)
+ register struct mount *mp;
+{
+
+ while(mp->mnt_flag & MNT_MPBUSY) {
+ mp->mnt_flag |= MNT_MPWANT;
+ sleep((caddr_t)&mp->mnt_flag, PVFS);
+ }
+ if (mp->mnt_flag & MNT_UNMOUNT)
+ return (1);
+ mp->mnt_flag |= MNT_MPBUSY;
+ return (0);
+}
+
+/*
+ * Free a busy filesystem.
+ * Panic if filesystem is not busy.
+ */
+vfs_unbusy(mp)
+ register struct mount *mp;
+{
+
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("vfs_unbusy: not busy");
+ mp->mnt_flag &= ~MNT_MPBUSY;
+ if (mp->mnt_flag & MNT_MPWANT) {
+ mp->mnt_flag &= ~MNT_MPWANT;
+ wakeup((caddr_t)&mp->mnt_flag);
+ }
+}
+
+/*
+ * Lookup a mount point by filesystem identifier.
+ */
+struct mount *
+getvfs(fsid)
+ fsid_t *fsid;
+{
+ register struct mount *mp;
+
+ for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
+ if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
+ mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
+ return (mp);
+ }
+ return ((struct mount *)0);
+}
+
+/*
+ * Get a new unique fsid
+ */
+void
+getnewfsid(mp, mtype)
+ struct mount *mp;
+ int mtype;
+{
+static u_short xxxfs_mntid;
+
+ fsid_t tfsid;
+
+ mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
+ mp->mnt_stat.f_fsid.val[1] = mtype;
+ if (xxxfs_mntid == 0)
+ ++xxxfs_mntid;
+ tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
+ tfsid.val[1] = mtype;
+ if (mountlist.tqh_first != NULL) {
+ while (getvfs(&tfsid)) {
+ tfsid.val[0]++;
+ xxxfs_mntid++;
+ }
+ }
+ mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
+}
+
+/*
+ * Set vnode attributes to VNOVAL
+ */
+void vattr_null(vap)
+ register struct vattr *vap;
+{
+
+ vap->va_type = VNON;
+ vap->va_size = vap->va_bytes = VNOVAL;
+ vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
+ vap->va_fsid = vap->va_fileid =
+ vap->va_blocksize = vap->va_rdev =
+ vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
+ vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
+ vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
+ vap->va_flags = vap->va_gen = VNOVAL;
+ vap->va_vaflags = 0;
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern int (**dead_vnodeop_p)();
+extern void vclean();
+long numvnodes;
+extern struct vattr va_null;
+
+/*
+ * Return the next vnode from the free list.
+ */
+getnewvnode(tag, mp, vops, vpp)
+ enum vtagtype tag;
+ struct mount *mp;
+ int (**vops)();
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+ int s;
+
+ if ((vnode_free_list.tqh_first == NULL &&
+ numvnodes < 2 * desiredvnodes) ||
+ numvnodes < desiredvnodes) {
+ vp = (struct vnode *)malloc((u_long)sizeof *vp,
+ M_VNODE, M_WAITOK);
+ bzero((char *)vp, sizeof *vp);
+ numvnodes++;
+ } else {
+ if ((vp = vnode_free_list.tqh_first) == NULL) {
+ tablefull("vnode");
+ *vpp = 0;
+ return (ENFILE);
+ }
+ if (vp->v_usecount)
+ panic("free vnode isn't");
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ /* see comment on why 0xdeadb is set at end of vgone (below) */
+ vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
+ vp->v_lease = NULL;
+ if (vp->v_type != VBAD)
+ vgone(vp);
+#ifdef DIAGNOSTIC
+ if (vp->v_data)
+ panic("cleaned vnode isn't");
+ s = splbio();
+ if (vp->v_numoutput)
+ panic("Clean vnode has pending I/O's");
+ splx(s);
+#endif
+ vp->v_flag = 0;
+ vp->v_lastr = 0;
+ vp->v_ralen = 0;
+ vp->v_maxra = 0;
+ vp->v_lastw = 0;
+ vp->v_lasta = 0;
+ vp->v_cstart = 0;
+ vp->v_clen = 0;
+ vp->v_socket = 0;
+ }
+ vp->v_type = VNON;
+ cache_purge(vp);
+ vp->v_tag = tag;
+ vp->v_op = vops;
+ insmntque(vp, mp);
+ *vpp = vp;
+ vp->v_usecount = 1;
+ vp->v_data = 0;
+ return (0);
+}
+
+/*
+ * Move a vnode from one mount queue to another.
+ */
+insmntque(vp, mp)
+ register struct vnode *vp;
+ register struct mount *mp;
+{
+
+ /*
+ * Delete from old mount point vnode list, if on one.
+ */
+ if (vp->v_mount != NULL)
+ LIST_REMOVE(vp, v_mntvnodes);
+ /*
+ * Insert into list of vnodes for the new mount point, if available.
+ */
+ if ((vp->v_mount = mp) == NULL)
+ return;
+ LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+}
+
+/*
+ * Update outstanding I/O count and do wakeup if requested.
+ */
+vwakeup(bp)
+ register struct buf *bp;
+{
+ register struct vnode *vp;
+
+ bp->b_flags &= ~B_WRITEINPROG;
+ if (vp = bp->b_vp) {
+ vp->v_numoutput--;
+ if (vp->v_numoutput < 0)
+ panic("vwakeup: neg numoutput");
+ if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+ if (vp->v_numoutput < 0)
+ panic("vwakeup: neg numoutput");
+ vp->v_flag &= ~VBWAIT;
+ wakeup((caddr_t)&vp->v_numoutput);
+ }
+ }
+}
+
+/*
+ * Flush out and invalidate all buffers associated with a vnode.
+ * Called with the underlying object locked.
+ */
+int
+vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
+ register struct vnode *vp;
+ int flags;
+ struct ucred *cred;
+ struct proc *p;
+ int slpflag, slptimeo;
+{
+ register struct buf *bp;
+ struct buf *nbp, *blist;
+ int s, error;
+
+ if (flags & V_SAVE) {
+ if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
+ return (error);
+ if (vp->v_dirtyblkhd.lh_first != NULL)
+ panic("vinvalbuf: dirty bufs");
+ }
+ for (;;) {
+ if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+ while (blist && blist->b_lblkno < 0)
+ blist = blist->b_vnbufs.le_next;
+ if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
+ (flags & V_SAVEMETA))
+ while (blist && blist->b_lblkno < 0)
+ blist = blist->b_vnbufs.le_next;
+ if (!blist)
+ break;
+
+ for (bp = blist; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next;
+ if (flags & V_SAVEMETA && bp->b_lblkno < 0)
+ continue;
+ s = splbio();
+ if (bp->b_flags & B_BUSY) {
+ bp->b_flags |= B_WANTED;
+ error = tsleep((caddr_t)bp,
+ slpflag | (PRIBIO + 1), "vinvalbuf",
+ slptimeo);
+ splx(s);
+ if (error)
+ return (error);
+ break;
+ }
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ /*
+ * XXX Since there are no node locks for NFS, I believe
+ * there is a slight chance that a delayed write will
+ * occur while sleeping just above, so check for it.
+ */
+ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
+ (void) VOP_BWRITE(bp);
+ break;
+ }
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ }
+ }
+ if (!(flags & V_SAVEMETA) &&
+ (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
+ panic("vinvalbuf: flush failed");
+ return (0);
+}
+
+/*
+ * Associate a buffer with a vnode.
+ */
+bgetvp(vp, bp)
+ register struct vnode *vp;
+ register struct buf *bp;
+{
+
+ if (bp->b_vp)
+ panic("bgetvp: not free");
+ VHOLD(vp);
+ bp->b_vp = vp;
+ if (vp->v_type == VBLK || vp->v_type == VCHR)
+ bp->b_dev = vp->v_rdev;
+ else
+ bp->b_dev = NODEV;
+ /*
+ * Insert onto list for new vnode.
+ */
+ bufinsvn(bp, &vp->v_cleanblkhd);
+}
+
+/*
+ * Disassociate a buffer from a vnode.
+ */
+brelvp(bp)
+ register struct buf *bp;
+{
+ struct vnode *vp;
+
+ if (bp->b_vp == (struct vnode *) 0)
+ panic("brelvp: NULL");
+ /*
+ * Delete from old vnode list, if on one.
+ */
+ if (bp->b_vnbufs.le_next != NOLIST)
+ bufremvn(bp);
+ vp = bp->b_vp;
+ bp->b_vp = (struct vnode *) 0;
+ HOLDRELE(vp);
+}
+
+/*
+ * Reassign a buffer from one vnode to another.
+ * Used to assign file specific control information
+ * (indirect blocks) to the vnode to which they belong.
+ */
+reassignbuf(bp, newvp)
+ register struct buf *bp;
+ register struct vnode *newvp;
+{
+ register struct buflists *listheadp;
+
+ if (newvp == NULL) {
+ printf("reassignbuf: NULL");
+ return;
+ }
+ /*
+ * Delete from old vnode list, if on one.
+ */
+ if (bp->b_vnbufs.le_next != NOLIST)
+ bufremvn(bp);
+ /*
+ * If dirty, put on list of dirty buffers;
+ * otherwise insert onto list of clean buffers.
+ */
+ if (bp->b_flags & B_DELWRI)
+ listheadp = &newvp->v_dirtyblkhd;
+ else
+ listheadp = &newvp->v_cleanblkhd;
+ bufinsvn(bp, listheadp);
+}
+
+/*
+ * Create a vnode for a block device.
+ * Used for root filesystem, argdev, and swap areas.
+ * Also used for memory file system special devices.
+ */
+bdevvp(dev, vpp)
+ dev_t dev;
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+ struct vnode *nvp;
+ int error;
+
+ if (dev == NODEV)
+ return (0);
+ error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
+ if (error) {
+ *vpp = 0;
+ return (error);
+ }
+ vp = nvp;
+ vp->v_type = VBLK;
+ if (nvp = checkalias(vp, dev, (struct mount *)0)) {
+ vput(vp);
+ vp = nvp;
+ }
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * Check to see if the new vnode represents a special device
+ * for which we already have a vnode (either because of
+ * bdevvp() or because of a different vnode representing
+ * the same block device). If such an alias exists, deallocate
+ * the existing contents and return the aliased vnode. The
+ * caller is responsible for filling it with its new contents.
+ */
+struct vnode *
+checkalias(nvp, nvp_rdev, mp)
+ register struct vnode *nvp;
+ dev_t nvp_rdev;
+ struct mount *mp;
+{
+ register struct vnode *vp;
+ struct vnode **vpp;
+
+ if (nvp->v_type != VBLK && nvp->v_type != VCHR)
+ return (NULLVP);
+
+ vpp = &speclisth[SPECHASH(nvp_rdev)];
+loop:
+ for (vp = *vpp; vp; vp = vp->v_specnext) {
+ if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
+ continue;
+ /*
+ * Alias, but not in use, so flush it out.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ goto loop;
+ }
+ if (vget(vp, 1))
+ goto loop;
+ break;
+ }
+ if (vp == NULL || vp->v_tag != VT_NON) {
+ MALLOC(nvp->v_specinfo, struct specinfo *,
+ sizeof(struct specinfo), M_VNODE, M_WAITOK);
+ nvp->v_rdev = nvp_rdev;
+ nvp->v_hashchain = vpp;
+ nvp->v_specnext = *vpp;
+ nvp->v_specflags = 0;
+ *vpp = nvp;
+ if (vp != NULL) {
+ nvp->v_flag |= VALIASED;
+ vp->v_flag |= VALIASED;
+ vput(vp);
+ }
+ return (NULLVP);
+ }
+ VOP_UNLOCK(vp);
+ vclean(vp, 0);
+ vp->v_op = nvp->v_op;
+ vp->v_tag = nvp->v_tag;
+ nvp->v_type = VNON;
+ insmntque(vp, mp);
+ return (vp);
+}
+
+/*
+ * Grab a particular vnode from the free list, increment its
+ * reference count and lock it. The vnode lock bit is set the
+ * vnode is being eliminated in vgone. The process is awakened
+ * when the transition is completed, and an error returned to
+ * indicate that the vnode is no longer usable (possibly having
+ * been changed to a new file system type).
+ */
+vget(vp, lockflag)
+ register struct vnode *vp;
+ int lockflag;
+{
+
+ /*
+ * If the vnode is in the process of being cleaned out for
+ * another use, we wait for the cleaning to finish and then
+ * return failure. Cleaning is determined either by checking
+ * that the VXLOCK flag is set, or that the use count is
+ * zero with the back pointer set to show that it has been
+ * removed from the free list by getnewvnode. The VXLOCK
+ * flag may not have been set yet because vclean is blocked in
+ * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
+ */
+ if ((vp->v_flag & VXLOCK) ||
+ (vp->v_usecount == 0 &&
+ vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return (1);
+ }
+ if (vp->v_usecount == 0)
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ vp->v_usecount++;
+ if (lockflag)
+ VOP_LOCK(vp);
+ return (0);
+}
+
+/*
+ * Vnode reference, just increment the count
+ */
+void vref(vp)
+ struct vnode *vp;
+{
+
+ if (vp->v_usecount <= 0)
+ panic("vref used where vget required");
+ vp->v_usecount++;
+}
+
+/*
+ * vput(), just unlock and vrele()
+ */
+void vput(vp)
+ register struct vnode *vp;
+{
+
+ VOP_UNLOCK(vp);
+ vrele(vp);
+}
+
+/*
+ * Vnode release.
+ * If count drops to zero, call inactive routine and return to freelist.
+ */
+void vrele(vp)
+ register struct vnode *vp;
+{
+
+#ifdef DIAGNOSTIC
+ if (vp == NULL)
+ panic("vrele: null vp");
+#endif
+ vp->v_usecount--;
+ if (vp->v_usecount > 0)
+ return;
+#ifdef DIAGNOSTIC
+ if (vp->v_usecount != 0 || vp->v_writecount != 0) {
+ vprint("vrele: bad ref count", vp);
+ panic("vrele: ref cnt");
+ }
+#endif
+ /*
+ * insert at tail of LRU list
+ */
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ VOP_INACTIVE(vp);
+}
+
+/*
+ * Page or buffer structure gets a reference.
+ */
+void vhold(vp)
+ register struct vnode *vp;
+{
+
+ vp->v_holdcnt++;
+}
+
+/*
+ * Page or buffer structure frees a reference.
+ */
+void holdrele(vp)
+ register struct vnode *vp;
+{
+
+ if (vp->v_holdcnt <= 0)
+ panic("holdrele: holdcnt");
+ vp->v_holdcnt--;
+}
+
+/*
+ * Remove any vnodes in the vnode table belonging to mount point mp.
+ *
+ * If MNT_NOFORCE is specified, there should not be any active ones,
+ * return error if any are found (nb: this is a user error, not a
+ * system error). If MNT_FORCE is specified, detach any active vnodes
+ * that are found.
+ */
+#ifdef DIAGNOSTIC
+int busyprt = 0; /* print out busy vnodes */
+struct ctldebug debug1 = { "busyprt", &busyprt };
+#endif
+
+vflush(mp, skipvp, flags)
+ struct mount *mp;
+ struct vnode *skipvp;
+ int flags;
+{
+ register struct vnode *vp, *nvp;
+ int busy = 0;
+
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("vflush: not busy");
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+ if (vp->v_mount != mp)
+ goto loop;
+ nvp = vp->v_mntvnodes.le_next;
+ /*
+ * Skip over a selected vnode.
+ */
+ if (vp == skipvp)
+ continue;
+ /*
+ * Skip over a vnodes marked VSYSTEM.
+ */
+ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
+ continue;
+ /*
+ * If WRITECLOSE is set, only flush out regular file
+ * vnodes open for writing.
+ */
+ if ((flags & WRITECLOSE) &&
+ (vp->v_writecount == 0 || vp->v_type != VREG))
+ continue;
+ /*
+ * With v_usecount == 0, all we need to do is clear
+ * out the vnode data structures and we are done.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ continue;
+ }
+ /*
+ * If FORCECLOSE is set, forcibly close the vnode.
+ * For block or character devices, revert to an
+ * anonymous device. For all other files, just kill them.
+ */
+ if (flags & FORCECLOSE) {
+ if (vp->v_type != VBLK && vp->v_type != VCHR) {
+ vgone(vp);
+ } else {
+ vclean(vp, 0);
+ vp->v_op = spec_vnodeop_p;
+ insmntque(vp, (struct mount *)0);
+ }
+ continue;
+ }
+#ifdef DIAGNOSTIC
+ if (busyprt)
+ vprint("vflush: busy vnode", vp);
+#endif
+ busy++;
+ }
+ if (busy)
+ return (EBUSY);
+ return (0);
+}
+
+/*
+ * Disassociate the underlying file system from a vnode.
+ */
+void
+vclean(vp, flags)
+ register struct vnode *vp;
+ int flags;
+{
+ int active;
+
+ /*
+ * Check to see if the vnode is in use.
+ * If so we have to reference it before we clean it out
+ * so that its count cannot fall to zero and generate a
+ * race against ourselves to recycle it.
+ */
+ if (active = vp->v_usecount)
+ VREF(vp);
+ /*
+ * Even if the count is zero, the VOP_INACTIVE routine may still
+ * have the object locked while it cleans it out. The VOP_LOCK
+ * ensures that the VOP_INACTIVE routine is done with its work.
+ * For active vnodes, it ensures that no other activity can
+ * occur while the underlying object is being cleaned out.
+ */
+ VOP_LOCK(vp);
+ /*
+ * Prevent the vnode from being recycled or
+ * brought into use while we clean it out.
+ */
+ if (vp->v_flag & VXLOCK)
+ panic("vclean: deadlock");
+ vp->v_flag |= VXLOCK;
+ /*
+ * Clean out any buffers associated with the vnode.
+ */
+ if (flags & DOCLOSE)
+ vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+ /*
+ * Any other processes trying to obtain this lock must first
+ * wait for VXLOCK to clear, then call the new lock operation.
+ */
+ VOP_UNLOCK(vp);
+ /*
+ * If purging an active vnode, it must be closed and
+ * deactivated before being reclaimed.
+ */
+ if (active) {
+ if (flags & DOCLOSE)
+ VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
+ VOP_INACTIVE(vp);
+ }
+ /*
+ * Reclaim the vnode.
+ */
+ if (VOP_RECLAIM(vp))
+ panic("vclean: cannot reclaim");
+ if (active)
+ vrele(vp);
+
+ /*
+ * Done with purge, notify sleepers of the grim news.
+ */
+ vp->v_op = dead_vnodeop_p;
+ vp->v_tag = VT_NON;
+ vp->v_flag &= ~VXLOCK;
+ if (vp->v_flag & VXWANT) {
+ vp->v_flag &= ~VXWANT;
+ wakeup((caddr_t)vp);
+ }
+}
+
+/*
+ * Eliminate all activity associated with the requested vnode
+ * and with all vnodes aliased to the requested vnode.
+ */
+void vgoneall(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+
+ if (vp->v_flag & VALIASED) {
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Ensure that vp will not be vgone'd while we
+ * are eliminating its aliases.
+ */
+ vp->v_flag |= VXLOCK;
+ while (vp->v_flag & VALIASED) {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type || vp == vq)
+ continue;
+ vgone(vq);
+ break;
+ }
+ }
+ /*
+ * Remove the lock so that vgone below will
+ * really eliminate the vnode after which time
+ * vgone will awaken any sleepers.
+ */
+ vp->v_flag &= ~VXLOCK;
+ }
+ vgone(vp);
+}
+
+/*
+ * Eliminate all activity associated with a vnode
+ * in preparation for reuse.
+ */
+void vgone(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+ struct vnode *vx;
+
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Clean out the filesystem specific data.
+ */
+ vclean(vp, DOCLOSE);
+ /*
+ * Delete from old mount point vnode list, if on one.
+ */
+ if (vp->v_mount != NULL) {
+ LIST_REMOVE(vp, v_mntvnodes);
+ vp->v_mount = NULL;
+ }
+ /*
+ * If special device, remove it from special device alias list.
+ */
+ if (vp->v_type == VBLK || vp->v_type == VCHR) {
+ if (*vp->v_hashchain == vp) {
+ *vp->v_hashchain = vp->v_specnext;
+ } else {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_specnext != vp)
+ continue;
+ vq->v_specnext = vp->v_specnext;
+ break;
+ }
+ if (vq == NULL)
+ panic("missing bdev");
+ }
+ if (vp->v_flag & VALIASED) {
+ vx = NULL;
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type)
+ continue;
+ if (vx)
+ break;
+ vx = vq;
+ }
+ if (vx == NULL)
+ panic("missing alias");
+ if (vq == NULL)
+ vx->v_flag &= ~VALIASED;
+ vp->v_flag &= ~VALIASED;
+ }
+ FREE(vp->v_specinfo, M_VNODE);
+ vp->v_specinfo = NULL;
+ }
+ /*
+ * If it is on the freelist and not already at the head,
+ * move it to the head of the list. The test of the back
+ * pointer and the reference count of zero is because
+ * it will be removed from the free list by getnewvnode,
+ * but will not have its reference count incremented until
+ * after calling vgone. If the reference count were
+ * incremented first, vgone would (incorrectly) try to
+ * close the previous instance of the underlying object.
+ * So, the back pointer is explicitly set to `0xdeadb' in
+ * getnewvnode after removing it from the freelist to ensure
+ * that we do not try to move it here.
+ */
+ if (vp->v_usecount == 0 &&
+ vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
+ vnode_free_list.tqh_first != vp) {
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+ }
+ vp->v_type = VBAD;
+}
+
+/*
+ * Lookup a vnode by device number.
+ */
+vfinddev(dev, type, vpp)
+ dev_t dev;
+ enum vtype type;
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+
+ for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
+ if (dev != vp->v_rdev || type != vp->v_type)
+ continue;
+ *vpp = vp;
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Calculate the total number of references to a special device.
+ */
+vcount(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq, *vnext;
+ int count;
+
+loop:
+ if ((vp->v_flag & VALIASED) == 0)
+ return (vp->v_usecount);
+ for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
+ vnext = vq->v_specnext;
+ if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
+ continue;
+ /*
+ * Alias, but not in use, so flush it out.
+ */
+ if (vq->v_usecount == 0 && vq != vp) {
+ vgone(vq);
+ goto loop;
+ }
+ count += vq->v_usecount;
+ }
+ return (count);
+}
+
+/*
+ * Print out a description of a vnode.
+ */
+static char *typename[] =
+ { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
+
+vprint(label, vp)
+ char *label;
+ register struct vnode *vp;
+{
+ char buf[64];
+
+ if (label != NULL)
+ printf("%s: ", label);
+ printf("type %s, usecount %d, writecount %d, refcount %d,",
+ typename[vp->v_type], vp->v_usecount, vp->v_writecount,
+ vp->v_holdcnt);
+ buf[0] = '\0';
+ if (vp->v_flag & VROOT)
+ strcat(buf, "|VROOT");
+ if (vp->v_flag & VTEXT)
+ strcat(buf, "|VTEXT");
+ if (vp->v_flag & VSYSTEM)
+ strcat(buf, "|VSYSTEM");
+ if (vp->v_flag & VXLOCK)
+ strcat(buf, "|VXLOCK");
+ if (vp->v_flag & VXWANT)
+ strcat(buf, "|VXWANT");
+ if (vp->v_flag & VBWAIT)
+ strcat(buf, "|VBWAIT");
+ if (vp->v_flag & VALIASED)
+ strcat(buf, "|VALIASED");
+ if (buf[0] != '\0')
+ printf(" flags (%s)", &buf[1]);
+ if (vp->v_data == NULL) {
+ printf("\n");
+ } else {
+ printf("\n\t");
+ VOP_PRINT(vp);
+ }
+}
+
+#ifdef DEBUG
+/*
+ * List all of the locked vnodes in the system.
+ * Called when debugging the kernel.
+ */
+printlockedvnodes()
+{
+ register struct mount *mp;
+ register struct vnode *vp;
+
+ printf("Locked vnodes\n");
+ for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
+ for (vp = mp->mnt_vnodelist.lh_first;
+ vp != NULL;
+ vp = vp->v_mntvnodes.le_next)
+ if (VOP_ISLOCKED(vp))
+ vprint((char *)0, vp);
+ }
+}
+#endif
+
+int kinfo_vdebug = 1;
+int kinfo_vgetfailed;
+#define KINFO_VNODESLOP 10
+/*
+ * Dump vnode list (via sysctl).
+ * Copyout address of vnode followed by vnode.
+ */
+/* ARGSUSED */
+sysctl_vnode(where, sizep)
+ char *where;
+ size_t *sizep;
+{
+ register struct mount *mp, *nmp;
+ struct vnode *vp;
+ register char *bp = where, *savebp;
+ char *ewhere;
+ int error;
+
+#define VPTRSZ sizeof (struct vnode *)
+#define VNODESZ sizeof (struct vnode)
+ if (where == NULL) {
+ *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
+ return (0);
+ }
+ ewhere = where + *sizep;
+
+ for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+ nmp = mp->mnt_list.tqe_next;
+ if (vfs_busy(mp))
+ continue;
+ savebp = bp;
+again:
+ for (vp = mp->mnt_vnodelist.lh_first;
+ vp != NULL;
+ vp = vp->v_mntvnodes.le_next) {
+ /*
+ * Check that the vp is still associated with
+ * this filesystem. RACE: could have been
+ * recycled onto the same filesystem.
+ */
+ if (vp->v_mount != mp) {
+ if (kinfo_vdebug)
+ printf("kinfo: vp changed\n");
+ bp = savebp;
+ goto again;
+ }
+ if (bp + VPTRSZ + VNODESZ > ewhere) {
+ *sizep = bp - where;
+ return (ENOMEM);
+ }
+ if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
+ (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
+ return (error);
+ bp += VPTRSZ + VNODESZ;
+ }
+ vfs_unbusy(mp);
+ }
+
+ *sizep = bp - where;
+ return (0);
+}
+
+/*
+ * Check to see if a filesystem is mounted on a block device.
+ */
+int
+vfs_mountedon(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+
+ if (vp->v_specflags & SI_MOUNTEDON)
+ return (EBUSY);
+ if (vp->v_flag & VALIASED) {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type)
+ continue;
+ if (vq->v_specflags & SI_MOUNTEDON)
+ return (EBUSY);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Build hash lists of net addresses and hang them off the mount point.
+ * Called by ufs_mount() to set up the lists of export addresses.
+ */
+static int
+vfs_hang_addrlist(mp, nep, argp)
+ struct mount *mp;
+ struct netexport *nep;
+ struct export_args *argp;
+{
+ register struct netcred *np;
+ register struct radix_node_head *rnh;
+ register int i;
+ struct radix_node *rn;
+ struct sockaddr *saddr, *smask = 0;
+ struct domain *dom;
+ int error;
+
+ if (argp->ex_addrlen == 0) {
+ if (mp->mnt_flag & MNT_DEFEXPORTED)
+ return (EPERM);
+ np = &nep->ne_defexported;
+ np->netc_exflags = argp->ex_flags;
+ np->netc_anon = argp->ex_anon;
+ np->netc_anon.cr_ref = 1;
+ mp->mnt_flag |= MNT_DEFEXPORTED;
+ return (0);
+ }
+ i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
+ np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
+ bzero((caddr_t)np, i);
+ saddr = (struct sockaddr *)(np + 1);
+ if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
+ goto out;
+ if (saddr->sa_len > argp->ex_addrlen)
+ saddr->sa_len = argp->ex_addrlen;
+ if (argp->ex_masklen) {
+ smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
+ error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
+ if (error)
+ goto out;
+ if (smask->sa_len > argp->ex_masklen)
+ smask->sa_len = argp->ex_masklen;
+ }
+ i = saddr->sa_family;
+ if ((rnh = nep->ne_rtable[i]) == 0) {
+ /*
+ * Seems silly to initialize every AF when most are not
+ * used, do so on demand here
+ */
+ for (dom = domains; dom; dom = dom->dom_next)
+ if (dom->dom_family == i && dom->dom_rtattach) {
+ dom->dom_rtattach((void **)&nep->ne_rtable[i],
+ dom->dom_rtoffset);
+ break;
+ }
+ if ((rnh = nep->ne_rtable[i]) == 0) {
+ error = ENOBUFS;
+ goto out;
+ }
+ }
+ rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
+ np->netc_rnodes);
+ if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
+ error = EPERM;
+ goto out;
+ }
+ np->netc_exflags = argp->ex_flags;
+ np->netc_anon = argp->ex_anon;
+ np->netc_anon.cr_ref = 1;
+ return (0);
+out:
+ free(np, M_NETADDR);
+ return (error);
+}
+
+/* ARGSUSED */
+static int
+vfs_free_netcred(rn, w)
+ struct radix_node *rn;
+ caddr_t w;
+{
+ register struct radix_node_head *rnh = (struct radix_node_head *)w;
+
+ (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
+ free((caddr_t)rn, M_NETADDR);
+ return (0);
+}
+
+/*
+ * Free the net address hash lists that are hanging off the mount points.
+ */
+static void
+vfs_free_addrlist(nep)
+ struct netexport *nep;
+{
+ register int i;
+ register struct radix_node_head *rnh;
+
+ for (i = 0; i <= AF_MAX; i++)
+ if (rnh = nep->ne_rtable[i]) {
+ (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
+ (caddr_t)rnh);
+ free((caddr_t)rnh, M_RTABLE);
+ nep->ne_rtable[i] = 0;
+ }
+}
+
+int
+vfs_export(mp, nep, argp)
+ struct mount *mp;
+ struct netexport *nep;
+ struct export_args *argp;
+{
+ int error;
+
+ if (argp->ex_flags & MNT_DELEXPORT) {
+ vfs_free_addrlist(nep);
+ mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
+ }
+ if (argp->ex_flags & MNT_EXPORTED) {
+ if (error = vfs_hang_addrlist(mp, nep, argp))
+ return (error);
+ mp->mnt_flag |= MNT_EXPORTED;
+ }
+ return (0);
+}
+
+struct netcred *
+vfs_export_lookup(mp, nep, nam)
+ register struct mount *mp;
+ struct netexport *nep;
+ struct mbuf *nam;
+{
+ register struct netcred *np;
+ register struct radix_node_head *rnh;
+ struct sockaddr *saddr;
+
+ np = NULL;
+ if (mp->mnt_flag & MNT_EXPORTED) {
+ /*
+ * Lookup in the export list first.
+ */
+ if (nam != NULL) {
+ saddr = mtod(nam, struct sockaddr *);
+ rnh = nep->ne_rtable[saddr->sa_family];
+ if (rnh != NULL) {
+ np = (struct netcred *)
+ (*rnh->rnh_matchaddr)((caddr_t)saddr,
+ rnh);
+ if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
+ np = NULL;
+ }
+ }
+ /*
+ * If no address match, use the default if it exists.
+ */
+ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
+ np = &nep->ne_defexported;
+ }
+ return (np);
+}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
new file mode 100644
index 0000000..345c7a7
--- /dev/null
+++ b/sys/kern/vfs_syscalls.c
@@ -0,0 +1,2107 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+static int change_dir __P((struct nameidata *ndp, struct proc *p));
+
+/*
+ * Virtual File System System Calls
+ */
+
+/*
+ * Mount a file system.
+ */
+struct mount_args {
+ int type;
+ char *path;
+ int flags;
+ caddr_t data;
+};
+/* ARGSUSED */
+mount(p, uap, retval)
+ struct proc *p;
+ register struct mount_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ register struct mount *mp;
+ int error, flag;
+ struct nameidata nd;
+
+ /*
+ * Must be super user
+ */
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ /*
+ * Get vnode to be covered
+ */
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (uap->flags & MNT_UPDATE) {
+ if ((vp->v_flag & VROOT) == 0) {
+ vput(vp);
+ return (EINVAL);
+ }
+ mp = vp->v_mount;
+ flag = mp->mnt_flag;
+ /*
+ * We only allow the filesystem to be reloaded if it
+ * is currently mounted read-only.
+ */
+ if ((uap->flags & MNT_RELOAD) &&
+ ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+ vput(vp);
+ return (EOPNOTSUPP); /* Needs translation */
+ }
+ mp->mnt_flag |=
+ uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+ VOP_UNLOCK(vp);
+ goto update;
+ }
+ if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0))
+ return (error);
+ if (vp->v_type != VDIR) {
+ vput(vp);
+ return (ENOTDIR);
+ }
+ if ((u_long)uap->type > MOUNT_MAXTYPE || vfssw[uap->type] == NULL) {
+ vput(vp);
+ return (ENODEV);
+ }
+
+ /*
+ * Allocate and initialize the file system.
+ */
+ mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+ M_MOUNT, M_WAITOK);
+ bzero((char *)mp, (u_long)sizeof(struct mount));
+ mp->mnt_op = vfssw[uap->type];
+ if (error = vfs_lock(mp)) {
+ free((caddr_t)mp, M_MOUNT);
+ vput(vp);
+ return (error);
+ }
+ if (vp->v_mountedhere != NULL) {
+ vfs_unlock(mp);
+ free((caddr_t)mp, M_MOUNT);
+ vput(vp);
+ return (EBUSY);
+ }
+ vp->v_mountedhere = mp;
+ mp->mnt_vnodecovered = vp;
+update:
+ /*
+ * Set the mount level flags.
+ */
+ if (uap->flags & MNT_RDONLY)
+ mp->mnt_flag |= MNT_RDONLY;
+ else if (mp->mnt_flag & MNT_RDONLY)
+ mp->mnt_flag |= MNT_WANTRDWR;
+ mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+ MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+ mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+ MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+ /*
+ * Mount the filesystem.
+ */
+ error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p);
+ if (mp->mnt_flag & MNT_UPDATE) {
+ vrele(vp);
+ if (mp->mnt_flag & MNT_WANTRDWR)
+ mp->mnt_flag &= ~MNT_RDONLY;
+ mp->mnt_flag &=~
+ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
+ if (error)
+ mp->mnt_flag = flag;
+ return (error);
+ }
+ /*
+ * Put the new filesystem on the mount list after root.
+ */
+ cache_purge(vp);
+ if (!error) {
+ TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ VOP_UNLOCK(vp);
+ vfs_unlock(mp);
+ error = VFS_START(mp, 0, p);
+ } else {
+ mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+ vfs_unlock(mp);
+ free((caddr_t)mp, M_MOUNT);
+ vput(vp);
+ }
+ return (error);
+}
+
+/*
+ * Unmount a file system.
+ *
+ * Note: unmount takes a path to the vnode mounted on as argument,
+ * not special file (as before).
+ */
+struct unmount_args {
+ char *path;
+ int flags;
+};
+/* ARGSUSED */
+unmount(p, uap, retval)
+ struct proc *p;
+ register struct unmount_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct mount *mp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+
+ /*
+ * Unless this is a user mount, then must
+ * have suser privilege.
+ */
+ if (((vp->v_mount->mnt_flag & MNT_USER) == 0) &&
+ (error = suser(p->p_ucred, &p->p_acflag))) {
+ vput(vp);
+ return (error);
+ }
+
+ /*
+ * Must be the root of the filesystem
+ */
+ if ((vp->v_flag & VROOT) == 0) {
+ vput(vp);
+ return (EINVAL);
+ }
+ mp = vp->v_mount;
+ vput(vp);
+ return (dounmount(mp, uap->flags, p));
+}
+
+/*
+ * Do the actual file system unmount.
+ */
+dounmount(mp, flags, p)
+ register struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ struct vnode *coveredvp;
+ int error;
+
+ coveredvp = mp->mnt_vnodecovered;
+ if (vfs_busy(mp))
+ return (EBUSY);
+ mp->mnt_flag |= MNT_UNMOUNT;
+ if (error = vfs_lock(mp))
+ return (error);
+
+ mp->mnt_flag &=~ MNT_ASYNC;
+ vnode_pager_umount(mp); /* release cached vnodes */
+ cache_purgevfs(mp); /* remove cache entries for this file sys */
+ if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 ||
+ (flags & MNT_FORCE))
+ error = VFS_UNMOUNT(mp, flags, p);
+ mp->mnt_flag &= ~MNT_UNMOUNT;
+ vfs_unbusy(mp);
+ if (error) {
+ vfs_unlock(mp);
+ } else {
+ vrele(coveredvp);
+ TAILQ_REMOVE(&mountlist, mp, mnt_list);
+ mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+ vfs_unlock(mp);
+ if (mp->mnt_vnodelist.lh_first != NULL)
+ panic("unmount: dangling vnode");
+ free((caddr_t)mp, M_MOUNT);
+ }
+ return (error);
+}
+
+/*
+ * Sync each mounted filesystem.
+ */
+#ifdef DIAGNOSTIC
+int syncprt = 0;
+struct ctldebug debug0 = { "syncprt", &syncprt };
+#endif
+
+struct sync_args {
+ int dummy;
+};
+/* ARGSUSED */
+sync(p, uap, retval)
+ struct proc *p;
+ struct sync_args *uap;
+ int *retval;
+{
+ register struct mount *mp, *nmp;
+ int asyncflag;
+
+ for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+ nmp = mp->mnt_list.tqe_next;
+ /*
+ * The lock check below is to avoid races with mount
+ * and unmount.
+ */
+ if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) == 0 &&
+ !vfs_busy(mp)) {
+ asyncflag = mp->mnt_flag & MNT_ASYNC;
+ mp->mnt_flag &= ~MNT_ASYNC;
+ VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
+ if (asyncflag)
+ mp->mnt_flag |= MNT_ASYNC;
+ vfs_unbusy(mp);
+ }
+ }
+#ifdef DIAGNOSTIC
+ if (syncprt)
+ vfs_bufstats();
+#endif /* DIAGNOSTIC */
+ return (0);
+}
+
+/*
+ * Change filesystem quotas.
+ */
+struct quotactl_args {
+ char *path;
+ int cmd;
+ int uid;
+ caddr_t arg;
+};
+/* ARGSUSED */
+quotactl(p, uap, retval)
+ struct proc *p;
+ register struct quotactl_args *uap;
+ int *retval;
+{
+ register struct mount *mp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ mp = nd.ni_vp->v_mount;
+ vrele(nd.ni_vp);
+ return (VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, p));
+}
+
+/*
+ * Get filesystem statistics.
+ */
+struct statfs_args {
+ char *path;
+ struct statfs *buf;
+};
+/* ARGSUSED */
+statfs(p, uap, retval)
+ struct proc *p;
+ register struct statfs_args *uap;
+ int *retval;
+{
+ register struct mount *mp;
+ register struct statfs *sp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ mp = nd.ni_vp->v_mount;
+ sp = &mp->mnt_stat;
+ vrele(nd.ni_vp);
+ if (error = VFS_STATFS(mp, sp, p))
+ return (error);
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get filesystem statistics.
+ */
+struct fstatfs_args {
+ int fd;
+ struct statfs *buf;
+};
+/* ARGSUSED */
+fstatfs(p, uap, retval)
+ struct proc *p;
+ register struct fstatfs_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ struct mount *mp;
+ register struct statfs *sp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ mp = ((struct vnode *)fp->f_data)->v_mount;
+ sp = &mp->mnt_stat;
+ if (error = VFS_STATFS(mp, sp, p))
+ return (error);
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get statistics on all filesystems.
+ */
+struct getfsstat_args {
+ struct statfs *buf;
+ long bufsize;
+ int flags;
+};
+getfsstat(p, uap, retval)
+ struct proc *p;
+ register struct getfsstat_args *uap;
+ int *retval;
+{
+ register struct mount *mp, *nmp;
+ register struct statfs *sp;
+ caddr_t sfsp;
+ long count, maxcount, error;
+
+ maxcount = uap->bufsize / sizeof(struct statfs);
+ sfsp = (caddr_t)uap->buf;
+ for (count = 0, mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+ nmp = mp->mnt_list.tqe_next;
+ if (sfsp && count < maxcount &&
+ ((mp->mnt_flag & MNT_MLOCK) == 0)) {
+ sp = &mp->mnt_stat;
+ /*
+ * If MNT_NOWAIT is specified, do not refresh the
+ * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
+ */
+ if (((uap->flags & MNT_NOWAIT) == 0 ||
+ (uap->flags & MNT_WAIT)) &&
+ (error = VFS_STATFS(mp, sp, p)))
+ continue;
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp)))
+ return (error);
+ sfsp += sizeof(*sp);
+ }
+ count++;
+ }
+ if (sfsp && count > maxcount)
+ *retval = maxcount;
+ else
+ *retval = count;
+ return (0);
+}
+
+/*
+ * Change current working directory to a given file descriptor.
+ */
+struct fchdir_args {
+ int fd;
+};
+/* ARGSUSED */
+fchdir(p, uap, retval)
+ struct proc *p;
+ struct fchdir_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(fdp, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ VOP_LOCK(vp);
+ if (vp->v_type != VDIR)
+ error = ENOTDIR;
+ else
+ error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
+ VOP_UNLOCK(vp);
+ if (error)
+ return (error);
+ VREF(vp);
+ vrele(fdp->fd_cdir);
+ fdp->fd_cdir = vp;
+ return (0);
+}
+
+/*
+ * Change current working directory (``.'').
+ */
+struct chdir_args {
+ char *path;
+};
+/* ARGSUSED */
+chdir(p, uap, retval)
+ struct proc *p;
+ struct chdir_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = change_dir(&nd, p))
+ return (error);
+ vrele(fdp->fd_cdir);
+ fdp->fd_cdir = nd.ni_vp;
+ return (0);
+}
+
+/*
+ * Change notion of root (``/'') directory.
+ */
+struct chroot_args {
+ char *path;
+};
+/* ARGSUSED */
+chroot(p, uap, retval)
+ struct proc *p;
+ struct chroot_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ int error;
+ struct nameidata nd;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = change_dir(&nd, p))
+ return (error);
+ if (fdp->fd_rdir != NULL)
+ vrele(fdp->fd_rdir);
+ fdp->fd_rdir = nd.ni_vp;
+ return (0);
+}
+
+/*
+ * Common routine for chroot and chdir.
+ */
+static int
+change_dir(ndp, p)
+ register struct nameidata *ndp;
+ struct proc *p;
+{
+ struct vnode *vp;
+ int error;
+
+ if (error = namei(ndp))
+ return (error);
+ vp = ndp->ni_vp;
+ if (vp->v_type != VDIR)
+ error = ENOTDIR;
+ else
+ error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
+ VOP_UNLOCK(vp);
+ if (error)
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Check permissions, allocate an open file structure,
+ * and call the device open routine if any.
+ */
+struct open_args {
+ char *path;
+ int flags;
+ int mode;
+};
+open(p, uap, retval)
+ struct proc *p;
+ register struct open_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ register struct vnode *vp;
+ int flags, cmode;
+ struct file *nfp;
+ int type, indx, error;
+ struct flock lf;
+ struct nameidata nd;
+ extern struct fileops vnops;
+
+ if (error = falloc(p, &nfp, &indx))
+ return (error);
+ fp = nfp;
+ flags = FFLAGS(uap->flags);
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ p->p_dupfd = -indx - 1; /* XXX check for fdopen */
+ if (error = vn_open(&nd, flags, cmode)) {
+ ffree(fp);
+ if ((error == ENODEV || error == ENXIO) &&
+ p->p_dupfd >= 0 && /* XXX from fdopen */
+ (error =
+ dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
+ *retval = indx;
+ return (0);
+ }
+ if (error == ERESTART)
+ error = EINTR;
+ fdp->fd_ofiles[indx] = NULL;
+ return (error);
+ }
+ p->p_dupfd = 0;
+ vp = nd.ni_vp;
+ fp->f_flag = flags & FMASK;
+ fp->f_type = DTYPE_VNODE;
+ fp->f_ops = &vnops;
+ fp->f_data = (caddr_t)vp;
+ if (flags & (O_EXLOCK | O_SHLOCK)) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (flags & O_EXLOCK)
+ lf.l_type = F_WRLCK;
+ else
+ lf.l_type = F_RDLCK;
+ type = F_FLOCK;
+ if ((flags & FNONBLOCK) == 0)
+ type |= F_WAIT;
+ VOP_UNLOCK(vp);
+ if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) {
+ (void) vn_close(vp, fp->f_flag, fp->f_cred, p);
+ ffree(fp);
+ fdp->fd_ofiles[indx] = NULL;
+ return (error);
+ }
+ VOP_LOCK(vp);
+ fp->f_flag |= FHASLOCK;
+ }
+ VOP_UNLOCK(vp);
+ *retval = indx;
+ return (0);
+}
+
+#ifdef COMPAT_43
+/*
+ * Create a file.
+ */
+struct ocreat_args {
+ char *path;
+ int mode;
+};
+ocreat(p, uap, retval)
+ struct proc *p;
+ register struct ocreat_args *uap;
+ int *retval;
+{
+ struct open_args openuap;
+
+ openuap.path = uap->path;
+ openuap.mode = uap->mode;
+ openuap.flags = O_WRONLY | O_CREAT | O_TRUNC;
+ return (open(p, &openuap, retval));
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Create a special file.
+ */
+struct mknod_args {
+ char *path;
+ int mode;
+ int dev;
+};
+/* ARGSUSED */
+mknod(p, uap, retval)
+ struct proc *p;
+ register struct mknod_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp != NULL)
+ error = EEXIST;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+ vattr.va_rdev = uap->dev;
+
+ switch (uap->mode & S_IFMT) {
+ case S_IFMT: /* used by badsect to flag bad sectors */
+ vattr.va_type = VBAD;
+ break;
+ case S_IFCHR:
+ vattr.va_type = VCHR;
+ break;
+ case S_IFBLK:
+ vattr.va_type = VBLK;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ }
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (vp)
+ vrele(vp);
+ }
+ return (error);
+}
+
+/*
+ * Create named pipe.
+ */
+struct mkfifo_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+mkfifo(p, uap, retval)
+ struct proc *p;
+ register struct mkfifo_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+#ifndef FIFO
+ return (EOPNOTSUPP);
+#else
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ if (nd.ni_vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ return (EEXIST);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VFIFO;
+ vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
+#endif /* FIFO */
+}
+
+/*
+ * Make a hard file link.
+ */
+struct link_args {
+ char *path;
+ char *link;
+};
+/* ARGSUSED */
+link(p, uap, retval)
+ struct proc *p;
+ register struct link_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct nameidata nd;
+ int error;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ nd.ni_dirp = uap->link;
+ if ((error = namei(&nd)) == 0) {
+ if (nd.ni_vp != NULL)
+ error = EEXIST;
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp,
+ p, p->p_ucred, LEASE_WRITE);
+ LEASE_CHECK(vp,
+ p, p->p_ucred, LEASE_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ }
+ }
+ }
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Make a symbolic link.
+ */
+struct symlink_args {
+ char *path;
+ char *link;
+};
+/* ARGSUSED */
+symlink(p, uap, retval)
+ struct proc *p;
+ register struct symlink_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ char *path;
+ int error;
+ struct nameidata nd;
+
+ MALLOC(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ if (error = copyinstr(uap->path, path, MAXPATHLEN, NULL))
+ goto out;
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p);
+ if (error = namei(&nd))
+ goto out;
+ if (nd.ni_vp) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ error = EEXIST;
+ goto out;
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
+out:
+ FREE(path, M_NAMEI);
+ return (error);
+}
+
+/*
+ * Delete a name from the filesystem.
+ */
+struct unlink_args {
+ char *path;
+};
+/* ARGSUSED */
+unlink(p, uap, retval)
+ struct proc *p;
+ struct unlink_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+ else
+ (void)vnode_pager_uncache(vp);
+ }
+
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ return (error);
+}
+
+/*
+ * Reposition read/write file offset.
+ */
+struct lseek_args {
+ int fd;
+ int pad;
+ off_t offset;
+ int whence;
+};
+lseek(p, uap, retval)
+ struct proc *p;
+ register struct lseek_args *uap;
+ int *retval;
+{
+ struct ucred *cred = p->p_ucred;
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct vattr vattr;
+ int error;
+
+ if ((u_int)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_VNODE)
+ return (ESPIPE);
+ switch (uap->whence) {
+ case L_INCR:
+ fp->f_offset += uap->offset;
+ break;
+ case L_XTND:
+ if (error =
+ VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p))
+ return (error);
+ fp->f_offset = uap->offset + vattr.va_size;
+ break;
+ case L_SET:
+ fp->f_offset = uap->offset;
+ break;
+ default:
+ return (EINVAL);
+ }
+ *(off_t *)retval = fp->f_offset;
+ return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Reposition read/write file offset.
+ */
+struct olseek_args {
+ int fd;
+ long offset;
+ int whence;
+};
+olseek(p, uap, retval)
+ struct proc *p;
+ register struct olseek_args *uap;
+ int *retval;
+{
+ struct lseek_args nuap;
+ off_t qret;
+ int error;
+
+ nuap.fd = uap->fd;
+ nuap.offset = uap->offset;
+ nuap.whence = uap->whence;
+ error = lseek(p, &nuap, &qret);
+ *(long *)retval = qret;
+ return (error);
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Check access permissions.
+ */
+struct access_args {
+ char *path;
+ int flags;
+};
+access(p, uap, retval)
+ struct proc *p;
+ register struct access_args *uap;
+ int *retval;
+{
+ register struct ucred *cred = p->p_ucred;
+ register struct vnode *vp;
+ int error, flags, t_gid, t_uid;
+ struct nameidata nd;
+
+ t_uid = cred->cr_uid;
+ t_gid = cred->cr_groups[0];
+ cred->cr_uid = p->p_cred->p_ruid;
+ cred->cr_groups[0] = p->p_cred->p_rgid;
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ goto out1;
+ vp = nd.ni_vp;
+
+ /* Flags == 0 means only check for existence. */
+ if (uap->flags) {
+ flags = 0;
+ if (uap->flags & R_OK)
+ flags |= VREAD;
+ if (uap->flags & W_OK)
+ flags |= VWRITE;
+ if (uap->flags & X_OK)
+ flags |= VEXEC;
+ if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
+ error = VOP_ACCESS(vp, flags, cred, p);
+ }
+ vput(vp);
+out1:
+ cred->cr_uid = t_uid;
+ cred->cr_groups[0] = t_gid;
+ return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Get file status; this version follows links.
+ */
+struct ostat_args {
+ char *path;
+ struct ostat *ub;
+};
+/* ARGSUSED */
+ostat(p, uap, retval)
+ struct proc *p;
+ register struct ostat_args *uap;
+ int *retval;
+{
+ struct stat sb;
+ struct ostat osb;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ error = vn_stat(nd.ni_vp, &sb, p);
+ vput(nd.ni_vp);
+ if (error)
+ return (error);
+ cvtstat(&sb, &osb);
+ error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb));
+ return (error);
+}
+
+/*
+ * Get file status; this version does not follow links.
+ */
+struct olstat_args {
+ char *path;
+ struct ostat *ub;
+};
+/* ARGSUSED */
+olstat(p, uap, retval)
+ struct proc *p;
+ register struct olstat_args *uap;
+ int *retval;
+{
+ struct stat sb;
+ struct ostat osb;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ error = vn_stat(nd.ni_vp, &sb, p);
+ vput(nd.ni_vp);
+ if (error)
+ return (error);
+ cvtstat(&sb, &osb);
+ error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb));
+ return (error);
+}
+
+/*
+ * Convert from an old to a new stat structure.
+ */
+cvtstat(st, ost)
+ struct stat *st;
+ struct ostat *ost;
+{
+
+ ost->st_dev = st->st_dev;
+ ost->st_ino = st->st_ino;
+ ost->st_mode = st->st_mode;
+ ost->st_nlink = st->st_nlink;
+ ost->st_uid = st->st_uid;
+ ost->st_gid = st->st_gid;
+ ost->st_rdev = st->st_rdev;
+ if (st->st_size < (quad_t)1 << 32)
+ ost->st_size = st->st_size;
+ else
+ ost->st_size = -2;
+ ost->st_atime = st->st_atime;
+ ost->st_mtime = st->st_mtime;
+ ost->st_ctime = st->st_ctime;
+ ost->st_blksize = st->st_blksize;
+ ost->st_blocks = st->st_blocks;
+ ost->st_flags = st->st_flags;
+ ost->st_gen = st->st_gen;
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Get file status; this version follows links.
+ */
+struct stat_args {
+ char *path;
+ struct stat *ub;
+};
+/* ARGSUSED */
+stat(p, uap, retval)
+ struct proc *p;
+ register struct stat_args *uap;
+ int *retval;
+{
+ struct stat sb;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ error = vn_stat(nd.ni_vp, &sb, p);
+ vput(nd.ni_vp);
+ if (error)
+ return (error);
+ error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+ return (error);
+}
+
+/*
+ * Get file status; this version does not follow links.
+ */
+struct lstat_args {
+ char *path;
+ struct stat *ub;
+};
+/* ARGSUSED */
+lstat(p, uap, retval)
+ struct proc *p;
+ register struct lstat_args *uap;
+ int *retval;
+{
+ int error;
+ struct vnode *vp, *dvp;
+ struct stat sb, sb1;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE,
+ uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ /*
+ * For symbolic links, always return the attributes of its
+ * containing directory, except for mode, size, and links.
+ */
+ vp = nd.ni_vp;
+ dvp = nd.ni_dvp;
+ if (vp->v_type != VLNK) {
+ if (dvp == vp)
+ vrele(dvp);
+ else
+ vput(dvp);
+ error = vn_stat(vp, &sb, p);
+ vput(vp);
+ if (error)
+ return (error);
+ } else {
+ error = vn_stat(dvp, &sb, p);
+ vput(dvp);
+ if (error) {
+ vput(vp);
+ return (error);
+ }
+ error = vn_stat(vp, &sb1, p);
+ vput(vp);
+ if (error)
+ return (error);
+ sb.st_mode &= ~S_IFDIR;
+ sb.st_mode |= S_IFLNK;
+ sb.st_nlink = sb1.st_nlink;
+ sb.st_size = sb1.st_size;
+ sb.st_blocks = sb1.st_blocks;
+ }
+ error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+ return (error);
+}
+
+/*
+ * Get configurable pathname variables.
+ */
+struct pathconf_args {
+ char *path;
+ int name;
+};
+/* ARGSUSED */
+pathconf(p, uap, retval)
+ struct proc *p;
+ register struct pathconf_args *uap;
+ int *retval;
+{
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ error = VOP_PATHCONF(nd.ni_vp, uap->name, retval);
+ vput(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * Return target name of a symbolic link.
+ */
+struct readlink_args {
+ char *path;
+ char *buf;
+ int count;
+};
+/* ARGSUSED */
+readlink(p, uap, retval)
+ struct proc *p;
+ register struct readlink_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct iovec aiov;
+ struct uio auio;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VLNK)
+ error = EINVAL;
+ else {
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = 0;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ auio.uio_resid = uap->count;
+ error = VOP_READLINK(vp, &auio, p->p_ucred);
+ }
+ vput(vp);
+ *retval = uap->count - auio.uio_resid;
+ return (error);
+}
+
+/*
+ * Change flags of a file given a path name.
+ */
+struct chflags_args {
+ char *path;
+ int flags;
+};
+/* ARGSUSED */
+chflags(p, uap, retval)
+ struct proc *p;
+ register struct chflags_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_flags = uap->flags;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Change flags of a file given a file descriptor.
+ */
+struct fchflags_args {
+ int fd;
+ int flags;
+};
+/* ARGSUSED */
+fchflags(p, uap, retval)
+ struct proc *p;
+ register struct fchflags_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_flags = uap->flags;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * Change mode of a file given path name.
+ */
+struct chmod_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+chmod(p, uap, retval)
+ struct proc *p;
+ register struct chmod_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_mode = uap->mode & ALLPERMS;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Change mode of a file given a file descriptor.
+ */
+struct fchmod_args {
+ int fd;
+ int mode;
+};
+/* ARGSUSED */
+fchmod(p, uap, retval)
+ struct proc *p;
+ register struct fchmod_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_mode = uap->mode & ALLPERMS;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * Set ownership given a path name.
+ */
+struct chown_args {
+ char *path;
+ int uid;
+ int gid;
+};
+/* ARGSUSED */
+chown(p, uap, retval)
+ struct proc *p;
+ register struct chown_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_uid = uap->uid;
+ vattr.va_gid = uap->gid;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Set ownership given a file descriptor.
+ */
+struct fchown_args {
+ int fd;
+ int uid;
+ int gid;
+};
+/* ARGSUSED */
+fchown(p, uap, retval)
+ struct proc *p;
+ register struct fchown_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_uid = uap->uid;
+ vattr.va_gid = uap->gid;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * Set the access and modification times of a file.
+ */
+struct utimes_args {
+ char *path;
+ struct timeval *tptr;
+};
+/* ARGSUSED */
+utimes(p, uap, retval)
+ struct proc *p;
+ register struct utimes_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct timeval tv[2];
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ VATTR_NULL(&vattr);
+ if (uap->tptr == NULL) {
+ microtime(&tv[0]);
+ tv[1] = tv[0];
+ vattr.va_vaflags |= VA_UTIMES_NULL;
+ } else if (error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv)))
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ vattr.va_atime.ts_sec = tv[0].tv_sec;
+ vattr.va_atime.ts_nsec = tv[0].tv_usec * 1000;
+ vattr.va_mtime.ts_sec = tv[1].tv_sec;
+ vattr.va_mtime.ts_nsec = tv[1].tv_usec * 1000;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Truncate a file given its path name.
+ */
+struct truncate_args {
+ char *path;
+ int pad;
+ off_t length;
+};
+/* ARGSUSED */
+truncate(p, uap, retval)
+ struct proc *p;
+ register struct truncate_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_type == VDIR)
+ error = EISDIR;
+ else if ((error = vn_writechk(vp)) == 0 &&
+ (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
+ VATTR_NULL(&vattr);
+ vattr.va_size = uap->length;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ */
+struct ftruncate_args {
+ int fd;
+ int pad;
+ off_t length;
+};
+/* ARGSUSED */
+ftruncate(p, uap, retval)
+ struct proc *p;
+ register struct ftruncate_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FWRITE) == 0)
+ return (EINVAL);
+ vp = (struct vnode *)fp->f_data;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_type == VDIR)
+ error = EISDIR;
+ else if ((error = vn_writechk(vp)) == 0) {
+ VATTR_NULL(&vattr);
+ vattr.va_size = uap->length;
+ error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
+ }
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Truncate a file given its path name.
+ */
+struct otruncate_args {
+ char *path;
+ long length;
+};
+/* ARGSUSED */
+otruncate(p, uap, retval)
+ struct proc *p;
+ register struct otruncate_args *uap;
+ int *retval;
+{
+ struct truncate_args nuap;
+
+ nuap.path = uap->path;
+ nuap.length = uap->length;
+ return (truncate(p, &nuap, retval));
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ */
+struct oftruncate_args {
+ int fd;
+ long length;
+};
+/* ARGSUSED */
+oftruncate(p, uap, retval)
+ struct proc *p;
+ register struct oftruncate_args *uap;
+ int *retval;
+{
+ struct ftruncate_args nuap;
+
+ nuap.fd = uap->fd;
+ nuap.length = uap->length;
+ return (ftruncate(p, &nuap, retval));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Sync an open file.
+ */
+struct fsync_args {
+ int fd;
+};
+/* ARGSUSED */
+fsync(p, uap, retval)
+ struct proc *p;
+ struct fsync_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ VOP_LOCK(vp);
+ error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * Rename files. Source and destination must either both be directories,
+ * or both not be directories. If target is a directory, it must be empty.
+ */
+struct rename_args {
+ char *from;
+ char *to;
+};
+/* ARGSUSED */
+rename(p, uap, retval)
+ struct proc *p;
+ register struct rename_args *uap;
+ int *retval;
+{
+ register struct vnode *tvp, *fvp, *tdvp;
+ struct nameidata fromnd, tond;
+ int error;
+
+ NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
+ uap->from, p);
+ if (error = namei(&fromnd))
+ return (error);
+ fvp = fromnd.ni_vp;
+ NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART,
+ UIO_USERSPACE, uap->to, p);
+ if (error = namei(&tond)) {
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ goto out1;
+ }
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+ if (tvp != NULL) {
+ if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ }
+ }
+ if (fvp == tdvp)
+ error = EINVAL;
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number with the same name in the same directory),
+ * then there is nothing to do.
+ */
+ if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+ fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+ !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+ fromnd.ni_cnd.cn_namelen))
+ error = -1;
+out:
+ if (!error) {
+ LEASE_CHECK(tdvp, p, p->p_ucred, LEASE_WRITE);
+ if (fromnd.ni_dvp != tdvp)
+ LEASE_CHECK(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ if (tvp)
+ LEASE_CHECK(tvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+ tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+ } else {
+ VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ }
+ vrele(tond.ni_startdir);
+ FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+ if (fromnd.ni_startdir)
+ vrele(fromnd.ni_startdir);
+ FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+ if (error == -1)
+ return (0);
+ return (error);
+}
+
+/*
+ * Make a directory file.
+ */
+struct mkdir_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+mkdir(p, uap, retval)
+ struct proc *p;
+ register struct mkdir_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ return (EEXIST);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VDIR;
+ vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+ if (!error)
+ vput(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * Remove a directory file.
+ */
+struct rmdir_args {
+ char *path;
+};
+/* ARGSUSED */
+rmdir(p, uap, retval)
+ struct proc *p;
+ struct rmdir_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ /*
+ * No rmdir "." please.
+ */
+ if (nd.ni_dvp == vp) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+out:
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ return (error);
+}
+
+#ifdef COMPAT_43
+/*
+ * Read a block of directory entries in a file system independent format.
+ */
+struct ogetdirentries_args {
+ int fd;
+ char *buf;
+ u_int count;
+ long *basep;
+};
+ogetdirentries(p, uap, retval)
+ struct proc *p;
+ register struct ogetdirentries_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct file *fp;
+ struct uio auio, kuio;
+ struct iovec aiov, kiov;
+ struct dirent *dp, *edp;
+ caddr_t dirbuf;
+ int error, readcnt;
+ long loff;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+ if (vp->v_type != VDIR)
+ return (EINVAL);
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ auio.uio_resid = uap->count;
+ VOP_LOCK(vp);
+ loff = auio.uio_offset = fp->f_offset;
+# if (BYTE_ORDER != LITTLE_ENDIAN)
+ if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+ error = VOP_READDIR(vp, &auio, fp->f_cred);
+ fp->f_offset = auio.uio_offset;
+ } else
+# endif
+ {
+ kuio = auio;
+ kuio.uio_iov = &kiov;
+ kuio.uio_segflg = UIO_SYSSPACE;
+ kiov.iov_len = uap->count;
+ MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
+ kiov.iov_base = dirbuf;
+ error = VOP_READDIR(vp, &kuio, fp->f_cred);
+ fp->f_offset = kuio.uio_offset;
+ if (error == 0) {
+ readcnt = uap->count - kuio.uio_resid;
+ edp = (struct dirent *)&dirbuf[readcnt];
+ for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+ /*
+ * The expected low byte of
+ * dp->d_namlen is our dp->d_type.
+ * The high MBZ byte of dp->d_namlen
+ * is our dp->d_namlen.
+ */
+ dp->d_type = dp->d_namlen;
+ dp->d_namlen = 0;
+# else
+ /*
+ * The dp->d_type is the high byte
+ * of the expected dp->d_namlen,
+ * so must be zero'ed.
+ */
+ dp->d_type = 0;
+# endif
+ if (dp->d_reclen > 0) {
+ dp = (struct dirent *)
+ ((char *)dp + dp->d_reclen);
+ } else {
+ error = EIO;
+ break;
+ }
+ }
+ if (dp >= edp)
+ error = uiomove(dirbuf, readcnt, &auio);
+ }
+ FREE(dirbuf, M_TEMP);
+ }
+ VOP_UNLOCK(vp);
+ if (error)
+ return (error);
+ error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+ *retval = uap->count - auio.uio_resid;
+ return (error);
+}
+#endif
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ */
+struct getdirentries_args {
+ int fd;
+ char *buf;
+ u_int count;
+ long *basep;
+};
+getdirentries(p, uap, retval)
+ struct proc *p;
+ register struct getdirentries_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct file *fp;
+ struct uio auio;
+ struct iovec aiov;
+ long loff;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+unionread:
+ if (vp->v_type != VDIR)
+ return (EINVAL);
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ auio.uio_resid = uap->count;
+ VOP_LOCK(vp);
+ loff = auio.uio_offset = fp->f_offset;
+ error = VOP_READDIR(vp, &auio, fp->f_cred);
+ fp->f_offset = auio.uio_offset;
+ VOP_UNLOCK(vp);
+ if (error)
+ return (error);
+
+#ifdef UNION
+{
+ extern int (**union_vnodeop_p)();
+ extern struct vnode *union_lowervp __P((struct vnode *));
+
+ if ((uap->count == auio.uio_resid) &&
+ (vp->v_op == union_vnodeop_p)) {
+ struct vnode *tvp = vp;
+
+ vp = union_lowervp(vp);
+ if (vp != NULLVP) {
+ VOP_LOCK(vp);
+ error = VOP_OPEN(vp, FREAD);
+ VOP_UNLOCK(vp);
+
+ if (error) {
+ vrele(vp);
+ return (error);
+ }
+ fp->f_data = (caddr_t) vp;
+ fp->f_offset = 0;
+ error = vn_close(tvp, FREAD, fp->f_cred, p);
+ if (error)
+ return (error);
+ goto unionread;
+ }
+ }
+}
+#endif
+
+ if ((uap->count == auio.uio_resid) &&
+ (vp->v_flag & VROOT) &&
+ (vp->v_mount->mnt_flag & MNT_UNION)) {
+ struct vnode *tvp = vp;
+ vp = vp->v_mount->mnt_vnodecovered;
+ VREF(vp);
+ fp->f_data = (caddr_t) vp;
+ fp->f_offset = 0;
+ vrele(tvp);
+ goto unionread;
+ }
+ error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+ *retval = uap->count - auio.uio_resid;
+ return (error);
+}
+
+/*
+ * Set the mode mask for creation of filesystem nodes.
+ */
+struct umask_args {
+ int newmask;
+};
+mode_t /* XXX */
+umask(p, uap, retval)
+ struct proc *p;
+ struct umask_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp;
+
+ fdp = p->p_fd;
+ *retval = fdp->fd_cmask;
+ fdp->fd_cmask = uap->newmask & ALLPERMS;
+ return (0);
+}
+
+/*
+ * Void all references to file by ripping underlying filesystem
+ * away from vnode.
+ */
+struct revoke_args {
+ char *path;
+};
+/* ARGSUSED */
+revoke(p, uap, retval)
+ struct proc *p;
+ register struct revoke_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VCHR && vp->v_type != VBLK) {
+ error = EINVAL;
+ goto out;
+ }
+ if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
+ goto out;
+ if (p->p_ucred->cr_uid != vattr.va_uid &&
+ (error = suser(p->p_ucred, &p->p_acflag)))
+ goto out;
+ if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
+ vgoneall(vp);
+out:
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Convert a user file descriptor to a kernel file entry.
+ */
+getvnode(fdp, fd, fpp)
+ struct filedesc *fdp;
+ struct file **fpp;
+ int fd;
+{
+ struct file *fp;
+
+ if ((u_int)fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[fd]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_VNODE)
+ return (EINVAL);
+ *fpp = fp;
+ return (0);
+}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
new file mode 100644
index 0000000..d104bb9
--- /dev/null
+++ b/sys/kern/vfs_vnops.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+#include <vm/vm.h>
+
+struct fileops vnops =
+ { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile };
+
+/*
+ * Common code for vnode open operations.
+ * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
+ */
+vn_open(ndp, fmode, cmode)
+ register struct nameidata *ndp;
+ int fmode, cmode;
+{
+ register struct vnode *vp;
+ register struct proc *p = ndp->ni_cnd.cn_proc;
+ register struct ucred *cred = p->p_ucred;
+ struct vattr vat;
+ struct vattr *vap = &vat;
+ int error;
+
+ if (fmode & O_CREAT) {
+ ndp->ni_cnd.cn_nameiop = CREATE;
+ ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ if ((fmode & O_EXCL) == 0)
+ ndp->ni_cnd.cn_flags |= FOLLOW;
+ if (error = namei(ndp))
+ return (error);
+ if (ndp->ni_vp == NULL) {
+ VATTR_NULL(vap);
+ vap->va_type = VREG;
+ vap->va_mode = cmode;
+ LEASE_CHECK(ndp->ni_dvp, p, cred, LEASE_WRITE);
+ if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
+ &ndp->ni_cnd, vap))
+ return (error);
+ fmode &= ~O_TRUNC;
+ vp = ndp->ni_vp;
+ } else {
+ VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
+ if (ndp->ni_dvp == ndp->ni_vp)
+ vrele(ndp->ni_dvp);
+ else
+ vput(ndp->ni_dvp);
+ ndp->ni_dvp = NULL;
+ vp = ndp->ni_vp;
+ if (fmode & O_EXCL) {
+ error = EEXIST;
+ goto bad;
+ }
+ fmode &= ~O_CREAT;
+ }
+ } else {
+ ndp->ni_cnd.cn_nameiop = LOOKUP;
+ ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF;
+ if (error = namei(ndp))
+ return (error);
+ vp = ndp->ni_vp;
+ }
+ if (vp->v_type == VSOCK) {
+ error = EOPNOTSUPP;
+ goto bad;
+ }
+ if ((fmode & O_CREAT) == 0) {
+ if (fmode & FREAD) {
+ if (error = VOP_ACCESS(vp, VREAD, cred, p))
+ goto bad;
+ }
+ if (fmode & (FWRITE | O_TRUNC)) {
+ if (vp->v_type == VDIR) {
+ error = EISDIR;
+ goto bad;
+ }
+ if ((error = vn_writechk(vp)) ||
+ (error = VOP_ACCESS(vp, VWRITE, cred, p)))
+ goto bad;
+ }
+ }
+ if (fmode & O_TRUNC) {
+ VOP_UNLOCK(vp); /* XXX */
+ LEASE_CHECK(vp, p, cred, LEASE_WRITE);
+ VOP_LOCK(vp); /* XXX */
+ VATTR_NULL(vap);
+ vap->va_size = 0;
+ if (error = VOP_SETATTR(vp, vap, cred, p))
+ goto bad;
+ }
+ if (error = VOP_OPEN(vp, fmode, cred, p))
+ goto bad;
+ if (fmode & FWRITE)
+ vp->v_writecount++;
+ return (0);
+bad:
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Check for write permissions on the specified vnode.
+ * The read-only status of the file system is checked.
+ * Also, prototype text segments cannot be written.
+ */
+vn_writechk(vp)
+ register struct vnode *vp;
+{
+
+ /*
+ * Disallow write attempts on read-only file systems;
+ * unless the file is a socket or a block or character
+ * device resident on the file system.
+ */
+ if (vp->v_mount->mnt_flag & MNT_RDONLY) {
+ switch (vp->v_type) {
+ case VREG: case VDIR: case VLNK:
+ return (EROFS);
+ }
+ }
+ /*
+ * If there's shared text associated with
+ * the vnode, try to free it up once. If
+ * we fail, we can't allow writing.
+ */
+ if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp))
+ return (ETXTBSY);
+ return (0);
+}
+
+/*
+ * Vnode close call
+ */
+vn_close(vp, flags, cred, p)
+ register struct vnode *vp;
+ int flags;
+ struct ucred *cred;
+ struct proc *p;
+{
+ int error;
+
+ if (flags & FWRITE)
+ vp->v_writecount--;
+ error = VOP_CLOSE(vp, flags, cred, p);
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Package up an I/O request on a vnode into a uio and do it.
+ */
+vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
+ enum uio_rw rw;
+ struct vnode *vp;
+ caddr_t base;
+ int len;
+ off_t offset;
+ enum uio_seg segflg;
+ int ioflg;
+ struct ucred *cred;
+ int *aresid;
+ struct proc *p;
+{
+ struct uio auio;
+ struct iovec aiov;
+ int error;
+
+ if ((ioflg & IO_NODELOCKED) == 0)
+ VOP_LOCK(vp);
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ aiov.iov_base = base;
+ aiov.iov_len = len;
+ auio.uio_resid = len;
+ auio.uio_offset = offset;
+ auio.uio_segflg = segflg;
+ auio.uio_rw = rw;
+ auio.uio_procp = p;
+ if (rw == UIO_READ) {
+ error = VOP_READ(vp, &auio, ioflg, cred);
+ } else {
+ error = VOP_WRITE(vp, &auio, ioflg, cred);
+ }
+ if (aresid)
+ *aresid = auio.uio_resid;
+ else
+ if (auio.uio_resid && error == 0)
+ error = EIO;
+ if ((ioflg & IO_NODELOCKED) == 0)
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * File table vnode read routine.
+ */
+vn_read(fp, uio, cred)
+ struct file *fp;
+ struct uio *uio;
+ struct ucred *cred;
+{
+ register struct vnode *vp = (struct vnode *)fp->f_data;
+ int count, error;
+
+ LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_READ);
+ VOP_LOCK(vp);
+ uio->uio_offset = fp->f_offset;
+ count = uio->uio_resid;
+ error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0,
+ cred);
+ fp->f_offset += count - uio->uio_resid;
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * File table vnode write routine.
+ */
+vn_write(fp, uio, cred)
+ struct file *fp;
+ struct uio *uio;
+ struct ucred *cred;
+{
+ register struct vnode *vp = (struct vnode *)fp->f_data;
+ int count, error, ioflag = 0;
+
+ if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
+ ioflag |= IO_APPEND;
+ if (fp->f_flag & FNONBLOCK)
+ ioflag |= IO_NDELAY;
+ LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ uio->uio_offset = fp->f_offset;
+ count = uio->uio_resid;
+ error = VOP_WRITE(vp, uio, ioflag, cred);
+ if (ioflag & IO_APPEND)
+ fp->f_offset = uio->uio_offset;
+ else
+ fp->f_offset += count - uio->uio_resid;
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * File table vnode stat routine.
+ */
+vn_stat(vp, sb, p)
+ struct vnode *vp;
+ register struct stat *sb;
+ struct proc *p;
+{
+ struct vattr vattr;
+ register struct vattr *vap;
+ int error;
+ u_short mode;
+
+ vap = &vattr;
+ error = VOP_GETATTR(vp, vap, p->p_ucred, p);
+ if (error)
+ return (error);
+ /*
+ * Copy from vattr table
+ */
+ sb->st_dev = vap->va_fsid;
+ sb->st_ino = vap->va_fileid;
+ mode = vap->va_mode;
+ switch (vp->v_type) {
+ case VREG:
+ mode |= S_IFREG;
+ break;
+ case VDIR:
+ mode |= S_IFDIR;
+ break;
+ case VBLK:
+ mode |= S_IFBLK;
+ break;
+ case VCHR:
+ mode |= S_IFCHR;
+ break;
+ case VLNK:
+ mode |= S_IFLNK;
+ break;
+ case VSOCK:
+ mode |= S_IFSOCK;
+ break;
+ case VFIFO:
+ mode |= S_IFIFO;
+ break;
+ default:
+ return (EBADF);
+ };
+ sb->st_mode = mode;
+ sb->st_nlink = vap->va_nlink;
+ sb->st_uid = vap->va_uid;
+ sb->st_gid = vap->va_gid;
+ sb->st_rdev = vap->va_rdev;
+ sb->st_size = vap->va_size;
+ sb->st_atimespec = vap->va_atime;
+ sb->st_mtimespec= vap->va_mtime;
+ sb->st_ctimespec = vap->va_ctime;
+ sb->st_blksize = vap->va_blocksize;
+ sb->st_flags = vap->va_flags;
+ sb->st_gen = vap->va_gen;
+ sb->st_blocks = vap->va_bytes / S_BLKSIZE;
+ return (0);
+}
+
+/*
+ * File table vnode ioctl routine.
+ */
+vn_ioctl(fp, com, data, p)
+ struct file *fp;
+ int com;
+ caddr_t data;
+ struct proc *p;
+{
+ register struct vnode *vp = ((struct vnode *)fp->f_data);
+ struct vattr vattr;
+ int error;
+
+ switch (vp->v_type) {
+
+ case VREG:
+ case VDIR:
+ if (com == FIONREAD) {
+ if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
+ return (error);
+ *(int *)data = vattr.va_size - fp->f_offset;
+ return (0);
+ }
+ if (com == FIONBIO || com == FIOASYNC) /* XXX */
+ return (0); /* XXX */
+ /* fall into ... */
+
+ default:
+ return (ENOTTY);
+
+ case VFIFO:
+ case VCHR:
+ case VBLK:
+ error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
+ if (error == 0 && com == TIOCSCTTY) {
+ p->p_session->s_ttyvp = vp;
+ VREF(vp);
+ }
+ return (error);
+ }
+}
+
+/*
+ * File table vnode select routine.
+ */
+vn_select(fp, which, p)
+ struct file *fp;
+ int which;
+ struct proc *p;
+{
+
+ return (VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag,
+ fp->f_cred, p));
+}
+
+/*
+ * File table vnode close routine.
+ */
+vn_closefile(fp, p)
+ struct file *fp;
+ struct proc *p;
+{
+
+ return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
+ fp->f_cred, p));
+}
diff --git a/sys/kern/vnode_if.sh b/sys/kern/vnode_if.sh
new file mode 100644
index 0000000..e190fa0
--- /dev/null
+++ b/sys/kern/vnode_if.sh
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+# (where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written, specifically they
+# don't use arrays well and figure out the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk. Note,
+# they use nawk extensions and gawk's toupper.
+
+if [ $# -ne 1 ] ; then
+ echo 'usage: vnode_if.sh srcfile'
+ exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+
+# Names of the created files.
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ name = $1;
+ uname = toupper(name);
+
+ # Get the function arguments.
+ for (c1 = 0;; ++c1) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ a[c1] = $0;
+ }
+
+ # Print out the vop_F_args structure.
+ printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n",
+ name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%sa_%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("};\n");
+
+ # Print out extern declaration.
+ printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+ # Print out inline struct.
+ printf("static inline int %s(", uname);
+ sep = ", ";
+ for (c2 = 0; c2 < c1; ++c2) {
+ if (c2 == c1 - 1)
+ sep = ")\n";
+ c3 = split(a[c2], t);
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("%s%s", substr(t[c3], beg, end - beg), sep);
+ }
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%s%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("{\n\tstruct %s_args a;\n\n", name);
+ printf("\ta.a_desc = VDESC(%s);\n", name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("a.a_%s = %s\n",
+ substr(t[c3], beg, end - beg), substr(t[c3], beg));
+ }
+ c1 = split(a[0], t);
+ beg = match(t[c1], "[^*]");
+ end = match(t[c1], ";");
+ printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+ substr(t[c1], beg, end - beg), name);
+ }' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+ 0,
+ "default",
+ 0,
+ NULL,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+ sub (/^[ \t]*/, "", s);
+ sub (/[ \t]*$/, "", s);
+ return s;
+ }
+
+ function read_args() {
+ numargs = 0;
+ while (getline ln) {
+ if (ln ~ /}/) {
+ break;
+ };
+
+ # Delete comments, if any.
+ gsub (/\/\*.*\*\//, "", ln);
+
+ # Delete leading/trailing space.
+ ln = kill_surrounding_ws(ln);
+
+ # Pick off direction.
+ if (1 == sub(/^INOUT[ \t]+/, "", ln))
+ dir = "INOUT";
+ else if (1 == sub(/^IN[ \t]+/, "", ln))
+ dir = "IN";
+ else if (1 == sub(/^OUT[ \t]+/, "", ln))
+ dir = "OUT";
+ else
+ bail("No IN/OUT direction for \"" ln "\".");
+
+ # check for "WILLRELE"
+ if (1 == sub(/^WILLRELE[ \t]+/, "", ln)) {
+ rele = "WILLRELE";
+ } else {
+ rele = "WONTRELE";
+ };
+
+ # kill trailing ;
+ if (1 != sub (/;$/, "", ln)) {
+ bail("Missing end-of-line ; in \"" ln "\".");
+ };
+
+ # pick off variable name
+ if (!(i = match(ln, /[A-Za-z0-9_]+$/))) {
+ bail("Missing var name \"a_foo\" in \"" ln "\".");
+ };
+ arg = substr (ln, i);
+ # Want to <<substr(ln, i) = "";>>, but nawk cannot.
+ # Hack around this.
+ ln = substr(ln, 1, i-1);
+
+ # what is left must be type
+ # (put clean it up some)
+ type = ln;
+ gsub (/[ \t]+/, " ", type); # condense whitespace
+ type = kill_surrounding_ws(type);
+
+ # (boy this was easier in Perl)
+
+ numargs++;
+ dirs[numargs] = dir;
+ reles[numargs] = rele;
+ types[numargs] = type;
+ args[numargs] = arg;
+ };
+ }
+
+ function generate_operation_vp_offsets() {
+ printf ("int %s_vp_offsets[] = {\n", name);
+ # as a side effect, figure out the releflags
+ releflags = "";
+ vpnum = 0;
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode *") {
+ printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+ name, args[i]);
+ if (reles[i] == "WILLRELE") {
+ releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+ };
+ vpnum++;
+ };
+ };
+ sub (/^\|/, "", releflags);
+ print "\tVDESC_NO_OFFSET";
+ print "};";
+ }
+
+ function find_arg_with_type (type) {
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == type) {
+ return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+ };
+ };
+ return "VDESC_NO_OFFSET";
+ }
+
+ function generate_operation_desc() {
+ printf ("struct vnodeop_desc %s_desc = {\n", name);
+ # offset
+ printf ("\t0,\n");
+ # printable name
+ printf ("\t\"%s\",\n", name);
+ # flags
+ vppwillrele = "";
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode **" &&
+ (reles[i] == "WILLRELE")) {
+ vppwillrele = "|VDESC_VPP_WILLRELE";
+ };
+ };
+ if (releflags == "") {
+ printf ("\t0%s,\n", vppwillrele);
+ } else {
+ printf ("\t%s%s,\n", releflags, vppwillrele);
+ };
+ # vp offsets
+ printf ("\t%s_vp_offsets,\n", name);
+ # vpp (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+ # cred (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+ # proc (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+ # componentname
+ printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+ # transport layer information
+ printf ("\tNULL,\n};\n");
+ }
+
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # get the function name
+ name = $1;
+
+ # get the function arguments
+ read_args();
+
+ # Print out the vop_F_vp_offsets structure. This all depends
+ # on naming conventions and nothing else.
+ generate_operation_vp_offsets();
+
+ # Print out the vnodeop_desc structure.
+ generate_operation_desc();
+
+ printf "\n";
+
+ }' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+#
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments. This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+ struct buf *bp;
+{
+ struct vop_strategy_args a;
+
+ a.a_desc = VDESC(vop_strategy);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+ struct buf *bp;
+{
+ struct vop_bwrite_args a;
+
+ a.a_desc = VDESC(vop_bwrite);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+ 0,
+ "vop_strategy",
+ 0,
+ vop_strategy_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+ 0,
+ "vop_bwrite",
+ 0,
+ vop_bwrite_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Add the vfs_op_descs array to the C file.
+$AWK '
+ BEGIN {
+ printf("\nstruct vnodeop_desc *vfs_op_descs[] = {\n");
+ printf("\t&vop_default_desc, /* MUST BE FIRST */\n");
+ printf("\t&vop_strategy_desc, /* XXX: SPECIAL CASE */\n");
+ printf("\t&vop_bwrite_desc, /* XXX: SPECIAL CASE */\n");
+ }
+ END {
+ printf("\tNULL\n};\n");
+ }
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ printf("\t&%s_desc,\n", $1);
+
+ # Skip the function arguments.
+ for (;;) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ }
+ }' < $SRC >> $CFILE
+
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
new file mode 100644
index 0000000..caee21d
--- /dev/null
+++ b/sys/kern/vnode_if.src
@@ -0,0 +1,296 @@
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.src 8.3 (Berkeley) 2/3/94
+#
+vop_lookup {
+ IN struct vnode *dvp;
+ INOUT struct vnode **vpp;
+ IN struct componentname *cnp;
+};
+
+vop_create {
+ IN WILLRELE struct vnode *dvp;
+ OUT struct vnode **vpp;
+ IN struct componentname *cnp;
+ IN struct vattr *vap;
+};
+
+vop_mknod {
+ IN WILLRELE struct vnode *dvp;
+ OUT WILLRELE struct vnode **vpp;
+ IN struct componentname *cnp;
+ IN struct vattr *vap;
+};
+
+vop_open {
+ IN struct vnode *vp;
+ IN int mode;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_close {
+ IN struct vnode *vp;
+ IN int fflag;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_access {
+ IN struct vnode *vp;
+ IN int mode;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_getattr {
+ IN struct vnode *vp;
+ IN struct vattr *vap;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_setattr {
+ IN struct vnode *vp;
+ IN struct vattr *vap;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_read {
+ IN struct vnode *vp;
+ INOUT struct uio *uio;
+ IN int ioflag;
+ IN struct ucred *cred;
+};
+
+vop_write {
+ IN struct vnode *vp;
+ INOUT struct uio *uio;
+ IN int ioflag;
+ IN struct ucred *cred;
+};
+
+vop_ioctl {
+ IN struct vnode *vp;
+ IN int command;
+ IN caddr_t data;
+ IN int fflag;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+# Needs work? (fflags)
+vop_select {
+ IN struct vnode *vp;
+ IN int which;
+ IN int fflags;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_mmap {
+ IN struct vnode *vp;
+ IN int fflags;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_fsync {
+ IN struct vnode *vp;
+ IN struct ucred *cred;
+ IN int waitfor;
+ IN struct proc *p;
+};
+
+# Needs word: Is newoff right? What's it mean?
+vop_seek {
+ IN struct vnode *vp;
+ IN off_t oldoff;
+ IN off_t newoff;
+ IN struct ucred *cred;
+};
+
+vop_remove {
+ IN WILLRELE struct vnode *dvp;
+ IN WILLRELE struct vnode *vp;
+ IN struct componentname *cnp;
+};
+
+vop_link {
+ IN WILLRELE struct vnode *vp;
+ IN struct vnode *tdvp;
+ IN struct componentname *cnp;
+};
+
+vop_rename {
+ IN WILLRELE struct vnode *fdvp;
+ IN WILLRELE struct vnode *fvp;
+ IN struct componentname *fcnp;
+ IN WILLRELE struct vnode *tdvp;
+ IN WILLRELE struct vnode *tvp;
+ IN struct componentname *tcnp;
+};
+
+vop_mkdir {
+ IN WILLRELE struct vnode *dvp;
+ OUT struct vnode **vpp;
+ IN struct componentname *cnp;
+ IN struct vattr *vap;
+};
+
+vop_rmdir {
+ IN WILLRELE struct vnode *dvp;
+ IN WILLRELE struct vnode *vp;
+ IN struct componentname *cnp;
+};
+
+vop_symlink {
+ IN WILLRELE struct vnode *dvp;
+ OUT WILLRELE struct vnode **vpp;
+ IN struct componentname *cnp;
+ IN struct vattr *vap;
+ IN char *target;
+};
+
+vop_readdir {
+ IN struct vnode *vp;
+ INOUT struct uio *uio;
+ IN struct ucred *cred;
+};
+
+vop_readlink {
+ IN struct vnode *vp;
+ INOUT struct uio *uio;
+ IN struct ucred *cred;
+};
+
+vop_abortop {
+ IN struct vnode *dvp;
+ IN struct componentname *cnp;
+};
+
+vop_inactive {
+ IN struct vnode *vp;
+};
+
+vop_reclaim {
+ IN struct vnode *vp;
+};
+
+vop_lock {
+ IN struct vnode *vp;
+};
+
+vop_unlock {
+ IN struct vnode *vp;
+};
+
+vop_bmap {
+ IN struct vnode *vp;
+ IN daddr_t bn;
+ OUT struct vnode **vpp;
+ IN daddr_t *bnp;
+ OUT int *runp;
+};
+
+#vop_strategy {
+# IN struct buf *bp;
+#};
+
+vop_print {
+ IN struct vnode *vp;
+};
+
+vop_islocked {
+ IN struct vnode *vp;
+};
+
+vop_pathconf {
+ IN struct vnode *vp;
+ IN int name;
+ OUT int *retval;
+};
+
+vop_advlock {
+ IN struct vnode *vp;
+ IN caddr_t id;
+ IN int op;
+ IN struct flock *fl;
+ IN int flags;
+};
+
+vop_blkatoff {
+ IN struct vnode *vp;
+ IN off_t offset;
+ OUT char **res;
+ OUT struct buf **bpp;
+};
+
+vop_valloc {
+ IN struct vnode *pvp;
+ IN int mode;
+ IN struct ucred *cred;
+ OUT struct vnode **vpp;
+};
+
+vop_reallocblks {
+ IN struct vnode *vp;
+ IN struct cluster_save *buflist;
+};
+
+vop_vfree {
+ IN struct vnode *pvp;
+ IN ino_t ino;
+ IN int mode;
+};
+
+vop_truncate {
+ IN struct vnode *vp;
+ IN off_t length;
+ IN int flags;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_update {
+ IN struct vnode *vp;
+ IN struct timeval *access;
+ IN struct timeval *modify;
+ IN int waitfor;
+};
+
+# Needs work: no vp?
+#vop_bwrite {
+# IN struct buf *bp;
+#};
OpenPOWER on IntegriCloud