path: root/sys/kern
Diffstat (limited to 'sys/kern')
-rw-r--r--   sys/kern/kern_tc.c          528
-rw-r--r--   sys/kern/kern_timeout.c     528
-rw-r--r--   sys/kern/subr_clist.c       159
-rw-r--r--   sys/kern/subr_disklabel.c   364
-rw-r--r--   sys/kern/subr_param.c       145
-rw-r--r--   sys/kern/uipc_sockbuf.c     755
-rw-r--r--   sys/kern/vfs_export.c      1322
-rw-r--r--   sys/kern/vfs_extattr.c     2107
-rw-r--r--   sys/kern/vfs_mount.c        260
-rw-r--r--   sys/kern/vnode_if.pl        433
10 files changed, 6601 insertions(+), 0 deletions(-)
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
new file mode 100644
index 0000000..f42900c
--- /dev/null
+++ b/sys/kern/kern_tc.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other. The main clock, running hz times per second, is used to keep
+ * track of real time. The second timer handles kernel and user profiling,
+ * and does resource use estimation. If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks. For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock. This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling. This profile clock runs at profhz. We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics. (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ * allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's.
+ */
+#define BUMPTIME(t, usec) { \
+ register volatile struct timeval *tp = (t); \
+ register long us; \
+ \
+ tp->tv_usec = us = tp->tv_usec + (usec); \
+ if (us >= 1000000) { \
+ tp->tv_usec = us - 1000000; \
+ tp->tv_sec++; \
+ } \
+}
+
+int stathz;
+int profhz;
+int profprocs;
+int ticks;
+static int psdiv, pscnt; /* prof => stat divider */
+int psratio; /* ratio: prof / stat */
+
+volatile struct timeval time;
+volatile struct timeval mono_time;
+
+/*
+ * Initialize clock frequencies and start both clocks running.
+ */
+void
+initclocks()
+{
+ register int i;
+
+ /*
+ * Set divisors to 1 (normal case) and let the machine-specific
+ * code do its bit.
+ */
+ psdiv = pscnt = 1;
+ cpu_initclocks();
+
+ /*
+ * Compute profhz/stathz, and fix profhz if needed.
+ */
+ i = stathz ? stathz : hz;
+ if (profhz == 0)
+ profhz = i;
+ psratio = profhz / i;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second.
+ */
+void
+hardclock(frame)
+ register struct clockframe *frame;
+{
+ register struct callout *p1;
+ register struct proc *p;
+ register int delta, needsoft;
+ extern int tickdelta;
+ extern long timedelta;
+
+ /*
+ * Update real-time timeout queue.
+ * At front of queue are some number of events which are ``due''.
+ * The time to these is <= 0 and if negative represents the
+ * number of ticks which have passed since it was supposed to happen.
+ * The rest of the q elements (times > 0) are events yet to happen,
+ * where the time for each is given as a delta from the previous.
+ * Decrementing just the first of these serves to decrement the time
+ * to all events.
+ */
+ needsoft = 0;
+ for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+ if (--p1->c_time > 0)
+ break;
+ needsoft = 1;
+ if (p1->c_time == 0)
+ break;
+ }
+
+ p = curproc;
+ if (p) {
+ register struct pstats *pstats;
+
+ /*
+ * Run current process's virtual and profile time, as needed.
+ */
+ pstats = p->p_stats;
+ if (CLKF_USERMODE(frame) &&
+ timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+ psignal(p, SIGVTALRM);
+ if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+ psignal(p, SIGPROF);
+ }
+
+ /*
+ * If no separate statistics clock is available, run it from here.
+ */
+ if (stathz == 0)
+ statclock(frame);
+
+ /*
+ * Increment the time-of-day. The increment is just ``tick'' unless
+ * we are still adjusting the clock; see adjtime().
+ */
+ ticks++;
+ if (timedelta == 0)
+ delta = tick;
+ else {
+ delta = tick + tickdelta;
+ timedelta -= tickdelta;
+ }
+ BUMPTIME(&time, delta);
+ BUMPTIME(&mono_time, delta);
+
+ /*
+ * Process callouts at a very low cpu priority, so we don't keep the
+ * relatively high clock interrupt priority any longer than necessary.
+ */
+ if (needsoft) {
+ if (CLKF_BASEPRI(frame)) {
+ /*
+ * Save the overhead of a software interrupt;
+ * it will happen as soon as we return, so do it now.
+ */
+ (void)splsoftclock();
+ softclock();
+ } else
+ setsoftclock();
+ }
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run periodic events from timeout queue.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+ register struct callout *c;
+ register void *arg;
+ register void (*func) __P((void *));
+ register int s;
+
+ s = splhigh();
+ while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+ func = c->c_func;
+ arg = c->c_arg;
+ calltodo.c_next = c->c_next;
+ c->c_next = callfree;
+ callfree = c;
+ splx(s);
+ (*func)(arg);
+ (void) splhigh();
+ }
+ splx(s);
+}
+
+/*
+ * timeout --
+ * Execute a function after a specified length of time.
+ *
+ * untimeout --
+ * Cancel previous timeout function call.
+ *
+ * See AT&T BCI Driver Reference Manual for specification. This
+ * implementation differs from that one in that no identification
+ * value is returned from timeout, rather, the original arguments
+ * to timeout are used to identify entries for untimeout.
+ */
+void
+timeout(ftn, arg, ticks)
+ void (*ftn) __P((void *));
+ void *arg;
+ register int ticks;
+{
+ register struct callout *new, *p, *t;
+ register int s;
+
+ if (ticks <= 0)
+ ticks = 1;
+
+ /* Lock out the clock. */
+ s = splhigh();
+
+ /* Fill in the next free callout structure. */
+ if (callfree == NULL)
+ panic("timeout table full");
+ new = callfree;
+ callfree = new->c_next;
+ new->c_arg = arg;
+ new->c_func = ftn;
+
+ /*
+ * The time for each event is stored as a difference from the time
+ * of the previous event on the queue. Walk the queue, correcting
+ * the ticks argument for queue entries passed. Correct the ticks
+ * value for the queue entry immediately after the insertion point
+ * as well. Watch out for negative c_time values; these represent
+ * overdue events.
+ */
+ for (p = &calltodo;
+ (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+ if (t->c_time > 0)
+ ticks -= t->c_time;
+ new->c_time = ticks;
+ if (t != NULL)
+ t->c_time -= ticks;
+
+ /* Insert the new entry into the queue. */
+ p->c_next = new;
+ new->c_next = t;
+ splx(s);
+}
+
+void
+untimeout(ftn, arg)
+ void (*ftn) __P((void *));
+ void *arg;
+{
+ register struct callout *p, *t;
+ register int s;
+
+ s = splhigh();
+ for (p = &calltodo; (t = p->c_next) != NULL; p = t)
+ if (t->c_func == ftn && t->c_arg == arg) {
+ /* Increment next entry's tick count. */
+ if (t->c_next && t->c_time > 0)
+ t->c_next->c_time += t->c_time;
+
+ /* Move entry from callout queue to callfree queue. */
+ p->c_next = t->c_next;
+ t->c_next = callfree;
+ callfree = t;
+ break;
+ }
+ splx(s);
+}
+
+/*
+ * Compute number of hz until specified time. Used to
+ * compute third argument to timeout() from an absolute time.
+ */
+int
+hzto(tv)
+ struct timeval *tv;
+{
+ register long ticks, sec;
+ int s;
+
+ /*
+ * If the number of milliseconds will fit in 32 bit arithmetic,
+ * then compute the number of milliseconds to time and scale to
+ * ticks. Otherwise just compute the number of hz in time, rounding
+ * times greater than representable to maximum value.
+ *
+ * Delta times less than 25 days can be computed ``exactly''.
+ * Maximum value for any timeout in 10ms ticks is 250 days.
+ */
+ s = splhigh();
+ sec = tv->tv_sec - time.tv_sec;
+ if (sec <= 0x7fffffff / 1000 - 1000)
+ ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+ (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+ else if (sec <= 0x7fffffff / hz)
+ ticks = sec * hz;
+ else
+ ticks = 0x7fffffff;
+ splx(s);
+ return (ticks);
+}
+
+/*
+ * Start profiling on a process.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+ register struct proc *p;
+{
+ int s;
+
+ if ((p->p_flag & P_PROFIL) == 0) {
+ p->p_flag |= P_PROFIL;
+ if (++profprocs == 1 && stathz != 0) {
+ s = splstatclock();
+ psdiv = pscnt = psratio;
+ setstatclockrate(profhz);
+ splx(s);
+ }
+ }
+}
+
+/*
+ * Stop profiling on a process.
+ */
+void
+stopprofclock(p)
+ register struct proc *p;
+{
+ int s;
+
+ if (p->p_flag & P_PROFIL) {
+ p->p_flag &= ~P_PROFIL;
+ if (--profprocs == 0 && stathz != 0) {
+ s = splstatclock();
+ psdiv = pscnt = 1;
+ setstatclockrate(stathz);
+ splx(s);
+ }
+ }
+}
+
+int dk_ndrive = DK_NDRIVE;
+
+/*
+ * Statistics clock. Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ */
+void
+statclock(frame)
+ register struct clockframe *frame;
+{
+#ifdef GPROF
+ register struct gmonparam *g;
+#endif
+ register struct proc *p;
+ register int i;
+
+ if (CLKF_USERMODE(frame)) {
+ p = curproc;
+ if (p->p_flag & P_PROFIL)
+ addupc_intr(p, CLKF_PC(frame), 1);
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from user mode; CPU was in user state.
+ * If this process is being profiled record the tick.
+ */
+ p->p_uticks++;
+ if (p->p_nice > NZERO)
+ cp_time[CP_NICE]++;
+ else
+ cp_time[CP_USER]++;
+ } else {
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addupc_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = CLKF_PC(frame) - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from kernel mode, so we were:
+ * - handling an interrupt,
+ * - doing syscall or trap work on behalf of the current
+ * user process, or
+ * - spinning in the idle loop.
+ * Whichever it is, charge the time as appropriate.
+ * Note that we charge interrupts to the current process,
+ * regardless of whether they are ``for'' that process,
+ * so that we know how much of its real time was spent
+ * in ``non-process'' (i.e., interrupt) work.
+ */
+ p = curproc;
+ if (CLKF_INTR(frame)) {
+ if (p != NULL)
+ p->p_iticks++;
+ cp_time[CP_INTR]++;
+ } else if (p != NULL) {
+ p->p_sticks++;
+ cp_time[CP_SYS]++;
+ } else
+ cp_time[CP_IDLE]++;
+ }
+ pscnt = psdiv;
+
+ /*
+ * We maintain statistics shown by user-level statistics
+ * programs: the amount of time in each cpu state, and
+ * the amount of time each of DK_NDRIVE ``drives'' is busy.
+ *
+ * XXX should either run linked list of drives, or (better)
+ * grab timestamps in the start & done code.
+ */
+ for (i = 0; i < DK_NDRIVE; i++)
+ if (dk_busy & (1 << i))
+ dk_time[i]++;
+
+ /*
+ * We adjust the priority of the current process. The priority of
+ * a process gets worse as it accumulates CPU time. The cpu usage
+ * estimator (p_estcpu) is increased here. The formula for computing
+ * priorities (in kern_synch.c) will compute a different value each
+ * time p_estcpu increases by 4. The cpu usage estimator ramps up
+ * quite quickly when the process is running (linearly), and decays
+ * away exponentially, at a rate which is proportionally slower when
+ * the system is busy. The basic principle is that the system will
+ * 90% forget that the process used a lot of CPU time in 5 * loadav
+ * seconds. This causes the system to favor processes which haven't
+ * run much recently, and to round-robin among other processes.
+ */
+ if (p != NULL) {
+ p->p_cpticks++;
+ if (++p->p_estcpu == 0)
+ p->p_estcpu--;
+ if ((p->p_estcpu & 3) == 0) {
+ resetpriority(p);
+ if (p->p_priority >= PUSER)
+ p->p_priority = p->p_usrpri;
+ }
+ }
+}
+
+/*
+ * Return information about system clocks.
+ */
+sysctl_clockrate(where, sizep)
+ register char *where;
+ size_t *sizep;
+{
+ struct clockinfo clkinfo;
+
+ /*
+ * Construct clockinfo structure.
+ */
+ clkinfo.hz = hz;
+ clkinfo.tick = tick;
+ clkinfo.profhz = profhz;
+ clkinfo.stathz = stathz ? stathz : hz;
+ return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
+}
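
The long comment above timeout() describes the queue's delta encoding: each entry's c_time is stored relative to its predecessor, so hardclock() ages every pending event by decrementing only the list head. A minimal userland sketch of that encoding follows; struct tq_ent, tq_insert() and tq_tick() are hypothetical illustrations, not the kernel's struct callout API, and the sketch assumes no overdue entries (the kernel skips negative deltas when consuming them in timeout()).

/*
 * Delta-encoded timeout queue sketch (hypothetical names, see above).
 */
#include <stdio.h>
#include <stdlib.h>

struct tq_ent {
	struct tq_ent *next;
	int delta;			/* ticks relative to predecessor */
	const char *name;
};

static struct tq_ent *head;

static void
tq_insert(const char *name, int ticks)
{
	struct tq_ent **pp, *e;

	if ((e = malloc(sizeof(*e))) == NULL)
		abort();
	e->name = name;
	/* Walk the queue, consuming each predecessor's delta. */
	for (pp = &head; *pp != NULL && ticks > (*pp)->delta; pp = &(*pp)->next)
		ticks -= (*pp)->delta;
	e->delta = ticks;
	e->next = *pp;
	if (e->next != NULL)
		e->next->delta -= ticks;	/* successor now relative to us */
	*pp = e;
}

static void
tq_tick(void)
{
	if (head != NULL)
		head->delta--;			/* one decrement ages every entry */
	while (head != NULL && head->delta <= 0) {
		struct tq_ent *e = head;

		head = e->next;
		printf("expired: %s\n", e->name);
		free(e);
	}
}

int
main(void)
{
	tq_insert("a", 2);
	tq_insert("b", 5);
	tq_insert("c", 3);		/* stored as deltas 2, 1, 2 */
	for (int t = 0; t < 5; t++)
		tq_tick();		/* prints a, c, b in expiry order */
	return (0);
}

The payoff is that hardclock()'s per-tick work is independent of the number of pending timeouts; the cost is the linear walk at insertion time in timeout().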
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
new file mode 100644
index 0000000..f42900c
--- /dev/null
+++ b/sys/kern/kern_timeout.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other. The main clock, running hz times per second, is used to keep
+ * track of real time. The second timer handles kernel and user profiling,
+ * and does resource use estimation. If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks. For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock. This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling. This profile clock runs at profhz. We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics. (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ * allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's.
+ */
+#define BUMPTIME(t, usec) { \
+ register volatile struct timeval *tp = (t); \
+ register long us; \
+ \
+ tp->tv_usec = us = tp->tv_usec + (usec); \
+ if (us >= 1000000) { \
+ tp->tv_usec = us - 1000000; \
+ tp->tv_sec++; \
+ } \
+}
+
+int stathz;
+int profhz;
+int profprocs;
+int ticks;
+static int psdiv, pscnt; /* prof => stat divider */
+int psratio; /* ratio: prof / stat */
+
+volatile struct timeval time;
+volatile struct timeval mono_time;
+
+/*
+ * Initialize clock frequencies and start both clocks running.
+ */
+void
+initclocks()
+{
+ register int i;
+
+ /*
+ * Set divisors to 1 (normal case) and let the machine-specific
+ * code do its bit.
+ */
+ psdiv = pscnt = 1;
+ cpu_initclocks();
+
+ /*
+ * Compute profhz/stathz, and fix profhz if needed.
+ */
+ i = stathz ? stathz : hz;
+ if (profhz == 0)
+ profhz = i;
+ psratio = profhz / i;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second.
+ */
+void
+hardclock(frame)
+ register struct clockframe *frame;
+{
+ register struct callout *p1;
+ register struct proc *p;
+ register int delta, needsoft;
+ extern int tickdelta;
+ extern long timedelta;
+
+ /*
+ * Update real-time timeout queue.
+ * At front of queue are some number of events which are ``due''.
+ * The time to these is <= 0 and if negative represents the
+ * number of ticks which have passed since it was supposed to happen.
+ * The rest of the q elements (times > 0) are events yet to happen,
+ * where the time for each is given as a delta from the previous.
+ * Decrementing just the first of these serves to decrement the time
+ * to all events.
+ */
+ needsoft = 0;
+ for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+ if (--p1->c_time > 0)
+ break;
+ needsoft = 1;
+ if (p1->c_time == 0)
+ break;
+ }
+
+ p = curproc;
+ if (p) {
+ register struct pstats *pstats;
+
+ /*
+ * Run current process's virtual and profile time, as needed.
+ */
+ pstats = p->p_stats;
+ if (CLKF_USERMODE(frame) &&
+ timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+ psignal(p, SIGVTALRM);
+ if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+ psignal(p, SIGPROF);
+ }
+
+ /*
+ * If no separate statistics clock is available, run it from here.
+ */
+ if (stathz == 0)
+ statclock(frame);
+
+ /*
+ * Increment the time-of-day. The increment is just ``tick'' unless
+ * we are still adjusting the clock; see adjtime().
+ */
+ ticks++;
+ if (timedelta == 0)
+ delta = tick;
+ else {
+ delta = tick + tickdelta;
+ timedelta -= tickdelta;
+ }
+ BUMPTIME(&time, delta);
+ BUMPTIME(&mono_time, delta);
+
+ /*
+ * Process callouts at a very low cpu priority, so we don't keep the
+ * relatively high clock interrupt priority any longer than necessary.
+ */
+ if (needsoft) {
+ if (CLKF_BASEPRI(frame)) {
+ /*
+ * Save the overhead of a software interrupt;
+ * it will happen as soon as we return, so do it now.
+ */
+ (void)splsoftclock();
+ softclock();
+ } else
+ setsoftclock();
+ }
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run periodic events from timeout queue.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+ register struct callout *c;
+ register void *arg;
+ register void (*func) __P((void *));
+ register int s;
+
+ s = splhigh();
+ while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+ func = c->c_func;
+ arg = c->c_arg;
+ calltodo.c_next = c->c_next;
+ c->c_next = callfree;
+ callfree = c;
+ splx(s);
+ (*func)(arg);
+ (void) splhigh();
+ }
+ splx(s);
+}
+
+/*
+ * timeout --
+ * Execute a function after a specified length of time.
+ *
+ * untimeout --
+ * Cancel previous timeout function call.
+ *
+ * See AT&T BCI Driver Reference Manual for specification. This
+ * implementation differs from that one in that no identification
+ * value is returned from timeout, rather, the original arguments
+ * to timeout are used to identify entries for untimeout.
+ */
+void
+timeout(ftn, arg, ticks)
+ void (*ftn) __P((void *));
+ void *arg;
+ register int ticks;
+{
+ register struct callout *new, *p, *t;
+ register int s;
+
+ if (ticks <= 0)
+ ticks = 1;
+
+ /* Lock out the clock. */
+ s = splhigh();
+
+ /* Fill in the next free callout structure. */
+ if (callfree == NULL)
+ panic("timeout table full");
+ new = callfree;
+ callfree = new->c_next;
+ new->c_arg = arg;
+ new->c_func = ftn;
+
+ /*
+ * The time for each event is stored as a difference from the time
+ * of the previous event on the queue. Walk the queue, correcting
+ * the ticks argument for queue entries passed. Correct the ticks
+ * value for the queue entry immediately after the insertion point
+ * as well. Watch out for negative c_time values; these represent
+ * overdue events.
+ */
+ for (p = &calltodo;
+ (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+ if (t->c_time > 0)
+ ticks -= t->c_time;
+ new->c_time = ticks;
+ if (t != NULL)
+ t->c_time -= ticks;
+
+ /* Insert the new entry into the queue. */
+ p->c_next = new;
+ new->c_next = t;
+ splx(s);
+}
+
+void
+untimeout(ftn, arg)
+ void (*ftn) __P((void *));
+ void *arg;
+{
+ register struct callout *p, *t;
+ register int s;
+
+ s = splhigh();
+ for (p = &calltodo; (t = p->c_next) != NULL; p = t)
+ if (t->c_func == ftn && t->c_arg == arg) {
+ /* Increment next entry's tick count. */
+ if (t->c_next && t->c_time > 0)
+ t->c_next->c_time += t->c_time;
+
+ /* Move entry from callout queue to callfree queue. */
+ p->c_next = t->c_next;
+ t->c_next = callfree;
+ callfree = t;
+ break;
+ }
+ splx(s);
+}
+
+/*
+ * Compute number of hz until specified time. Used to
+ * compute third argument to timeout() from an absolute time.
+ */
+int
+hzto(tv)
+ struct timeval *tv;
+{
+ register long ticks, sec;
+ int s;
+
+ /*
+ * If the number of milliseconds will fit in 32 bit arithmetic,
+ * then compute the number of milliseconds to time and scale to
+ * ticks. Otherwise just compute the number of hz in time, rounding
+ * times greater than representable to maximum value.
+ *
+ * Delta times less than 25 days can be computed ``exactly''.
+ * Maximum value for any timeout in 10ms ticks is 250 days.
+ */
+ s = splhigh();
+ sec = tv->tv_sec - time.tv_sec;
+ if (sec <= 0x7fffffff / 1000 - 1000)
+ ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+ (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+ else if (sec <= 0x7fffffff / hz)
+ ticks = sec * hz;
+ else
+ ticks = 0x7fffffff;
+ splx(s);
+ return (ticks);
+}
+
+/*
+ * Start profiling on a process.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+ register struct proc *p;
+{
+ int s;
+
+ if ((p->p_flag & P_PROFIL) == 0) {
+ p->p_flag |= P_PROFIL;
+ if (++profprocs == 1 && stathz != 0) {
+ s = splstatclock();
+ psdiv = pscnt = psratio;
+ setstatclockrate(profhz);
+ splx(s);
+ }
+ }
+}
+
+/*
+ * Stop profiling on a process.
+ */
+void
+stopprofclock(p)
+ register struct proc *p;
+{
+ int s;
+
+ if (p->p_flag & P_PROFIL) {
+ p->p_flag &= ~P_PROFIL;
+ if (--profprocs == 0 && stathz != 0) {
+ s = splstatclock();
+ psdiv = pscnt = 1;
+ setstatclockrate(stathz);
+ splx(s);
+ }
+ }
+}
+
+int dk_ndrive = DK_NDRIVE;
+
+/*
+ * Statistics clock. Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ */
+void
+statclock(frame)
+ register struct clockframe *frame;
+{
+#ifdef GPROF
+ register struct gmonparam *g;
+#endif
+ register struct proc *p;
+ register int i;
+
+ if (CLKF_USERMODE(frame)) {
+ p = curproc;
+ if (p->p_flag & P_PROFIL)
+ addupc_intr(p, CLKF_PC(frame), 1);
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from user mode; CPU was in user state.
+ * If this process is being profiled record the tick.
+ */
+ p->p_uticks++;
+ if (p->p_nice > NZERO)
+ cp_time[CP_NICE]++;
+ else
+ cp_time[CP_USER]++;
+ } else {
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addupc_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = CLKF_PC(frame) - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from kernel mode, so we were:
+ * - handling an interrupt,
+ * - doing syscall or trap work on behalf of the current
+ * user process, or
+ * - spinning in the idle loop.
+ * Whichever it is, charge the time as appropriate.
+ * Note that we charge interrupts to the current process,
+ * regardless of whether they are ``for'' that process,
+ * so that we know how much of its real time was spent
+ * in ``non-process'' (i.e., interrupt) work.
+ */
+ p = curproc;
+ if (CLKF_INTR(frame)) {
+ if (p != NULL)
+ p->p_iticks++;
+ cp_time[CP_INTR]++;
+ } else if (p != NULL) {
+ p->p_sticks++;
+ cp_time[CP_SYS]++;
+ } else
+ cp_time[CP_IDLE]++;
+ }
+ pscnt = psdiv;
+
+ /*
+ * We maintain statistics shown by user-level statistics
+ * programs: the amount of time in each cpu state, and
+ * the amount of time each of DK_NDRIVE ``drives'' is busy.
+ *
+ * XXX should either run linked list of drives, or (better)
+ * grab timestamps in the start & done code.
+ */
+ for (i = 0; i < DK_NDRIVE; i++)
+ if (dk_busy & (1 << i))
+ dk_time[i]++;
+
+ /*
+ * We adjust the priority of the current process. The priority of
+ * a process gets worse as it accumulates CPU time. The cpu usage
+ * estimator (p_estcpu) is increased here. The formula for computing
+ * priorities (in kern_synch.c) will compute a different value each
+ * time p_estcpu increases by 4. The cpu usage estimator ramps up
+ * quite quickly when the process is running (linearly), and decays
+ * away exponentially, at a rate which is proportionally slower when
+ * the system is busy. The basic principle is that the system will
+ * 90% forget that the process used a lot of CPU time in 5 * loadav
+ * seconds. This causes the system to favor processes which haven't
+ * run much recently, and to round-robin among other processes.
+ */
+ if (p != NULL) {
+ p->p_cpticks++;
+ if (++p->p_estcpu == 0)
+ p->p_estcpu--;
+ if ((p->p_estcpu & 3) == 0) {
+ resetpriority(p);
+ if (p->p_priority >= PUSER)
+ p->p_priority = p->p_usrpri;
+ }
+ }
+}
+
+/*
+ * Return information about system clocks.
+ */
+sysctl_clockrate(where, sizep)
+ register char *where;
+ size_t *sizep;
+{
+ struct clockinfo clkinfo;
+
+ /*
+ * Construct clockinfo structure.
+ */
+ clkinfo.hz = hz;
+ clkinfo.tick = tick;
+ clkinfo.profhz = profhz;
+ clkinfo.stathz = stathz ? stathz : hz;
+ return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
+}
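
kern_timeout.c as added here is byte-for-byte identical to kern_tc.c above (both blobs carry index f42900c). One detail worth pulling out of the shared code is hzto()'s overflow guard: millisecond precision is used only while sec * 1000 fits in 32-bit arithmetic, then it falls back to whole seconds, then saturates. Below is a standalone sketch assuming a 32-bit long and the hz = 100 / tick = 10000 defaults from subr_param.c later in this commit; hzto_sketch() is an illustrative name, not the kernel function.

/*
 * Sketch of hzto()'s three-range computation (assumptions above).
 */
#include <stdio.h>

#define HZ	100
#define TICK	10000		/* usec per tick */

static long
hzto_sketch(long dsec, long dusec)
{
	long t;

	if (dsec <= 0x7fffffff / 1000 - 1000)		/* ~24.8 days */
		t = (dsec * 1000 + dusec / 1000) / (TICK / 1000);
	else if (dsec <= 0x7fffffff / HZ)		/* ~248 days */
		t = dsec * HZ;
	else
		t = 0x7fffffff;				/* saturate */
	return (t);
}

int
main(void)
{
	printf("%ld\n", hzto_sketch(1, 500000));	/* 1.5s -> 150 ticks */
	printf("%ld\n", hzto_sketch(30 * 86400L, 0));	/* 30 days -> sec * HZ */
	return (0);
}

The "- 1000" headroom in the first comparison keeps the millisecond product safely below LONG_MAX even after the sub-second tv_usec contribution (at most ~1000 ms) is added in.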
diff --git a/sys/kern/subr_clist.c b/sys/kern/subr_clist.c
new file mode 100644
index 0000000..fe8f000
--- /dev/null
+++ b/sys/kern/subr_clist.c
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)tty_subr.c 8.2 (Berkeley) 9/5/93
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+char cwaiting;
+struct cblock *cfree, *cfreelist;
+int cfreecount, nclist;
+
+void
+clist_init()
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+getc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((char)0);
+}
+
+q_to_b(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+ndqb(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+void
+ndflush(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+putc(a1, a2)
+ char a1;
+ struct clist *a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+b_to_q(a1, a2, a3)
+ char *a1;
+ int a2;
+ struct clist *a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+char *
+nextc(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int *a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((char *)0);
+}
+
+unputc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((char)0);
+}
+
+void
+catq(a1, a2)
+ struct clist *a1, *a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
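
Every routine in subr_clist.c is a stub whose body was deleted upstream, as the ``Body deleted'' comments note, so ports have to supply their own clist code. As a rough illustration of the contract the stubs preserve, here is a hypothetical getc()/putc() pair backed by a flat ring buffer; struct clist_sketch and CBUFSIZ are inventions for this sketch, not the historical implementation, which chained fixed-size cblocks from cfreelist instead.

/*
 * Hypothetical flat-buffer sketch of the clist getc()/putc() contract.
 */
#define CBUFSIZ	256

struct clist_sketch {
	int c_cc;			/* character count */
	int c_head, c_tail;
	char c_buf[CBUFSIZ];
};

static int
putc_sketch(char c, struct clist_sketch *cl)
{
	if (cl->c_cc >= CBUFSIZ)
		return (-1);		/* no space */
	cl->c_buf[cl->c_tail] = c;
	cl->c_tail = (cl->c_tail + 1) % CBUFSIZ;
	cl->c_cc++;
	return (0);
}

static int
getc_sketch(struct clist_sketch *cl)
{
	int c;

	if (cl->c_cc == 0)
		return (-1);		/* empty; the stub above returns 0 */
	c = (unsigned char)cl->c_buf[cl->c_head];
	cl->c_head = (cl->c_head + 1) % CBUFSIZ;
	cl->c_cc--;
	return (c);
}

int
main(void)
{
	struct clist_sketch cl = { 0, 0, 0, { 0 } };

	putc_sketch('x', &cl);
	return (getc_sketch(&cl) == 'x' ? 0 : 1);
}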
diff --git a/sys/kern/subr_disklabel.c b/sys/kern/subr_disklabel.c
new file mode 100644
index 0000000..78dede4
--- /dev/null
+++ b/sys/kern/subr_disklabel.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/disklabel.h>
+#include <sys/syslog.h>
+
+/*
+ * Seek sort for disks. We depend on the driver which calls us using b_resid
+ * as the current cylinder number.
+ *
+ * The argument ap structure holds a b_actf activity chain pointer on which we
+ * keep two queues, sorted in ascending cylinder order. The first queue holds
+ * those requests which are positioned after the current cylinder (in the first
+ * request); the second holds requests which came in after their cylinder number
+ * was passed. Thus we implement a one way scan, retracting after reaching the
+ * end of the drive to the first request on the second queue, at which time it
+ * becomes the first queue.
+ *
+ * A one-way scan is natural because of the way UNIX read-ahead blocks are
+ * allocated.
+ */
+
+/*
+ * For portability with historic industry practice, the
+ * cylinder number has to be maintained in the `b_resid'
+ * field.
+ */
+#define b_cylinder b_resid
+
+void
+disksort(ap, bp)
+ register struct buf *ap, *bp;
+{
+ register struct buf *bq;
+
+ /* If the queue is empty, then it's easy. */
+ if (ap->b_actf == NULL) {
+ bp->b_actf = NULL;
+ ap->b_actf = bp;
+ return;
+ }
+
+ /*
+ * If we lie after the first (currently active) request, then we
+ * must locate the second request list and add ourselves to it.
+ */
+ bq = ap->b_actf;
+ if (bp->b_cylinder < bq->b_cylinder) {
+ while (bq->b_actf) {
+ /*
+ * Check for an ``inversion'' in the normally ascending
+ * cylinder numbers, indicating the start of the second
+ * request list.
+ */
+ if (bq->b_actf->b_cylinder < bq->b_cylinder) {
+ /*
+ * Search the second request list for the first
+ * request at a larger cylinder number. We go
+ * before that; if there is no such request, we
+ * go at end.
+ */
+ do {
+ if (bp->b_cylinder <
+ bq->b_actf->b_cylinder)
+ goto insert;
+ if (bp->b_cylinder ==
+ bq->b_actf->b_cylinder &&
+ bp->b_blkno < bq->b_actf->b_blkno)
+ goto insert;
+ bq = bq->b_actf;
+ } while (bq->b_actf);
+ goto insert; /* after last */
+ }
+ bq = bq->b_actf;
+ }
+ /*
+ * No inversions... we will go after the last, and
+ * be the first request in the second request list.
+ */
+ goto insert;
+ }
+ /*
+ * Request is at/after the current request...
+ * sort in the first request list.
+ */
+ while (bq->b_actf) {
+ /*
+ * We want to go after the current request if there is an
+ * inversion after it (i.e. it is the end of the first
+ * request list), or if the next request is a larger cylinder
+ * than our request.
+ */
+ if (bq->b_actf->b_cylinder < bq->b_cylinder ||
+ bp->b_cylinder < bq->b_actf->b_cylinder ||
+ (bp->b_cylinder == bq->b_actf->b_cylinder &&
+ bp->b_blkno < bq->b_actf->b_blkno))
+ goto insert;
+ bq = bq->b_actf;
+ }
+ /*
+ * Neither a second list nor a larger request... we go at the end of
+ * the first list, which is the same as the end of the whole shebang.
+ */
+insert: bp->b_actf = bq->b_actf;
+ bq->b_actf = bp;
+}
+
+/*
+ * Attempt to read a disk label from a device using the indicated strategy
+ * routine. The label must be partly set up before this: secpercyl and
+ * anything required in the strategy routine (e.g., sector size) must be
+ * filled in before calling us. Returns NULL on success and an error
+ * string on failure.
+ */
+char *
+readdisklabel(dev, strat, lp)
+ dev_t dev;
+ int (*strat)();
+ register struct disklabel *lp;
+{
+ register struct buf *bp;
+ struct disklabel *dlp;
+ char *msg = NULL;
+
+ if (lp->d_secperunit == 0)
+ lp->d_secperunit = 0x1fffffff;
+ lp->d_npartitions = 1;
+ if (lp->d_partitions[0].p_size == 0)
+ lp->d_partitions[0].p_size = 0x1fffffff;
+ lp->d_partitions[0].p_offset = 0;
+
+ bp = geteblk((int)lp->d_secsize);
+ bp->b_dev = dev;
+ bp->b_blkno = LABELSECTOR;
+ bp->b_bcount = lp->d_secsize;
+ bp->b_flags = B_BUSY | B_READ;
+ bp->b_cylinder = LABELSECTOR / lp->d_secpercyl;
+ (*strat)(bp);
+ if (biowait(bp))
+ msg = "I/O error";
+ else for (dlp = (struct disklabel *)bp->b_data;
+ dlp <= (struct disklabel *)((char *)bp->b_data +
+ DEV_BSIZE - sizeof(*dlp));
+ dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+ if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
+ if (msg == NULL)
+ msg = "no disk label";
+ } else if (dlp->d_npartitions > MAXPARTITIONS ||
+ dkcksum(dlp) != 0)
+ msg = "disk label corrupted";
+ else {
+ *lp = *dlp;
+ msg = NULL;
+ break;
+ }
+ }
+ bp->b_flags = B_INVAL | B_AGE;
+ brelse(bp);
+ return (msg);
+}
+
+/*
+ * Check new disk label for sensibility before setting it.
+ */
+int
+setdisklabel(olp, nlp, openmask)
+ register struct disklabel *olp, *nlp;
+ u_long openmask;
+{
+ register i;
+ register struct partition *opp, *npp;
+
+ if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
+ dkcksum(nlp) != 0)
+ return (EINVAL);
+ while ((i = ffs((long)openmask)) != 0) {
+ i--;
+ openmask &= ~(1 << i);
+ if (nlp->d_npartitions <= i)
+ return (EBUSY);
+ opp = &olp->d_partitions[i];
+ npp = &nlp->d_partitions[i];
+ if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
+ return (EBUSY);
+ /*
+ * Copy internally-set partition information
+ * if new label doesn't include it. XXX
+ */
+ if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
+ npp->p_fstype = opp->p_fstype;
+ npp->p_fsize = opp->p_fsize;
+ npp->p_frag = opp->p_frag;
+ npp->p_cpg = opp->p_cpg;
+ }
+ }
+ nlp->d_checksum = 0;
+ nlp->d_checksum = dkcksum(nlp);
+ *olp = *nlp;
+ return (0);
+}
+
+/* encoding of disk minor numbers, should be elsewhere... */
+#define dkunit(dev) (minor(dev) >> 3)
+#define dkpart(dev) (minor(dev) & 07)
+#define dkminor(unit, part) (((unit) << 3) | (part))
+
+/*
+ * Write disk label back to device after modification.
+ */
+int
+writedisklabel(dev, strat, lp)
+ dev_t dev;
+ int (*strat)();
+ register struct disklabel *lp;
+{
+ struct buf *bp;
+ struct disklabel *dlp;
+ int labelpart;
+ int error = 0;
+
+ labelpart = dkpart(dev);
+ if (lp->d_partitions[labelpart].p_offset != 0) {
+ if (lp->d_partitions[0].p_offset != 0)
+ return (EXDEV); /* not quite right */
+ labelpart = 0;
+ }
+ bp = geteblk((int)lp->d_secsize);
+ bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
+ bp->b_blkno = LABELSECTOR;
+ bp->b_bcount = lp->d_secsize;
+ bp->b_flags = B_READ;
+ (*strat)(bp);
+ if (error = biowait(bp))
+ goto done;
+ for (dlp = (struct disklabel *)bp->b_data;
+ dlp <= (struct disklabel *)
+ ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
+ dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+ if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
+ dkcksum(dlp) == 0) {
+ *dlp = *lp;
+ bp->b_flags = B_WRITE;
+ (*strat)(bp);
+ error = biowait(bp);
+ goto done;
+ }
+ }
+ error = ESRCH;
+done:
+ brelse(bp);
+ return (error);
+}
+
+/*
+ * Compute checksum for disk label.
+ */
+dkcksum(lp)
+ register struct disklabel *lp;
+{
+ register u_short *start, *end;
+ register u_short sum = 0;
+
+ start = (u_short *)lp;
+ end = (u_short *)&lp->d_partitions[lp->d_npartitions];
+ while (start < end)
+ sum ^= *start++;
+ return (sum);
+}
+
+/*
+ * Disk error is the preface to plaintive error messages
+ * about failing disk transfers. It prints messages of the form
+
+hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
+
+ * if the offset of the error in the transfer and a disk label
+ * are both available. blkdone should be -1 if the position of the error
+ * is unknown; the disklabel pointer may be null from drivers that have not
+ * been converted to use them. The message is printed with printf
+ * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
+ * The message should be completed (with at least a newline) with printf
+ * or addlog, respectively. There is no trailing space.
+ */
+void
+diskerr(bp, dname, what, pri, blkdone, lp)
+ register struct buf *bp;
+ char *dname, *what;
+ int pri, blkdone;
+ register struct disklabel *lp;
+{
+ int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
+ register void (*pr) __P((const char *, ...));
+ char partname = 'a' + part;
+ int sn;
+
+ if (pri != LOG_PRINTF) {
+ log(pri, "");
+ pr = addlog;
+ } else
+ pr = printf;
+ (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
+ bp->b_flags & B_READ ? "read" : "writ");
+ sn = bp->b_blkno;
+ if (bp->b_bcount <= DEV_BSIZE)
+ (*pr)("%d", sn);
+ else {
+ if (blkdone >= 0) {
+ sn += blkdone;
+ (*pr)("%d of ", sn);
+ }
+ (*pr)("%d-%d", bp->b_blkno,
+ bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
+ }
+ if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
+#ifdef tahoe
+ sn *= DEV_BSIZE / lp->d_secsize; /* XXX */
+#endif
+ sn += lp->d_partitions[part].p_offset;
+ (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
+ sn / lp->d_secpercyl);
+ sn %= lp->d_secpercyl;
+ (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
+ }
+}
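
dkcksum() above folds the label into a 16-bit XOR over everything up to the end of the defined partitions, and setdisklabel() relies on the standard trick of zeroing d_checksum and then storing the XOR there, so that any later dkcksum() over a valid label yields zero. A tiny demonstration on an arbitrary buffer follows; the toy values and the xorsum() name are illustrative, not <sys/disklabel.h> definitions.

/*
 * XOR-checksum demonstration (toy data, see above).
 */
#include <stdio.h>

typedef unsigned short u_short;

static u_short
xorsum(u_short *start, u_short *end)
{
	u_short sum = 0;

	while (start < end)
		sum ^= *start++;
	return (sum);
}

int
main(void)
{
	u_short label[8] = { 0x8257, 0x0001, 0x0200, 0, 0x1234, 0, 0, 0xbeef };
	u_short *cksum = &label[3];	/* stand-in for d_checksum */

	*cksum = 0;
	*cksum = xorsum(label, label + 8);
	printf("verify: %#x\n", xorsum(label, label + 8));	/* prints 0 */
	return (0);
}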
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
new file mode 100644
index 0000000..9f4e2ca
--- /dev/null
+++ b/sys/kern/subr_param.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1980, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)param.c 8.2 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/callout.h>
+#include <sys/clist.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+
+#ifdef SYSVSHM
+#include <machine/vmparam.h>
+#include <sys/shm.h>
+#endif
+
+/*
+ * System parameter formulae.
+ *
+ * This file is copied into each directory where we compile
+ * the kernel; it should be modified there to suit local taste
+ * if necessary.
+ *
+ * Compiled with -DHZ=xx -DTIMEZONE=x -DDST=x -DMAXUSERS=xx
+ */
+
+#ifndef HZ
+#define HZ 100
+#endif
+int hz = HZ;
+int tick = 1000000 / HZ;
+int tickadj = 30000 / (60 * HZ); /* can adjust 30ms in 60s */
+struct timezone tz = { TIMEZONE, DST };
+#define NPROC (20 + 16 * MAXUSERS)
+int maxproc = NPROC;
+#define NTEXT (80 + NPROC / 8) /* actually the object cache */
+#define NVNODE (NPROC + NTEXT + 100)
+int desiredvnodes = NVNODE;
+int maxfiles = 3 * (NPROC + MAXUSERS) + 80;
+int ncallout = 16 + NPROC;
+int nclist = 60 + 12 * MAXUSERS;
+int nmbclusters = NMBCLUSTERS;
+int fscale = FSCALE; /* kernel uses `FSCALE', user uses `fscale' */
+
+/*
+ * Values in support of System V compatible shared memory. XXX
+ */
+#ifdef SYSVSHM
+#define SHMMAX (SHMMAXPGS*NBPG)
+#define SHMMIN 1
+#define SHMMNI 32 /* <= SHMMMNI in shm.h */
+#define SHMSEG 8
+#define SHMALL (SHMMAXPGS/CLSIZE)
+
+struct shminfo shminfo = {
+ SHMMAX,
+ SHMMIN,
+ SHMMNI,
+ SHMSEG,
+ SHMALL
+};
+#endif
+
+/*
+ * These are initialized at bootstrap time
+ * to values dependent on memory size
+ */
+int nbuf, nswbuf;
+
+/*
+ * These have to be allocated somewhere; allocating
+ * them here forces loader errors if this file is omitted
+ * (if they've been externed everywhere else; hah!).
+ */
+struct callout *callout;
+struct cblock *cfree;
+struct buf *buf, *swbuf;
+char *buffers;
+
+/*
+ * Proc/pgrp hashing.
+ * Here so that hash table sizes can depend on MAXUSERS/NPROC.
+ * Hash size must be a power of two.
+ * NOW omission of this file will cause loader errors!
+ */
+
+#if NPROC > 1024
+#define PIDHSZ 512
+#else
+#if NPROC > 512
+#define PIDHSZ 256
+#else
+#if NPROC > 256
+#define PIDHSZ 128
+#else
+#define PIDHSZ 64
+#endif
+#endif
+#endif
+
+struct proc *pidhash[PIDHSZ];
+struct pgrp *pgrphash[PIDHSZ];
+int pidhashmask = PIDHSZ - 1;
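
The formulae in param.c cascade from MAXUSERS, so a single build-time number sizes most of the static tables, including the PIDHSZ selection at the end of the file. A worked example follows, taking MAXUSERS = 32 as an assumed value.

/*
 * Worked example of the sizing formulae for an assumed MAXUSERS = 32.
 */
#include <stdio.h>

#define MAXUSERS 32
#define NPROC	(20 + 16 * MAXUSERS)		/* 532 */
#define NTEXT	(80 + NPROC / 8)		/* 146 */
#define NVNODE	(NPROC + NTEXT + 100)		/* 778 */

int
main(void)
{
	printf("maxproc       %d\n", NPROC);
	printf("desiredvnodes %d\n", NVNODE);
	printf("maxfiles      %d\n", 3 * (NPROC + MAXUSERS) + 80);	/* 1772 */
	printf("ncallout      %d\n", 16 + NPROC);			/* 548 */
	printf("nclist        %d\n", 60 + 12 * MAXUSERS);		/* 444 */
	printf("PIDHSZ        %d\n", NPROC > 1024 ? 512 :
	    NPROC > 512 ? 256 : NPROC > 256 ? 128 : 64);		/* 256 */
	return (0);
}

Because PIDHSZ must be a power of two, pidhashmask = PIDHSZ - 1 lets pid and pgrp hashing use a mask rather than a modulo.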
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
new file mode 100644
index 0000000..d4af592
--- /dev/null
+++ b/sys/kern/uipc_sockbuf.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+/*
+ * Primitive routines for operating on sockets and socket buffers
+ */
+
+/* strings for sleep message: */
+char netio[] = "netio";
+char netcon[] = "netcon";
+char netcls[] = "netcls";
+
+u_long sb_max = SB_MAX; /* patchable */
+
+/*
+ * Procedures to manipulate state flags of socket
+ * and do appropriate wakeups. Normal sequence from the
+ * active (originating) side is that soisconnecting() is
+ * called during processing of connect() call,
+ * resulting in an eventual call to soisconnected() if/when the
+ * connection is established. When the connection is torn down
+ * soisdisconnecting() is called during processing of disconnect() call,
+ * and soisdisconnected() is called when the connection to the peer
+ * is totally severed. The semantics of these routines are such that
+ * connectionless protocols can call soisconnected() and soisdisconnected()
+ * only, bypassing the in-progress calls when setting up a ``connection''
+ * takes no time.
+ *
+ * From the passive side, a socket is created with
+ * two queues of sockets: so_q0 for connections in progress
+ * and so_q for connections already made and awaiting user acceptance.
+ * As a protocol is preparing incoming connections, it creates a socket
+ * structure queued on so_q0 by calling sonewconn(). When the connection
+ * is established, soisconnected() is called, and transfers the
+ * socket structure to so_q, making it available to accept().
+ *
+ * If a socket is closed with sockets on either
+ * so_q0 or so_q, these sockets are dropped.
+ *
+ * If higher level protocols are implemented in
+ * the kernel, the wakeups done here will sometimes
+ * cause software-interrupt process scheduling.
+ */
+
+soisconnecting(so)
+ register struct socket *so;
+{
+
+ so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
+ so->so_state |= SS_ISCONNECTING;
+}
+
+soisconnected(so)
+ register struct socket *so;
+{
+ register struct socket *head = so->so_head;
+
+ so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
+ so->so_state |= SS_ISCONNECTED;
+ if (head && soqremque(so, 0)) {
+ soqinsque(head, so, 1);
+ sorwakeup(head);
+ wakeup((caddr_t)&head->so_timeo);
+ } else {
+ wakeup((caddr_t)&so->so_timeo);
+ sorwakeup(so);
+ sowwakeup(so);
+ }
+}
+
+soisdisconnecting(so)
+ register struct socket *so;
+{
+
+ so->so_state &= ~SS_ISCONNECTING;
+ so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
+ wakeup((caddr_t)&so->so_timeo);
+ sowwakeup(so);
+ sorwakeup(so);
+}
+
+soisdisconnected(so)
+ register struct socket *so;
+{
+
+ so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
+ so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
+ wakeup((caddr_t)&so->so_timeo);
+ sowwakeup(so);
+ sorwakeup(so);
+}
+
+/*
+ * When an attempt at a new connection is noted on a socket
+ * which accepts connections, sonewconn is called. If the
+ * connection is possible (subject to space constraints, etc.)
+ * then we allocate a new structure, properly linked into the
+ * data structure of the original socket, and return this.
+ * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
+ *
+ * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
+ * to catch calls that are missing the (new) second parameter.
+ */
+struct socket *
+sonewconn1(head, connstatus)
+ register struct socket *head;
+ int connstatus;
+{
+ register struct socket *so;
+ int soqueue = connstatus ? 1 : 0;
+
+ if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
+ return ((struct socket *)0);
+ MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
+ if (so == NULL)
+ return ((struct socket *)0);
+ bzero((caddr_t)so, sizeof(*so));
+ so->so_type = head->so_type;
+ so->so_options = head->so_options &~ SO_ACCEPTCONN;
+ so->so_linger = head->so_linger;
+ so->so_state = head->so_state | SS_NOFDREF;
+ so->so_proto = head->so_proto;
+ so->so_timeo = head->so_timeo;
+ so->so_pgid = head->so_pgid;
+ (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
+ soqinsque(head, so, soqueue);
+ if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
+ (void) soqremque(so, soqueue);
+ (void) free((caddr_t)so, M_SOCKET);
+ return ((struct socket *)0);
+ }
+ if (connstatus) {
+ sorwakeup(head);
+ wakeup((caddr_t)&head->so_timeo);
+ so->so_state |= connstatus;
+ }
+ return (so);
+}
+
+soqinsque(head, so, q)
+ register struct socket *head, *so;
+ int q;
+{
+
+ register struct socket **prev;
+ so->so_head = head;
+ if (q == 0) {
+ head->so_q0len++;
+ so->so_q0 = 0;
+ for (prev = &(head->so_q0); *prev; )
+ prev = &((*prev)->so_q0);
+ } else {
+ head->so_qlen++;
+ so->so_q = 0;
+ for (prev = &(head->so_q); *prev; )
+ prev = &((*prev)->so_q);
+ }
+ *prev = so;
+}
+
+soqremque(so, q)
+ register struct socket *so;
+ int q;
+{
+ register struct socket *head, *prev, *next;
+
+ head = so->so_head;
+ prev = head;
+ for (;;) {
+ next = q ? prev->so_q : prev->so_q0;
+ if (next == so)
+ break;
+ if (next == 0)
+ return (0);
+ prev = next;
+ }
+ if (q == 0) {
+ prev->so_q0 = next->so_q0;
+ head->so_q0len--;
+ } else {
+ prev->so_q = next->so_q;
+ head->so_qlen--;
+ }
+ next->so_q0 = next->so_q = 0;
+ next->so_head = 0;
+ return (1);
+}
+
+/*
+ * Socantsendmore indicates that no more data will be sent on the
+ * socket; it would normally be applied to a socket by the protocol
+ * code (in the PRU_SHUTDOWN case) when the user informs the system
+ * that no more data is to be sent. Socantrcvmore indicates that no more data
+ * will be received, and will normally be applied to the socket by a
+ * protocol when it detects that the peer will send no more data.
+ * Data queued for reading in the socket may yet be read.
+ */
+
+socantsendmore(so)
+ struct socket *so;
+{
+
+ so->so_state |= SS_CANTSENDMORE;
+ sowwakeup(so);
+}
+
+socantrcvmore(so)
+ struct socket *so;
+{
+
+ so->so_state |= SS_CANTRCVMORE;
+ sorwakeup(so);
+}
+
+/*
+ * Wait for data to arrive at/drain from a socket buffer.
+ */
+sbwait(sb)
+ struct sockbuf *sb;
+{
+
+ sb->sb_flags |= SB_WAIT;
+ return (tsleep((caddr_t)&sb->sb_cc,
+ (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
+ sb->sb_timeo));
+}
+
+/*
+ * Lock a sockbuf already known to be locked (the slow path of the
+ * sblock() macro); wait until the holder releases it, then take it,
+ * and return any error returned from tsleep (EINTR).
+ */
+sb_lock(sb)
+ register struct sockbuf *sb;
+{
+ int error;
+
+ while (sb->sb_flags & SB_LOCK) {
+ sb->sb_flags |= SB_WANT;
+ if (error = tsleep((caddr_t)&sb->sb_flags,
+ (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
+ netio, 0))
+ return (error);
+ }
+ sb->sb_flags |= SB_LOCK;
+ return (0);
+}
+
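+/*
+ * Sketch of the intended use (illustrative only): sb_lock() is the
+ * slow path of the sblock() macro, which is assumed here to come
+ * from socketvar.h together with sbunlock().  A receive path built
+ * on these routines looks roughly like
+ *
+ *	restart:
+ *	if (error = sblock(&so->so_rcv))
+ *		return (error);
+ *	if (so->so_rcv.sb_cc == 0) {
+ *		...handle EOF and nonblocking sockets...
+ *		sbunlock(&so->so_rcv);
+ *		if (error = sbwait(&so->so_rcv))
+ *			return (error);
+ *		goto restart;
+ *	}
+ *	...consume data...
+ *	sbunlock(&so->so_rcv);
+ */
+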
+/*
+ * Wakeup processes waiting on a socket buffer.
+ * Do asynchronous notification via SIGIO
+ * if the socket has the SS_ASYNC flag set.
+ */
+sowakeup(so, sb)
+ register struct socket *so;
+ register struct sockbuf *sb;
+{
+ struct proc *p;
+
+ selwakeup(&sb->sb_sel);
+ sb->sb_flags &= ~SB_SEL;
+ if (sb->sb_flags & SB_WAIT) {
+ sb->sb_flags &= ~SB_WAIT;
+ wakeup((caddr_t)&sb->sb_cc);
+ }
+ if (so->so_state & SS_ASYNC) {
+ if (so->so_pgid < 0)
+ gsignal(-so->so_pgid, SIGIO);
+ else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
+ psignal(p, SIGIO);
+ }
+}
+
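+/*
+ * For illustration (not in the original file): a user process asks
+ * for these SIGIO notifications with the standard fcntl() calls,
+ * which set so_pgid and SS_ASYNC as tested above:
+ *
+ *	fcntl(s, F_SETOWN, getpid());	so_pgid > 0: signal one process
+ *	fcntl(s, F_SETOWN, -getpgrp());	so_pgid < 0: signal a process group
+ *	fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | FASYNC);
+ */
+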
+/*
+ * Socket buffer (struct sockbuf) utility routines.
+ *
+ * Each socket contains two socket buffers: one for sending data and
+ * one for receiving data. Each buffer contains a queue of mbufs,
+ * information about the number of mbufs and amount of data in the
+ * queue, and other fields allowing select() statements and notification
+ * on data availability to be implemented.
+ *
+ * Data stored in a socket buffer is maintained as a list of records.
+ * Each record is a list of mbufs chained together with the m_next
+ * field. Records are chained together with the m_nextpkt field. The upper
+ * level routine soreceive() expects the following conventions to be
+ * observed when placing information in the receive buffer:
+ *
+ * 1. If the protocol requires each message be preceded by the sender's
+ * name, then a record containing that name must be present before
+ * any associated data (mbuf's must be of type MT_SONAME).
+ * 2. If the protocol supports the exchange of ``access rights'' (really
+ * just additional data associated with the message), and there are
+ * ``rights'' to be received, then a record containing this data
+ * should be present (mbuf's must be of type MT_RIGHTS).
+ * 3. If a name or rights record exists, then it must be followed by
+ * a data record, perhaps of zero length.
+ *
+ * Before using a new socket structure it is first necessary to reserve
+ * buffer space for the socket, by calling sbreserve(). This should commit
+ * some of the available buffer space in the system buffer pool for the
+ * socket (currently, it does nothing but enforce limits). The space
+ * should be released by calling sbrelease() when the socket is destroyed.
+ */
+
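+/*
+ * A sketch of the layout these conventions produce (illustrative
+ * only): two datagrams queued in a receive buffer, each preceded
+ * by the sender's name, and the loop a reader uses to walk them:
+ *
+ *	sb_mb -> MT_SONAME -m_next-> MT_DATA
+ *	    |m_nextpkt
+ *	    v
+ *	    MT_SONAME -m_next-> MT_DATA
+ *
+ *	struct mbuf *record, *m;
+ *	for (record = sb->sb_mb; record; record = record->m_nextpkt)
+ *		for (m = record; m; m = m->m_next)
+ *			...m->m_type says name, rights or data...
+ */
+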
+soreserve(so, sndcc, rcvcc)
+ register struct socket *so;
+ u_long sndcc, rcvcc;
+{
+
+ if (sbreserve(&so->so_snd, sndcc) == 0)
+ goto bad;
+ if (sbreserve(&so->so_rcv, rcvcc) == 0)
+ goto bad2;
+ if (so->so_rcv.sb_lowat == 0)
+ so->so_rcv.sb_lowat = 1;
+ if (so->so_snd.sb_lowat == 0)
+ so->so_snd.sb_lowat = MCLBYTES;
+ if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
+ so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
+ return (0);
+bad2:
+ sbrelease(&so->so_snd);
+bad:
+ return (ENOBUFS);
+}
+
+/*
+ * Allot mbufs to a sockbuf.
+ * Attempt to scale mbmax so that mbcnt doesn't become limiting
+ * if buffering efficiency is near the normal case.
+ */
+sbreserve(sb, cc)
+ struct sockbuf *sb;
+ u_long cc;
+{
+
+ if (cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES))
+ return (0);
+ sb->sb_hiwat = cc;
+ sb->sb_mbmax = min(cc * 2, sb_max);
+ if (sb->sb_lowat > sb->sb_hiwat)
+ sb->sb_lowat = sb->sb_hiwat;
+ return (1);
+}
+
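+/*
+ * Worked example of the limit above (the figures are illustrative
+ * and machine-dependent): with MSIZE 128, MCLBYTES 1024 and sb_max
+ * 256k, the largest acceptable cc is 256k * 1024 / 1152, about 233k.
+ * In the best case that data sits in cc/MCLBYTES clusters, each
+ * accounting for MSIZE + MCLBYTES bytes of mbuf storage, for a
+ * total of cc * 1152 / 1024 bytes, which is exactly sb_max; mbcnt
+ * only becomes limiting when buffering is less efficient than that.
+ */
+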
+/*
+ * Free mbufs held by a socket, and reserved mbuf space.
+ */
+sbrelease(sb)
+ struct sockbuf *sb;
+{
+
+ sbflush(sb);
+ sb->sb_hiwat = sb->sb_mbmax = 0;
+}
+
+/*
+ * Routines to add and remove
+ * data from an mbuf queue.
+ *
+ * The routines sbappend() or sbappendrecord() are normally called to
+ * append new mbufs to a socket buffer, after checking that adequate
+ * space is available, by comparing the result of sbspace() with the amount
+ * of data to be added. sbappendrecord() differs from sbappend() in
+ * that data supplied is treated as the beginning of a new record.
+ * To place a sender's address, optional access rights, and data in a
+ * socket receive buffer, sbappendaddr() should be used. To place
+ * access rights and data in a socket receive buffer, sbappendrights()
+ * should be used. In either case, the new data begins a new record.
+ * Note that unlike sbappend() and sbappendrecord(), these routines check
+ * for the caller that there will be enough space to store the data.
+ * Each fails if there is not enough space, or if it cannot find mbufs
+ * to store additional information in.
+ *
+ * Reliable protocols may use the socket send buffer to hold data
+ * awaiting acknowledgement. A protocol normally copies data from a
+ * socket send buffer with m_copy for output to a peer, and then
+ * removes the data from the socket buffer with sbdrop() or
+ * sbdroprecord() when the data is acknowledged by the peer.
+ */
+
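+/*
+ * An illustrative sketch of these conventions (not from the original
+ * sources; from is the sender's sockaddr, and off/len/acked are the
+ * protocol's own bookkeeping).  A datagram protocol's input routine
+ * might queue an arriving packet for the reader with
+ *
+ *	if (sbappendaddr(&so->so_rcv, from, m, (struct mbuf *)0) == 0)
+ *		m_freem(m);		no space or no mbufs: drop it
+ *	else
+ *		sorwakeup(so);
+ *
+ * while a reliable protocol transmits copies of buffered data and
+ * trims the buffer as acknowledgements arrive:
+ *
+ *	m = m_copy(so->so_snd.sb_mb, off, len);
+ *	...
+ *	sbdrop(&so->so_snd, acked);
+ */
+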
+/*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb. The additional space associated with the
+ * mbuf chain is recorded in sb. Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+sbappend(sb, m)
+ struct sockbuf *sb;
+ struct mbuf *m;
+{
+ register struct mbuf *n;
+
+ if (m == 0)
+ return;
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ do {
+ if (n->m_flags & M_EOR) {
+ sbappendrecord(sb, m); /* XXXXXX!!!! */
+ return;
+ }
+ } while (n->m_next && (n = n->m_next));
+ }
+ sbcompress(sb, m, n);
+}
+
+#ifdef SOCKBUF_DEBUG
+sbcheck(sb)
+ register struct sockbuf *sb;
+{
+ register struct mbuf *m;
+ register int len = 0, mbcnt = 0;
+
+ for (m = sb->sb_mb; m; m = m->m_next) {
+ len += m->m_len;
+ mbcnt += MSIZE;
+ if (m->m_flags & M_EXT)
+ mbcnt += m->m_ext.ext_size;
+ if (m->m_nextpkt)
+ panic("sbcheck nextpkt");
+ }
+ if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
+ printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
+ mbcnt, sb->sb_mbcnt);
+ panic("sbcheck");
+ }
+}
+#endif
+
+/*
+ * As above, except the mbuf chain
+ * begins a new record.
+ */
+sbappendrecord(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+ register struct mbuf *m;
+
+ if (m0 == 0)
+ return;
+ if (m = sb->sb_mb)
+ while (m->m_nextpkt)
+ m = m->m_nextpkt;
+ /*
+ * Put the first mbuf on the queue.
+ * Note this permits zero length records.
+ */
+ sballoc(sb, m0);
+ if (m)
+ m->m_nextpkt = m0;
+ else
+ sb->sb_mb = m0;
+ m = m0->m_next;
+ m0->m_next = 0;
+ if (m && (m0->m_flags & M_EOR)) {
+ m0->m_flags &= ~M_EOR;
+ m->m_flags |= M_EOR;
+ }
+ sbcompress(sb, m, m0);
+}
+
+/*
+ * As above except that OOB data
+ * is inserted at the beginning of the sockbuf,
+ * but after any other OOB data.
+ */
+sbinsertoob(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+ register struct mbuf *m;
+ register struct mbuf **mp;
+
+ if (m0 == 0)
+ return;
+ for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) {
+ again:
+ switch (m->m_type) {
+
+ case MT_OOBDATA:
+ continue; /* WANT next train */
+
+ case MT_CONTROL:
+ if (m = m->m_next)
+ goto again; /* inspect THIS train further */
+ }
+ break;
+ }
+ /*
+ * Put the first mbuf on the queue.
+ * Note this permits zero length records.
+ */
+ sballoc(sb, m0);
+ m0->m_nextpkt = *mp;
+ *mp = m0;
+ m = m0->m_next;
+ m0->m_next = 0;
+ if (m && (m0->m_flags & M_EOR)) {
+ m0->m_flags &= ~M_EOR;
+ m->m_flags |= M_EOR;
+ }
+ sbcompress(sb, m, m0);
+}
+
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket. If present,
+ * m0 must include a packet header with total length.
+ * Returns 0 if no space in sockbuf or insufficient mbufs.
+ */
+sbappendaddr(sb, asa, m0, control)
+ register struct sockbuf *sb;
+ struct sockaddr *asa;
+ struct mbuf *m0, *control;
+{
+ register struct mbuf *m, *n;
+ int space = asa->sa_len;
+
+	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
+		panic("sbappendaddr");
+ if (m0)
+ space += m0->m_pkthdr.len;
+ for (n = control; n; n = n->m_next) {
+ space += n->m_len;
+ if (n->m_next == 0) /* keep pointer to last control buf */
+ break;
+ }
+ if (space > sbspace(sb))
+ return (0);
+ if (asa->sa_len > MLEN)
+ return (0);
+ MGET(m, M_DONTWAIT, MT_SONAME);
+ if (m == 0)
+ return (0);
+ m->m_len = asa->sa_len;
+ bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
+ if (n)
+ n->m_next = m0; /* concatenate data to control */
+ else
+ control = m0;
+ m->m_next = control;
+ for (n = m; n; n = n->m_next)
+ sballoc(sb, n);
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ n->m_nextpkt = m;
+ } else
+ sb->sb_mb = m;
+ return (1);
+}
+
+sbappendcontrol(sb, m0, control)
+ struct sockbuf *sb;
+ struct mbuf *control, *m0;
+{
+ register struct mbuf *m, *n;
+ int space = 0;
+
+ if (control == 0)
+ panic("sbappendcontrol");
+ for (m = control; ; m = m->m_next) {
+ space += m->m_len;
+ if (m->m_next == 0)
+ break;
+ }
+ n = m; /* save pointer to last control buffer */
+ for (m = m0; m; m = m->m_next)
+ space += m->m_len;
+ if (space > sbspace(sb))
+ return (0);
+ n->m_next = m0; /* concatenate data to control */
+ for (m = control; m; m = m->m_next)
+ sballoc(sb, m);
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ n->m_nextpkt = control;
+ } else
+ sb->sb_mb = control;
+ return (1);
+}
+
+/*
+ * Compress mbuf chain m into the socket
+ * buffer sb following mbuf n. If n
+ * is null, the buffer is presumed empty.
+ */
+sbcompress(sb, m, n)
+ register struct sockbuf *sb;
+ register struct mbuf *m, *n;
+{
+ register int eor = 0;
+ register struct mbuf *o;
+
+ while (m) {
+ eor |= m->m_flags & M_EOR;
+ if (m->m_len == 0 &&
+ (eor == 0 ||
+ (((o = m->m_next) || (o = n)) &&
+ o->m_type == m->m_type))) {
+ m = m_free(m);
+ continue;
+ }
+ if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
+ (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
+ n->m_type == m->m_type) {
+ bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
+ (unsigned)m->m_len);
+ n->m_len += m->m_len;
+ sb->sb_cc += m->m_len;
+ m = m_free(m);
+ continue;
+ }
+ if (n)
+ n->m_next = m;
+ else
+ sb->sb_mb = m;
+ sballoc(sb, m);
+ n = m;
+ m->m_flags &= ~M_EOR;
+ m = m->m_next;
+ n->m_next = 0;
+ }
+ if (eor) {
+ if (n)
+ n->m_flags |= eor;
+ else
+ printf("semi-panic: sbcompress\n");
+ }
+}
+
+/*
+ * Free all mbufs in a sockbuf.
+ * Check that all resources are reclaimed.
+ */
+sbflush(sb)
+ register struct sockbuf *sb;
+{
+
+ if (sb->sb_flags & SB_LOCK)
+ panic("sbflush");
+ while (sb->sb_mbcnt)
+ sbdrop(sb, (int)sb->sb_cc);
+ if (sb->sb_cc || sb->sb_mb)
+ panic("sbflush 2");
+}
+
+/*
+ * Drop data from (the front of) a sockbuf.
+ */
+sbdrop(sb, len)
+ register struct sockbuf *sb;
+ register int len;
+{
+ register struct mbuf *m, *mn;
+ struct mbuf *next;
+
+ next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+ while (len > 0) {
+ if (m == 0) {
+ if (next == 0)
+ panic("sbdrop");
+ m = next;
+ next = m->m_nextpkt;
+ continue;
+ }
+ if (m->m_len > len) {
+ m->m_len -= len;
+ m->m_data += len;
+ sb->sb_cc -= len;
+ break;
+ }
+ len -= m->m_len;
+ sbfree(sb, m);
+ MFREE(m, mn);
+ m = mn;
+ }
+ while (m && m->m_len == 0) {
+ sbfree(sb, m);
+ MFREE(m, mn);
+ m = mn;
+ }
+ if (m) {
+ sb->sb_mb = m;
+ m->m_nextpkt = next;
+ } else
+ sb->sb_mb = next;
+}
+
+/*
+ * Drop a record off the front of a sockbuf
+ * and move the next record to the front.
+ */
+sbdroprecord(sb)
+ register struct sockbuf *sb;
+{
+ register struct mbuf *m, *mn;
+
+ m = sb->sb_mb;
+ if (m) {
+ sb->sb_mb = m->m_nextpkt;
+ do {
+ sbfree(sb, m);
+ MFREE(m, mn);
+ } while (m = mn);
+ }
+}
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
new file mode 100644
index 0000000..9891fe6
--- /dev/null
+++ b/sys/kern/vfs_export.c
@@ -0,0 +1,1322 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
+ */
+
+/*
+ * External virtual filesystem routines
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+#include <miscfs/specfs/specdev.h>
+
+enum vtype iftovt_tab[16] = {
+ VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+ VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
+};
+int vttoif_tab[9] = {
+ 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
+ S_IFSOCK, S_IFIFO, S_IFMT,
+};
+
+/*
+ * Insq/Remq for the vnode usage lists.
+ */
+#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
+#define bufremvn(bp) { \
+ LIST_REMOVE(bp, b_vnbufs); \
+ (bp)->b_vnbufs.le_next = NOLIST; \
+}
+
+TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
+struct mntlist mountlist; /* mounted filesystem list */
+
+/*
+ * Initialize the vnode management data structures.
+ */
+vntblinit()
+{
+
+ TAILQ_INIT(&vnode_free_list);
+ TAILQ_INIT(&mountlist);
+}
+
+/*
+ * Lock a filesystem.
+ * Used to prevent access to it while mounting and unmounting.
+ */
+vfs_lock(mp)
+ register struct mount *mp;
+{
+
+ while(mp->mnt_flag & MNT_MLOCK) {
+ mp->mnt_flag |= MNT_MWAIT;
+ sleep((caddr_t)mp, PVFS);
+ }
+ mp->mnt_flag |= MNT_MLOCK;
+ return (0);
+}
+
+/*
+ * Unlock a locked filesystem.
+ * Panic if filesystem is not locked.
+ */
+void
+vfs_unlock(mp)
+ register struct mount *mp;
+{
+
+ if ((mp->mnt_flag & MNT_MLOCK) == 0)
+ panic("vfs_unlock: not locked");
+ mp->mnt_flag &= ~MNT_MLOCK;
+ if (mp->mnt_flag & MNT_MWAIT) {
+ mp->mnt_flag &= ~MNT_MWAIT;
+ wakeup((caddr_t)mp);
+ }
+}
+
+/*
+ * Mark a mount point as busy.
+ * Used to synchronize access and to delay unmounting.
+ */
+vfs_busy(mp)
+ register struct mount *mp;
+{
+
+ while(mp->mnt_flag & MNT_MPBUSY) {
+ mp->mnt_flag |= MNT_MPWANT;
+ sleep((caddr_t)&mp->mnt_flag, PVFS);
+ }
+ if (mp->mnt_flag & MNT_UNMOUNT)
+ return (1);
+ mp->mnt_flag |= MNT_MPBUSY;
+ return (0);
+}
+
+/*
+ * Free a busy filesystem.
+ * Panic if filesystem is not busy.
+ */
+vfs_unbusy(mp)
+ register struct mount *mp;
+{
+
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("vfs_unbusy: not busy");
+ mp->mnt_flag &= ~MNT_MPBUSY;
+ if (mp->mnt_flag & MNT_MPWANT) {
+ mp->mnt_flag &= ~MNT_MPWANT;
+ wakeup((caddr_t)&mp->mnt_flag);
+ }
+}
+
+/*
+ * Lookup a mount point by filesystem identifier.
+ */
+struct mount *
+getvfs(fsid)
+ fsid_t *fsid;
+{
+ register struct mount *mp;
+
+ for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
+ if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
+ mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
+ return (mp);
+ }
+ return ((struct mount *)0);
+}
+
+/*
+ * Get a new unique fsid
+ */
+void
+getnewfsid(mp, mtype)
+ struct mount *mp;
+ int mtype;
+{
+	static u_short xxxfs_mntid;
+	fsid_t tfsid;
+
+ mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
+ mp->mnt_stat.f_fsid.val[1] = mtype;
+ if (xxxfs_mntid == 0)
+ ++xxxfs_mntid;
+ tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
+ tfsid.val[1] = mtype;
+ if (mountlist.tqh_first != NULL) {
+ while (getvfs(&tfsid)) {
+ tfsid.val[0]++;
+ xxxfs_mntid++;
+ }
+ }
+ mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
+}
+
+/*
+ * Set vnode attributes to VNOVAL
+ */
+void vattr_null(vap)
+ register struct vattr *vap;
+{
+
+ vap->va_type = VNON;
+ vap->va_size = vap->va_bytes = VNOVAL;
+ vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
+ vap->va_fsid = vap->va_fileid =
+ vap->va_blocksize = vap->va_rdev =
+ vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
+ vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
+ vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
+ vap->va_flags = vap->va_gen = VNOVAL;
+ vap->va_vaflags = 0;
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern int (**dead_vnodeop_p)();
+extern void vclean();
+long numvnodes;
+extern struct vattr va_null;
+
+/*
+ * Return the next vnode from the free list.
+ */
+getnewvnode(tag, mp, vops, vpp)
+ enum vtagtype tag;
+ struct mount *mp;
+ int (**vops)();
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+ int s;
+
+ if ((vnode_free_list.tqh_first == NULL &&
+ numvnodes < 2 * desiredvnodes) ||
+ numvnodes < desiredvnodes) {
+ vp = (struct vnode *)malloc((u_long)sizeof *vp,
+ M_VNODE, M_WAITOK);
+ bzero((char *)vp, sizeof *vp);
+ numvnodes++;
+ } else {
+ if ((vp = vnode_free_list.tqh_first) == NULL) {
+ tablefull("vnode");
+ *vpp = 0;
+ return (ENFILE);
+ }
+ if (vp->v_usecount)
+ panic("free vnode isn't");
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ /* see comment on why 0xdeadb is set at end of vgone (below) */
+ vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
+ vp->v_lease = NULL;
+ if (vp->v_type != VBAD)
+ vgone(vp);
+#ifdef DIAGNOSTIC
+ if (vp->v_data)
+ panic("cleaned vnode isn't");
+ s = splbio();
+ if (vp->v_numoutput)
+ panic("Clean vnode has pending I/O's");
+ splx(s);
+#endif
+ vp->v_flag = 0;
+ vp->v_lastr = 0;
+ vp->v_ralen = 0;
+ vp->v_maxra = 0;
+ vp->v_lastw = 0;
+ vp->v_lasta = 0;
+ vp->v_cstart = 0;
+ vp->v_clen = 0;
+ vp->v_socket = 0;
+ }
+ vp->v_type = VNON;
+ cache_purge(vp);
+ vp->v_tag = tag;
+ vp->v_op = vops;
+ insmntque(vp, mp);
+ *vpp = vp;
+ vp->v_usecount = 1;
+ vp->v_data = 0;
+ return (0);
+}
+
+/*
+ * Move a vnode from one mount queue to another.
+ */
+insmntque(vp, mp)
+ register struct vnode *vp;
+ register struct mount *mp;
+{
+
+ /*
+ * Delete from old mount point vnode list, if on one.
+ */
+ if (vp->v_mount != NULL)
+ LIST_REMOVE(vp, v_mntvnodes);
+ /*
+ * Insert into list of vnodes for the new mount point, if available.
+ */
+ if ((vp->v_mount = mp) == NULL)
+ return;
+ LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+}
+
+/*
+ * Update outstanding I/O count and do wakeup if requested.
+ */
+vwakeup(bp)
+ register struct buf *bp;
+{
+ register struct vnode *vp;
+
+ bp->b_flags &= ~B_WRITEINPROG;
+ if (vp = bp->b_vp) {
+ vp->v_numoutput--;
+ if (vp->v_numoutput < 0)
+ panic("vwakeup: neg numoutput");
+ if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+ if (vp->v_numoutput < 0)
+ panic("vwakeup: neg numoutput");
+ vp->v_flag &= ~VBWAIT;
+ wakeup((caddr_t)&vp->v_numoutput);
+ }
+ }
+}
+
+/*
+ * Flush out and invalidate all buffers associated with a vnode.
+ * Called with the underlying object locked.
+ */
+int
+vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
+ register struct vnode *vp;
+ int flags;
+ struct ucred *cred;
+ struct proc *p;
+ int slpflag, slptimeo;
+{
+ register struct buf *bp;
+ struct buf *nbp, *blist;
+ int s, error;
+
+ if (flags & V_SAVE) {
+ if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
+ return (error);
+ if (vp->v_dirtyblkhd.lh_first != NULL)
+ panic("vinvalbuf: dirty bufs");
+ }
+ for (;;) {
+ if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+ while (blist && blist->b_lblkno < 0)
+ blist = blist->b_vnbufs.le_next;
+ if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
+ (flags & V_SAVEMETA))
+ while (blist && blist->b_lblkno < 0)
+ blist = blist->b_vnbufs.le_next;
+ if (!blist)
+ break;
+
+ for (bp = blist; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next;
+ if (flags & V_SAVEMETA && bp->b_lblkno < 0)
+ continue;
+ s = splbio();
+ if (bp->b_flags & B_BUSY) {
+ bp->b_flags |= B_WANTED;
+ error = tsleep((caddr_t)bp,
+ slpflag | (PRIBIO + 1), "vinvalbuf",
+ slptimeo);
+ splx(s);
+ if (error)
+ return (error);
+ break;
+ }
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ /*
+ * XXX Since there are no node locks for NFS, I believe
+ * there is a slight chance that a delayed write will
+ * occur while sleeping just above, so check for it.
+ */
+ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
+ (void) VOP_BWRITE(bp);
+ break;
+ }
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ }
+ }
+ if (!(flags & V_SAVEMETA) &&
+ (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
+ panic("vinvalbuf: flush failed");
+ return (0);
+}
+
+/*
+ * Associate a buffer with a vnode.
+ */
+bgetvp(vp, bp)
+ register struct vnode *vp;
+ register struct buf *bp;
+{
+
+ if (bp->b_vp)
+ panic("bgetvp: not free");
+ VHOLD(vp);
+ bp->b_vp = vp;
+ if (vp->v_type == VBLK || vp->v_type == VCHR)
+ bp->b_dev = vp->v_rdev;
+ else
+ bp->b_dev = NODEV;
+ /*
+ * Insert onto list for new vnode.
+ */
+ bufinsvn(bp, &vp->v_cleanblkhd);
+}
+
+/*
+ * Disassociate a buffer from a vnode.
+ */
+brelvp(bp)
+ register struct buf *bp;
+{
+ struct vnode *vp;
+
+ if (bp->b_vp == (struct vnode *) 0)
+ panic("brelvp: NULL");
+ /*
+ * Delete from old vnode list, if on one.
+ */
+ if (bp->b_vnbufs.le_next != NOLIST)
+ bufremvn(bp);
+ vp = bp->b_vp;
+ bp->b_vp = (struct vnode *) 0;
+ HOLDRELE(vp);
+}
+
+/*
+ * Reassign a buffer from one vnode to another.
+ * Used to assign file specific control information
+ * (indirect blocks) to the vnode to which they belong.
+ */
+reassignbuf(bp, newvp)
+ register struct buf *bp;
+ register struct vnode *newvp;
+{
+ register struct buflists *listheadp;
+
+ if (newvp == NULL) {
+		printf("reassignbuf: NULL\n");
+ return;
+ }
+ /*
+ * Delete from old vnode list, if on one.
+ */
+ if (bp->b_vnbufs.le_next != NOLIST)
+ bufremvn(bp);
+ /*
+ * If dirty, put on list of dirty buffers;
+ * otherwise insert onto list of clean buffers.
+ */
+ if (bp->b_flags & B_DELWRI)
+ listheadp = &newvp->v_dirtyblkhd;
+ else
+ listheadp = &newvp->v_cleanblkhd;
+ bufinsvn(bp, listheadp);
+}
+
+/*
+ * Create a vnode for a block device.
+ * Used for root filesystem, argdev, and swap areas.
+ * Also used for memory file system special devices.
+ */
+bdevvp(dev, vpp)
+ dev_t dev;
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+ struct vnode *nvp;
+ int error;
+
+ if (dev == NODEV)
+ return (0);
+ error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
+ if (error) {
+ *vpp = 0;
+ return (error);
+ }
+ vp = nvp;
+ vp->v_type = VBLK;
+ if (nvp = checkalias(vp, dev, (struct mount *)0)) {
+ vput(vp);
+ vp = nvp;
+ }
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * Check to see if the new vnode represents a special device
+ * for which we already have a vnode (either because of
+ * bdevvp() or because of a different vnode representing
+ * the same block device). If such an alias exists, deallocate
+ * the existing contents and return the aliased vnode. The
+ * caller is responsible for filling it with its new contents.
+ */
+struct vnode *
+checkalias(nvp, nvp_rdev, mp)
+ register struct vnode *nvp;
+ dev_t nvp_rdev;
+ struct mount *mp;
+{
+ register struct vnode *vp;
+ struct vnode **vpp;
+
+ if (nvp->v_type != VBLK && nvp->v_type != VCHR)
+ return (NULLVP);
+
+ vpp = &speclisth[SPECHASH(nvp_rdev)];
+loop:
+ for (vp = *vpp; vp; vp = vp->v_specnext) {
+ if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
+ continue;
+ /*
+ * Alias, but not in use, so flush it out.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ goto loop;
+ }
+ if (vget(vp, 1))
+ goto loop;
+ break;
+ }
+ if (vp == NULL || vp->v_tag != VT_NON) {
+ MALLOC(nvp->v_specinfo, struct specinfo *,
+ sizeof(struct specinfo), M_VNODE, M_WAITOK);
+ nvp->v_rdev = nvp_rdev;
+ nvp->v_hashchain = vpp;
+ nvp->v_specnext = *vpp;
+ nvp->v_specflags = 0;
+ *vpp = nvp;
+ if (vp != NULL) {
+ nvp->v_flag |= VALIASED;
+ vp->v_flag |= VALIASED;
+ vput(vp);
+ }
+ return (NULLVP);
+ }
+ VOP_UNLOCK(vp);
+ vclean(vp, 0);
+ vp->v_op = nvp->v_op;
+ vp->v_tag = nvp->v_tag;
+ nvp->v_type = VNON;
+ insmntque(vp, mp);
+ return (vp);
+}
+
+/*
+ * Grab a particular vnode from the free list, increment its
+ * reference count and lock it. The vnode lock bit is set while the
+ * vnode is being eliminated in vgone. The process is awakened
+ * when the transition is completed, and an error returned to
+ * indicate that the vnode is no longer usable (possibly having
+ * been changed to a new file system type).
+ */
+vget(vp, lockflag)
+ register struct vnode *vp;
+ int lockflag;
+{
+
+ /*
+ * If the vnode is in the process of being cleaned out for
+ * another use, we wait for the cleaning to finish and then
+ * return failure. Cleaning is determined either by checking
+ * that the VXLOCK flag is set, or that the use count is
+ * zero with the back pointer set to show that it has been
+ * removed from the free list by getnewvnode. The VXLOCK
+ * flag may not have been set yet because vclean is blocked in
+ * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
+ */
+ if ((vp->v_flag & VXLOCK) ||
+ (vp->v_usecount == 0 &&
+ vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return (1);
+ }
+ if (vp->v_usecount == 0)
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ vp->v_usecount++;
+ if (lockflag)
+ VOP_LOCK(vp);
+ return (0);
+}
+
+/*
+ * Vnode reference, just increment the count
+ */
+void vref(vp)
+ struct vnode *vp;
+{
+
+ if (vp->v_usecount <= 0)
+ panic("vref used where vget required");
+ vp->v_usecount++;
+}
+
+/*
+ * vput(), just unlock and vrele()
+ */
+void vput(vp)
+ register struct vnode *vp;
+{
+
+ VOP_UNLOCK(vp);
+ vrele(vp);
+}
+
+/*
+ * Vnode release.
+ * If count drops to zero, call inactive routine and return to freelist.
+ */
+void vrele(vp)
+ register struct vnode *vp;
+{
+
+#ifdef DIAGNOSTIC
+ if (vp == NULL)
+ panic("vrele: null vp");
+#endif
+ vp->v_usecount--;
+ if (vp->v_usecount > 0)
+ return;
+#ifdef DIAGNOSTIC
+ if (vp->v_usecount != 0 || vp->v_writecount != 0) {
+ vprint("vrele: bad ref count", vp);
+ panic("vrele: ref cnt");
+ }
+#endif
+ /*
+ * insert at tail of LRU list
+ */
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ VOP_INACTIVE(vp);
+}
+
+/*
+ * Page or buffer structure gets a reference.
+ */
+void vhold(vp)
+ register struct vnode *vp;
+{
+
+ vp->v_holdcnt++;
+}
+
+/*
+ * Page or buffer structure frees a reference.
+ */
+void holdrele(vp)
+ register struct vnode *vp;
+{
+
+ if (vp->v_holdcnt <= 0)
+ panic("holdrele: holdcnt");
+ vp->v_holdcnt--;
+}
+
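+/*
+ * A sketch of the reference protocol implemented above (illustrative
+ * only): a lookup routine takes and releases a vnode roughly as
+ *
+ *	if (vget(vp, 1))		being recycled; look it up again
+ *		goto retry;
+ *	...use the locked vnode...
+ *	vput(vp);			unlock and drop the reference
+ *
+ * A holder of an already-referenced vnode uses VREF(vp)/vrele(vp),
+ * and buffer-cache code uses VHOLD(vp)/HOLDRELE(vp) instead.
+ */
+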
+/*
+ * Remove any vnodes in the vnode table belonging to mount point mp.
+ *
+ * If MNT_NOFORCE is specified, there should not be any active ones,
+ * return error if any are found (nb: this is a user error, not a
+ * system error). If MNT_FORCE is specified, detach any active vnodes
+ * that are found.
+ */
+#ifdef DIAGNOSTIC
+int busyprt = 0; /* print out busy vnodes */
+struct ctldebug debug1 = { "busyprt", &busyprt };
+#endif
+
+vflush(mp, skipvp, flags)
+ struct mount *mp;
+ struct vnode *skipvp;
+ int flags;
+{
+ register struct vnode *vp, *nvp;
+ int busy = 0;
+
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("vflush: not busy");
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+ if (vp->v_mount != mp)
+ goto loop;
+ nvp = vp->v_mntvnodes.le_next;
+ /*
+ * Skip over a selected vnode.
+ */
+ if (vp == skipvp)
+ continue;
+ /*
+		 * Skip over vnodes marked VSYSTEM.
+ */
+ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
+ continue;
+ /*
+ * If WRITECLOSE is set, only flush out regular file
+ * vnodes open for writing.
+ */
+ if ((flags & WRITECLOSE) &&
+ (vp->v_writecount == 0 || vp->v_type != VREG))
+ continue;
+ /*
+ * With v_usecount == 0, all we need to do is clear
+ * out the vnode data structures and we are done.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ continue;
+ }
+ /*
+ * If FORCECLOSE is set, forcibly close the vnode.
+ * For block or character devices, revert to an
+ * anonymous device. For all other files, just kill them.
+ */
+ if (flags & FORCECLOSE) {
+ if (vp->v_type != VBLK && vp->v_type != VCHR) {
+ vgone(vp);
+ } else {
+ vclean(vp, 0);
+ vp->v_op = spec_vnodeop_p;
+ insmntque(vp, (struct mount *)0);
+ }
+ continue;
+ }
+#ifdef DIAGNOSTIC
+ if (busyprt)
+ vprint("vflush: busy vnode", vp);
+#endif
+ busy++;
+ }
+ if (busy)
+ return (EBUSY);
+ return (0);
+}
+
+/*
+ * Disassociate the underlying file system from a vnode.
+ */
+void
+vclean(vp, flags)
+ register struct vnode *vp;
+ int flags;
+{
+ int active;
+
+ /*
+ * Check to see if the vnode is in use.
+ * If so we have to reference it before we clean it out
+ * so that its count cannot fall to zero and generate a
+ * race against ourselves to recycle it.
+ */
+ if (active = vp->v_usecount)
+ VREF(vp);
+ /*
+ * Even if the count is zero, the VOP_INACTIVE routine may still
+ * have the object locked while it cleans it out. The VOP_LOCK
+ * ensures that the VOP_INACTIVE routine is done with its work.
+ * For active vnodes, it ensures that no other activity can
+ * occur while the underlying object is being cleaned out.
+ */
+ VOP_LOCK(vp);
+ /*
+ * Prevent the vnode from being recycled or
+ * brought into use while we clean it out.
+ */
+ if (vp->v_flag & VXLOCK)
+ panic("vclean: deadlock");
+ vp->v_flag |= VXLOCK;
+ /*
+ * Clean out any buffers associated with the vnode.
+ */
+ if (flags & DOCLOSE)
+ vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+ /*
+ * Any other processes trying to obtain this lock must first
+ * wait for VXLOCK to clear, then call the new lock operation.
+ */
+ VOP_UNLOCK(vp);
+ /*
+ * If purging an active vnode, it must be closed and
+ * deactivated before being reclaimed.
+ */
+ if (active) {
+ if (flags & DOCLOSE)
+ VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
+ VOP_INACTIVE(vp);
+ }
+ /*
+ * Reclaim the vnode.
+ */
+ if (VOP_RECLAIM(vp))
+ panic("vclean: cannot reclaim");
+ if (active)
+ vrele(vp);
+
+ /*
+ * Done with purge, notify sleepers of the grim news.
+ */
+ vp->v_op = dead_vnodeop_p;
+ vp->v_tag = VT_NON;
+ vp->v_flag &= ~VXLOCK;
+ if (vp->v_flag & VXWANT) {
+ vp->v_flag &= ~VXWANT;
+ wakeup((caddr_t)vp);
+ }
+}
+
+/*
+ * Eliminate all activity associated with the requested vnode
+ * and with all vnodes aliased to the requested vnode.
+ */
+void vgoneall(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+
+ if (vp->v_flag & VALIASED) {
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Ensure that vp will not be vgone'd while we
+ * are eliminating its aliases.
+ */
+ vp->v_flag |= VXLOCK;
+ while (vp->v_flag & VALIASED) {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type || vp == vq)
+ continue;
+ vgone(vq);
+ break;
+ }
+ }
+ /*
+ * Remove the lock so that vgone below will
+ * really eliminate the vnode after which time
+ * vgone will awaken any sleepers.
+ */
+ vp->v_flag &= ~VXLOCK;
+ }
+ vgone(vp);
+}
+
+/*
+ * Eliminate all activity associated with a vnode
+ * in preparation for reuse.
+ */
+void vgone(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+ struct vnode *vx;
+
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Clean out the filesystem specific data.
+ */
+ vclean(vp, DOCLOSE);
+ /*
+ * Delete from old mount point vnode list, if on one.
+ */
+ if (vp->v_mount != NULL) {
+ LIST_REMOVE(vp, v_mntvnodes);
+ vp->v_mount = NULL;
+ }
+ /*
+ * If special device, remove it from special device alias list.
+ */
+ if (vp->v_type == VBLK || vp->v_type == VCHR) {
+ if (*vp->v_hashchain == vp) {
+ *vp->v_hashchain = vp->v_specnext;
+ } else {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_specnext != vp)
+ continue;
+ vq->v_specnext = vp->v_specnext;
+ break;
+ }
+ if (vq == NULL)
+ panic("missing bdev");
+ }
+ if (vp->v_flag & VALIASED) {
+ vx = NULL;
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type)
+ continue;
+ if (vx)
+ break;
+ vx = vq;
+ }
+ if (vx == NULL)
+ panic("missing alias");
+ if (vq == NULL)
+ vx->v_flag &= ~VALIASED;
+ vp->v_flag &= ~VALIASED;
+ }
+ FREE(vp->v_specinfo, M_VNODE);
+ vp->v_specinfo = NULL;
+ }
+ /*
+ * If it is on the freelist and not already at the head,
+ * move it to the head of the list. The test of the back
+ * pointer and the reference count of zero is because
+ * it will be removed from the free list by getnewvnode,
+ * but will not have its reference count incremented until
+ * after calling vgone. If the reference count were
+ * incremented first, vgone would (incorrectly) try to
+ * close the previous instance of the underlying object.
+ * So, the back pointer is explicitly set to `0xdeadb' in
+ * getnewvnode after removing it from the freelist to ensure
+ * that we do not try to move it here.
+ */
+ if (vp->v_usecount == 0 &&
+ vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
+ vnode_free_list.tqh_first != vp) {
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+ }
+ vp->v_type = VBAD;
+}
+
+/*
+ * Lookup a vnode by device number.
+ */
+vfinddev(dev, type, vpp)
+ dev_t dev;
+ enum vtype type;
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+
+ for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
+ if (dev != vp->v_rdev || type != vp->v_type)
+ continue;
+ *vpp = vp;
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Calculate the total number of references to a special device.
+ */
+vcount(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq, *vnext;
+ int count;
+
+loop:
+ if ((vp->v_flag & VALIASED) == 0)
+ return (vp->v_usecount);
+ for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
+ vnext = vq->v_specnext;
+ if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
+ continue;
+ /*
+ * Alias, but not in use, so flush it out.
+ */
+ if (vq->v_usecount == 0 && vq != vp) {
+ vgone(vq);
+ goto loop;
+ }
+ count += vq->v_usecount;
+ }
+ return (count);
+}
+
+/*
+ * Print out a description of a vnode.
+ */
+static char *typename[] =
+ { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
+
+vprint(label, vp)
+ char *label;
+ register struct vnode *vp;
+{
+ char buf[64];
+
+ if (label != NULL)
+ printf("%s: ", label);
+ printf("type %s, usecount %d, writecount %d, refcount %d,",
+ typename[vp->v_type], vp->v_usecount, vp->v_writecount,
+ vp->v_holdcnt);
+ buf[0] = '\0';
+ if (vp->v_flag & VROOT)
+ strcat(buf, "|VROOT");
+ if (vp->v_flag & VTEXT)
+ strcat(buf, "|VTEXT");
+ if (vp->v_flag & VSYSTEM)
+ strcat(buf, "|VSYSTEM");
+ if (vp->v_flag & VXLOCK)
+ strcat(buf, "|VXLOCK");
+ if (vp->v_flag & VXWANT)
+ strcat(buf, "|VXWANT");
+ if (vp->v_flag & VBWAIT)
+ strcat(buf, "|VBWAIT");
+ if (vp->v_flag & VALIASED)
+ strcat(buf, "|VALIASED");
+ if (buf[0] != '\0')
+ printf(" flags (%s)", &buf[1]);
+ if (vp->v_data == NULL) {
+ printf("\n");
+ } else {
+ printf("\n\t");
+ VOP_PRINT(vp);
+ }
+}
+
+#ifdef DEBUG
+/*
+ * List all of the locked vnodes in the system.
+ * Called when debugging the kernel.
+ */
+printlockedvnodes()
+{
+ register struct mount *mp;
+ register struct vnode *vp;
+
+ printf("Locked vnodes\n");
+ for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
+ for (vp = mp->mnt_vnodelist.lh_first;
+ vp != NULL;
+ vp = vp->v_mntvnodes.le_next)
+ if (VOP_ISLOCKED(vp))
+ vprint((char *)0, vp);
+ }
+}
+#endif
+
+int kinfo_vdebug = 1;
+int kinfo_vgetfailed;
+#define KINFO_VNODESLOP 10
+/*
+ * Dump vnode list (via sysctl).
+ * Copyout address of vnode followed by vnode.
+ */
+/* ARGSUSED */
+sysctl_vnode(where, sizep)
+ char *where;
+ size_t *sizep;
+{
+ register struct mount *mp, *nmp;
+ struct vnode *vp;
+ register char *bp = where, *savebp;
+ char *ewhere;
+ int error;
+
+#define VPTRSZ sizeof (struct vnode *)
+#define VNODESZ sizeof (struct vnode)
+ if (where == NULL) {
+ *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
+ return (0);
+ }
+ ewhere = where + *sizep;
+
+ for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+ nmp = mp->mnt_list.tqe_next;
+ if (vfs_busy(mp))
+ continue;
+ savebp = bp;
+again:
+ for (vp = mp->mnt_vnodelist.lh_first;
+ vp != NULL;
+ vp = vp->v_mntvnodes.le_next) {
+ /*
+ * Check that the vp is still associated with
+ * this filesystem. RACE: could have been
+ * recycled onto the same filesystem.
+ */
+ if (vp->v_mount != mp) {
+ if (kinfo_vdebug)
+ printf("kinfo: vp changed\n");
+ bp = savebp;
+ goto again;
+ }
+ if (bp + VPTRSZ + VNODESZ > ewhere) {
+ *sizep = bp - where;
+ return (ENOMEM);
+ }
+ if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
+ (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
+ return (error);
+ bp += VPTRSZ + VNODESZ;
+ }
+ vfs_unbusy(mp);
+ }
+
+ *sizep = bp - where;
+ return (0);
+}
+
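+/*
+ * Illustrative sketch of a consumer of this dump format (assuming
+ * the list is exported as KERN_VNODE under CTL_KERN, as pstat-style
+ * tools use it):
+ *
+ *	int mib[2] = { CTL_KERN, KERN_VNODE };
+ *	size_t len;
+ *	char *buf;
+ *
+ *	if (sysctl(mib, 2, NULL, &len, NULL, 0) == 0 &&
+ *	    (buf = malloc(len)) != NULL &&
+ *	    sysctl(mib, 2, buf, &len, NULL, 0) == 0)
+ *		...buf holds (struct vnode *, struct vnode) pairs...
+ */
+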
+/*
+ * Check to see if a filesystem is mounted on a block device.
+ */
+int
+vfs_mountedon(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+
+ if (vp->v_specflags & SI_MOUNTEDON)
+ return (EBUSY);
+ if (vp->v_flag & VALIASED) {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type)
+ continue;
+ if (vq->v_specflags & SI_MOUNTEDON)
+ return (EBUSY);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Build hash lists of net addresses and hang them off the mount point.
+ * Called by ufs_mount() to set up the lists of export addresses.
+ */
+static int
+vfs_hang_addrlist(mp, nep, argp)
+ struct mount *mp;
+ struct netexport *nep;
+ struct export_args *argp;
+{
+ register struct netcred *np;
+ register struct radix_node_head *rnh;
+ register int i;
+ struct radix_node *rn;
+ struct sockaddr *saddr, *smask = 0;
+ struct domain *dom;
+ int error;
+
+ if (argp->ex_addrlen == 0) {
+ if (mp->mnt_flag & MNT_DEFEXPORTED)
+ return (EPERM);
+ np = &nep->ne_defexported;
+ np->netc_exflags = argp->ex_flags;
+ np->netc_anon = argp->ex_anon;
+ np->netc_anon.cr_ref = 1;
+ mp->mnt_flag |= MNT_DEFEXPORTED;
+ return (0);
+ }
+ i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
+ np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
+ bzero((caddr_t)np, i);
+ saddr = (struct sockaddr *)(np + 1);
+ if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
+ goto out;
+ if (saddr->sa_len > argp->ex_addrlen)
+ saddr->sa_len = argp->ex_addrlen;
+ if (argp->ex_masklen) {
+ smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
+		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
+ if (error)
+ goto out;
+ if (smask->sa_len > argp->ex_masklen)
+ smask->sa_len = argp->ex_masklen;
+ }
+ i = saddr->sa_family;
+ if ((rnh = nep->ne_rtable[i]) == 0) {
+ /*
+ * Seems silly to initialize every AF when most are not
+		 * used; do so on demand here.
+ */
+ for (dom = domains; dom; dom = dom->dom_next)
+ if (dom->dom_family == i && dom->dom_rtattach) {
+ dom->dom_rtattach((void **)&nep->ne_rtable[i],
+ dom->dom_rtoffset);
+ break;
+ }
+ if ((rnh = nep->ne_rtable[i]) == 0) {
+ error = ENOBUFS;
+ goto out;
+ }
+ }
+ rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
+ np->netc_rnodes);
+ if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
+ error = EPERM;
+ goto out;
+ }
+ np->netc_exflags = argp->ex_flags;
+ np->netc_anon = argp->ex_anon;
+ np->netc_anon.cr_ref = 1;
+ return (0);
+out:
+ free(np, M_NETADDR);
+ return (error);
+}
+
+/* ARGSUSED */
+static int
+vfs_free_netcred(rn, w)
+ struct radix_node *rn;
+ caddr_t w;
+{
+ register struct radix_node_head *rnh = (struct radix_node_head *)w;
+
+ (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
+ free((caddr_t)rn, M_NETADDR);
+ return (0);
+}
+
+/*
+ * Free the net address hash lists that are hanging off the mount points.
+ */
+static void
+vfs_free_addrlist(nep)
+ struct netexport *nep;
+{
+ register int i;
+ register struct radix_node_head *rnh;
+
+ for (i = 0; i <= AF_MAX; i++)
+ if (rnh = nep->ne_rtable[i]) {
+ (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
+ (caddr_t)rnh);
+ free((caddr_t)rnh, M_RTABLE);
+ nep->ne_rtable[i] = 0;
+ }
+}
+
+int
+vfs_export(mp, nep, argp)
+ struct mount *mp;
+ struct netexport *nep;
+ struct export_args *argp;
+{
+ int error;
+
+ if (argp->ex_flags & MNT_DELEXPORT) {
+ vfs_free_addrlist(nep);
+ mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
+ }
+ if (argp->ex_flags & MNT_EXPORTED) {
+ if (error = vfs_hang_addrlist(mp, nep, argp))
+ return (error);
+ mp->mnt_flag |= MNT_EXPORTED;
+ }
+ return (0);
+}
+
+struct netcred *
+vfs_export_lookup(mp, nep, nam)
+ register struct mount *mp;
+ struct netexport *nep;
+ struct mbuf *nam;
+{
+ register struct netcred *np;
+ register struct radix_node_head *rnh;
+ struct sockaddr *saddr;
+
+ np = NULL;
+ if (mp->mnt_flag & MNT_EXPORTED) {
+ /*
+ * Lookup in the export list first.
+ */
+ if (nam != NULL) {
+ saddr = mtod(nam, struct sockaddr *);
+ rnh = nep->ne_rtable[saddr->sa_family];
+ if (rnh != NULL) {
+ np = (struct netcred *)
+ (*rnh->rnh_matchaddr)((caddr_t)saddr,
+ rnh);
+ if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
+ np = NULL;
+ }
+ }
+ /*
+ * If no address match, use the default if it exists.
+ */
+ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
+ np = &nep->ne_defexported;
+ }
+ return (np);
+}
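+
+/*
+ * Illustrative sketch (not in the original file): an NFS-style server
+ * checks a client against the export lists roughly as
+ *
+ *	np = vfs_export_lookup(mp, nep, nam);
+ *	if (np == NULL)
+ *		return (EACCES);	host not exported to
+ *	...apply np->netc_exflags and the np->netc_anon credentials...
+ */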
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
new file mode 100644
index 0000000..345c7a7
--- /dev/null
+++ b/sys/kern/vfs_extattr.c
@@ -0,0 +1,2107 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+static int change_dir __P((struct nameidata *ndp, struct proc *p));
+
+/*
+ * Virtual File System System Calls
+ */
+
+/*
+ * Mount a file system.
+ */
+struct mount_args {
+ int type;
+ char *path;
+ int flags;
+ caddr_t data;
+};
+/* ARGSUSED */
+mount(p, uap, retval)
+ struct proc *p;
+ register struct mount_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ register struct mount *mp;
+ int error, flag;
+ struct nameidata nd;
+
+ /*
+ * Must be super user
+ */
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ /*
+ * Get vnode to be covered
+ */
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (uap->flags & MNT_UPDATE) {
+ if ((vp->v_flag & VROOT) == 0) {
+ vput(vp);
+ return (EINVAL);
+ }
+ mp = vp->v_mount;
+ flag = mp->mnt_flag;
+ /*
+ * We only allow the filesystem to be reloaded if it
+ * is currently mounted read-only.
+ */
+ if ((uap->flags & MNT_RELOAD) &&
+ ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+ vput(vp);
+ return (EOPNOTSUPP); /* Needs translation */
+ }
+ mp->mnt_flag |=
+ uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+ VOP_UNLOCK(vp);
+ goto update;
+ }
+ if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0))
+ return (error);
+ if (vp->v_type != VDIR) {
+ vput(vp);
+ return (ENOTDIR);
+ }
+ if ((u_long)uap->type > MOUNT_MAXTYPE || vfssw[uap->type] == NULL) {
+ vput(vp);
+ return (ENODEV);
+ }
+
+ /*
+ * Allocate and initialize the file system.
+ */
+ mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+ M_MOUNT, M_WAITOK);
+ bzero((char *)mp, (u_long)sizeof(struct mount));
+ mp->mnt_op = vfssw[uap->type];
+ if (error = vfs_lock(mp)) {
+ free((caddr_t)mp, M_MOUNT);
+ vput(vp);
+ return (error);
+ }
+ if (vp->v_mountedhere != NULL) {
+ vfs_unlock(mp);
+ free((caddr_t)mp, M_MOUNT);
+ vput(vp);
+ return (EBUSY);
+ }
+ vp->v_mountedhere = mp;
+ mp->mnt_vnodecovered = vp;
+update:
+ /*
+ * Set the mount level flags.
+ */
+ if (uap->flags & MNT_RDONLY)
+ mp->mnt_flag |= MNT_RDONLY;
+ else if (mp->mnt_flag & MNT_RDONLY)
+ mp->mnt_flag |= MNT_WANTRDWR;
+ mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+ MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+ mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+ MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+ /*
+ * Mount the filesystem.
+ */
+ error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p);
+ if (mp->mnt_flag & MNT_UPDATE) {
+ vrele(vp);
+ if (mp->mnt_flag & MNT_WANTRDWR)
+ mp->mnt_flag &= ~MNT_RDONLY;
+ mp->mnt_flag &=~
+ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
+ if (error)
+ mp->mnt_flag = flag;
+ return (error);
+ }
+ /*
+ * Put the new filesystem on the mount list after root.
+ */
+ cache_purge(vp);
+ if (!error) {
+ TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ VOP_UNLOCK(vp);
+ vfs_unlock(mp);
+ error = VFS_START(mp, 0, p);
+ } else {
+ mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+ vfs_unlock(mp);
+ free((caddr_t)mp, M_MOUNT);
+ vput(vp);
+ }
+ return (error);
+}
+
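+/*
+ * Illustrative user-level sketch of this call (MOUNT_UFS and the
+ * struct ufs_args layout are assumptions from <sys/mount.h> and the
+ * UFS headers, shown only to make the argument passing concrete):
+ *
+ *	struct ufs_args args;
+ *
+ *	args.fspec = "/dev/sd0a";
+ *	if (mount(MOUNT_UFS, "/mnt", MNT_RDONLY, (caddr_t)&args) < 0)
+ *		err(1, "mount");
+ */
+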
+/*
+ * Unmount a file system.
+ *
+ * Note: unmount takes a path to the vnode mounted on as argument,
+ * not the special file (as it did before).
+ */
+struct unmount_args {
+ char *path;
+ int flags;
+};
+/* ARGSUSED */
+unmount(p, uap, retval)
+ struct proc *p;
+ register struct unmount_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct mount *mp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+
+ /*
+	 * Unless this is a user mount, the caller must
+	 * have superuser privilege.
+ */
+ if (((vp->v_mount->mnt_flag & MNT_USER) == 0) &&
+ (error = suser(p->p_ucred, &p->p_acflag))) {
+ vput(vp);
+ return (error);
+ }
+
+ /*
+ * Must be the root of the filesystem
+ */
+ if ((vp->v_flag & VROOT) == 0) {
+ vput(vp);
+ return (EINVAL);
+ }
+ mp = vp->v_mount;
+ vput(vp);
+ return (dounmount(mp, uap->flags, p));
+}
+
+/*
+ * Do the actual file system unmount.
+ */
+dounmount(mp, flags, p)
+ register struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ struct vnode *coveredvp;
+ int error;
+
+ coveredvp = mp->mnt_vnodecovered;
+ if (vfs_busy(mp))
+ return (EBUSY);
+ mp->mnt_flag |= MNT_UNMOUNT;
+ if (error = vfs_lock(mp))
+ return (error);
+
+ mp->mnt_flag &=~ MNT_ASYNC;
+ vnode_pager_umount(mp); /* release cached vnodes */
+ cache_purgevfs(mp); /* remove cache entries for this file sys */
+ if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 ||
+ (flags & MNT_FORCE))
+ error = VFS_UNMOUNT(mp, flags, p);
+ mp->mnt_flag &= ~MNT_UNMOUNT;
+ vfs_unbusy(mp);
+ if (error) {
+ vfs_unlock(mp);
+ } else {
+ vrele(coveredvp);
+ TAILQ_REMOVE(&mountlist, mp, mnt_list);
+ mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+ vfs_unlock(mp);
+ if (mp->mnt_vnodelist.lh_first != NULL)
+ panic("unmount: dangling vnode");
+ free((caddr_t)mp, M_MOUNT);
+ }
+ return (error);
+}
+
+/*
+ * Sync each mounted filesystem.
+ */
+#ifdef DIAGNOSTIC
+int syncprt = 0;
+struct ctldebug debug0 = { "syncprt", &syncprt };
+#endif
+
+struct sync_args {
+ int dummy;
+};
+/* ARGSUSED */
+sync(p, uap, retval)
+ struct proc *p;
+ struct sync_args *uap;
+ int *retval;
+{
+ register struct mount *mp, *nmp;
+ int asyncflag;
+
+ for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+ nmp = mp->mnt_list.tqe_next;
+ /*
+ * The lock check below is to avoid races with mount
+ * and unmount.
+ */
+ if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) == 0 &&
+ !vfs_busy(mp)) {
+ asyncflag = mp->mnt_flag & MNT_ASYNC;
+ mp->mnt_flag &= ~MNT_ASYNC;
+ VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
+ if (asyncflag)
+ mp->mnt_flag |= MNT_ASYNC;
+ vfs_unbusy(mp);
+ }
+ }
+#ifdef DIAGNOSTIC
+ if (syncprt)
+ vfs_bufstats();
+#endif /* DIAGNOSTIC */
+ return (0);
+}
+
+/*
+ * Change filesystem quotas.
+ */
+struct quotactl_args {
+ char *path;
+ int cmd;
+ int uid;
+ caddr_t arg;
+};
+/* ARGSUSED */
+quotactl(p, uap, retval)
+ struct proc *p;
+ register struct quotactl_args *uap;
+ int *retval;
+{
+ register struct mount *mp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ mp = nd.ni_vp->v_mount;
+ vrele(nd.ni_vp);
+ return (VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, p));
+}
+
+/*
+ * Get filesystem statistics.
+ */
+struct statfs_args {
+ char *path;
+ struct statfs *buf;
+};
+/* ARGSUSED */
+statfs(p, uap, retval)
+ struct proc *p;
+ register struct statfs_args *uap;
+ int *retval;
+{
+ register struct mount *mp;
+ register struct statfs *sp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ mp = nd.ni_vp->v_mount;
+ sp = &mp->mnt_stat;
+ vrele(nd.ni_vp);
+ if (error = VFS_STATFS(mp, sp, p))
+ return (error);
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get filesystem statistics.
+ */
+struct fstatfs_args {
+ int fd;
+ struct statfs *buf;
+};
+/* ARGSUSED */
+fstatfs(p, uap, retval)
+ struct proc *p;
+ register struct fstatfs_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ struct mount *mp;
+ register struct statfs *sp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ mp = ((struct vnode *)fp->f_data)->v_mount;
+ sp = &mp->mnt_stat;
+ if (error = VFS_STATFS(mp, sp, p))
+ return (error);
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get statistics on all filesystems.
+ */
+struct getfsstat_args {
+ struct statfs *buf;
+ long bufsize;
+ int flags;
+};
+getfsstat(p, uap, retval)
+ struct proc *p;
+ register struct getfsstat_args *uap;
+ int *retval;
+{
+ register struct mount *mp, *nmp;
+ register struct statfs *sp;
+ caddr_t sfsp;
+ long count, maxcount, error;
+
+ maxcount = uap->bufsize / sizeof(struct statfs);
+ sfsp = (caddr_t)uap->buf;
+ for (count = 0, mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+ nmp = mp->mnt_list.tqe_next;
+ if (sfsp && count < maxcount &&
+ ((mp->mnt_flag & MNT_MLOCK) == 0)) {
+ sp = &mp->mnt_stat;
+ /*
+ * If MNT_NOWAIT is specified, do not refresh the
+ * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
+ */
+ if (((uap->flags & MNT_NOWAIT) == 0 ||
+ (uap->flags & MNT_WAIT)) &&
+ (error = VFS_STATFS(mp, sp, p)))
+ continue;
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp)))
+ return (error);
+ sfsp += sizeof(*sp);
+ }
+ count++;
+ }
+ if (sfsp && count > maxcount)
+ *retval = maxcount;
+ else
+ *retval = count;
+ return (0);
+}
+
+/*
+ * Change current working directory to a given file descriptor.
+ */
+struct fchdir_args {
+ int fd;
+};
+/* ARGSUSED */
+fchdir(p, uap, retval)
+ struct proc *p;
+ struct fchdir_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(fdp, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ VOP_LOCK(vp);
+ if (vp->v_type != VDIR)
+ error = ENOTDIR;
+ else
+ error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
+ VOP_UNLOCK(vp);
+ if (error)
+ return (error);
+ VREF(vp);
+ vrele(fdp->fd_cdir);
+ fdp->fd_cdir = vp;
+ return (0);
+}
+
+/*
+ * Change current working directory (``.'').
+ */
+struct chdir_args {
+ char *path;
+};
+/* ARGSUSED */
+chdir(p, uap, retval)
+ struct proc *p;
+ struct chdir_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = change_dir(&nd, p))
+ return (error);
+ vrele(fdp->fd_cdir);
+ fdp->fd_cdir = nd.ni_vp;
+ return (0);
+}
+
+/*
+ * Change notion of root (``/'') directory.
+ */
+struct chroot_args {
+ char *path;
+};
+/* ARGSUSED */
+chroot(p, uap, retval)
+ struct proc *p;
+ struct chroot_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ int error;
+ struct nameidata nd;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = change_dir(&nd, p))
+ return (error);
+ if (fdp->fd_rdir != NULL)
+ vrele(fdp->fd_rdir);
+ fdp->fd_rdir = nd.ni_vp;
+ return (0);
+}
+
+/*
+ * Common routine for chroot and chdir.
+ */
+static int
+change_dir(ndp, p)
+ register struct nameidata *ndp;
+ struct proc *p;
+{
+ struct vnode *vp;
+ int error;
+
+ if (error = namei(ndp))
+ return (error);
+ vp = ndp->ni_vp;
+ if (vp->v_type != VDIR)
+ error = ENOTDIR;
+ else
+ error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
+ VOP_UNLOCK(vp);
+ if (error)
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Check permissions, allocate an open file structure,
+ * and call the device open routine if any.
+ */
+struct open_args {
+ char *path;
+ int flags;
+ int mode;
+};
+open(p, uap, retval)
+ struct proc *p;
+ register struct open_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ register struct vnode *vp;
+ int flags, cmode;
+ struct file *nfp;
+ int type, indx, error;
+ struct flock lf;
+ struct nameidata nd;
+ extern struct fileops vnops;
+
+ if (error = falloc(p, &nfp, &indx))
+ return (error);
+ fp = nfp;
+ flags = FFLAGS(uap->flags);
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ p->p_dupfd = -indx - 1; /* XXX check for fdopen */
+ if (error = vn_open(&nd, flags, cmode)) {
+ ffree(fp);
+ if ((error == ENODEV || error == ENXIO) &&
+ p->p_dupfd >= 0 && /* XXX from fdopen */
+ (error =
+ dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
+ *retval = indx;
+ return (0);
+ }
+ if (error == ERESTART)
+ error = EINTR;
+ fdp->fd_ofiles[indx] = NULL;
+ return (error);
+ }
+ p->p_dupfd = 0;
+ vp = nd.ni_vp;
+ fp->f_flag = flags & FMASK;
+ fp->f_type = DTYPE_VNODE;
+ fp->f_ops = &vnops;
+ fp->f_data = (caddr_t)vp;
+ if (flags & (O_EXLOCK | O_SHLOCK)) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (flags & O_EXLOCK)
+ lf.l_type = F_WRLCK;
+ else
+ lf.l_type = F_RDLCK;
+ type = F_FLOCK;
+ if ((flags & FNONBLOCK) == 0)
+ type |= F_WAIT;
+ VOP_UNLOCK(vp);
+ if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) {
+ (void) vn_close(vp, fp->f_flag, fp->f_cred, p);
+ ffree(fp);
+ fdp->fd_ofiles[indx] = NULL;
+ return (error);
+ }
+ VOP_LOCK(vp);
+ fp->f_flag |= FHASLOCK;
+ }
+ VOP_UNLOCK(vp);
+ *retval = indx;
+ return (0);
+}
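+
+/*
+ * Illustrative userland sketch (not part of this file): the O_EXLOCK and
+ * O_SHLOCK handling above lets a caller obtain a descriptor and a
+ * flock-style lock in one call; with O_NONBLOCK the open fails rather
+ * than waiting if the lock is already held (the path is hypothetical):
+ *
+ *	int fd;
+ *
+ *	fd = open("/var/run/example.pid",
+ *	    O_RDWR | O_CREAT | O_EXLOCK | O_NONBLOCK, 0644);
+ *	if (fd < 0)
+ *		err(1, "open");
+ */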
+
+#ifdef COMPAT_43
+/*
+ * Create a file.
+ */
+struct ocreat_args {
+ char *path;
+ int mode;
+};
+ocreat(p, uap, retval)
+ struct proc *p;
+ register struct ocreat_args *uap;
+ int *retval;
+{
+ struct open_args openuap;
+
+ openuap.path = uap->path;
+ openuap.mode = uap->mode;
+ openuap.flags = O_WRONLY | O_CREAT | O_TRUNC;
+ return (open(p, &openuap, retval));
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Create a special file.
+ */
+struct mknod_args {
+ char *path;
+ int mode;
+ int dev;
+};
+/* ARGSUSED */
+mknod(p, uap, retval)
+ struct proc *p;
+ register struct mknod_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp != NULL)
+ error = EEXIST;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+ vattr.va_rdev = uap->dev;
+
+ switch (uap->mode & S_IFMT) {
+ case S_IFMT: /* used by badsect to flag bad sectors */
+ vattr.va_type = VBAD;
+ break;
+ case S_IFCHR:
+ vattr.va_type = VCHR;
+ break;
+ case S_IFBLK:
+ vattr.va_type = VBLK;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ }
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (vp)
+ vrele(vp);
+ }
+ return (error);
+}
+
+/*
+ * Create named pipe.
+ */
+struct mkfifo_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+mkfifo(p, uap, retval)
+ struct proc *p;
+ register struct mkfifo_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+#ifndef FIFO
+ return (EOPNOTSUPP);
+#else
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ if (nd.ni_vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ return (EEXIST);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VFIFO;
+ vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
+#endif /* FIFO */
+}
+
+/*
+ * Make a hard file link.
+ */
+struct link_args {
+ char *path;
+ char *link;
+};
+/* ARGSUSED */
+link(p, uap, retval)
+ struct proc *p;
+ register struct link_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct nameidata nd;
+ int error;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ nd.ni_dirp = uap->link;
+ if ((error = namei(&nd)) == 0) {
+ if (nd.ni_vp != NULL)
+ error = EEXIST;
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp,
+ p, p->p_ucred, LEASE_WRITE);
+ LEASE_CHECK(vp,
+ p, p->p_ucred, LEASE_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ }
+ }
+ }
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Make a symbolic link.
+ */
+struct symlink_args {
+ char *path;
+ char *link;
+};
+/* ARGSUSED */
+symlink(p, uap, retval)
+ struct proc *p;
+ register struct symlink_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ char *path;
+ int error;
+ struct nameidata nd;
+
+ MALLOC(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ if (error = copyinstr(uap->path, path, MAXPATHLEN, NULL))
+ goto out;
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p);
+ if (error = namei(&nd))
+ goto out;
+ if (nd.ni_vp) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ error = EEXIST;
+ goto out;
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
+out:
+ FREE(path, M_NAMEI);
+ return (error);
+}
+
+/*
+ * Delete a name from the filesystem.
+ */
+struct unlink_args {
+ char *path;
+};
+/* ARGSUSED */
+unlink(p, uap, retval)
+ struct proc *p;
+ struct unlink_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+ else
+ (void)vnode_pager_uncache(vp);
+ }
+
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ return (error);
+}
+
+/*
+ * Reposition read/write file offset.
+ */
+struct lseek_args {
+ int fd;
+ int pad;
+ off_t offset;
+ int whence;
+};
+lseek(p, uap, retval)
+ struct proc *p;
+ register struct lseek_args *uap;
+ int *retval;
+{
+ struct ucred *cred = p->p_ucred;
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct vattr vattr;
+ int error;
+
+ if ((u_int)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_VNODE)
+ return (ESPIPE);
+ switch (uap->whence) {
+ case L_INCR:
+ fp->f_offset += uap->offset;
+ break;
+ case L_XTND:
+ if (error =
+ VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p))
+ return (error);
+ fp->f_offset = uap->offset + vattr.va_size;
+ break;
+ case L_SET:
+ fp->f_offset = uap->offset;
+ break;
+ default:
+ return (EINVAL);
+ }
+ *(off_t *)retval = fp->f_offset;
+ return (0);
+}
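+
+/*
+ * Note: L_SET, L_INCR and L_XTND are the historic spellings of SEEK_SET,
+ * SEEK_CUR and SEEK_END (0, 1 and 2 respectively).  An illustrative
+ * userland use (not part of this file):
+ *
+ *	off_t size;
+ *
+ *	size = lseek(fd, (off_t)0, SEEK_END);	(returns the file's size)
+ */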
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Reposition read/write file offset.
+ */
+struct olseek_args {
+ int fd;
+ long offset;
+ int whence;
+};
+olseek(p, uap, retval)
+ struct proc *p;
+ register struct olseek_args *uap;
+ int *retval;
+{
+ struct lseek_args nuap;
+ off_t qret;
+ int error;
+
+ nuap.fd = uap->fd;
+ nuap.offset = uap->offset;
+ nuap.whence = uap->whence;
+ error = lseek(p, &nuap, &qret);
+ *(long *)retval = qret;
+ return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Check access permissions.
+ */
+struct access_args {
+ char *path;
+ int flags;
+};
+access(p, uap, retval)
+ struct proc *p;
+ register struct access_args *uap;
+ int *retval;
+{
+ register struct ucred *cred = p->p_ucred;
+ register struct vnode *vp;
+ int error, flags, t_gid, t_uid;
+ struct nameidata nd;
+
+ t_uid = cred->cr_uid;
+ t_gid = cred->cr_groups[0];
+ cred->cr_uid = p->p_cred->p_ruid;
+ cred->cr_groups[0] = p->p_cred->p_rgid;
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ goto out1;
+ vp = nd.ni_vp;
+
+ /* Flags == 0 means only check for existence. */
+ if (uap->flags) {
+ flags = 0;
+ if (uap->flags & R_OK)
+ flags |= VREAD;
+ if (uap->flags & W_OK)
+ flags |= VWRITE;
+ if (uap->flags & X_OK)
+ flags |= VEXEC;
+ if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
+ error = VOP_ACCESS(vp, flags, cred, p);
+ }
+ vput(vp);
+out1:
+ cred->cr_uid = t_uid;
+ cred->cr_groups[0] = t_gid;
+ return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Get file status; this version follows links.
+ */
+struct ostat_args {
+ char *path;
+ struct ostat *ub;
+};
+/* ARGSUSED */
+ostat(p, uap, retval)
+ struct proc *p;
+ register struct ostat_args *uap;
+ int *retval;
+{
+ struct stat sb;
+ struct ostat osb;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ error = vn_stat(nd.ni_vp, &sb, p);
+ vput(nd.ni_vp);
+ if (error)
+ return (error);
+ cvtstat(&sb, &osb);
+ error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb));
+ return (error);
+}
+
+/*
+ * Get file status; this version does not follow links.
+ */
+struct olstat_args {
+ char *path;
+ struct ostat *ub;
+};
+/* ARGSUSED */
+olstat(p, uap, retval)
+ struct proc *p;
+ register struct olstat_args *uap;
+ int *retval;
+{
+ struct stat sb;
+ struct ostat osb;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ error = vn_stat(nd.ni_vp, &sb, p);
+ vput(nd.ni_vp);
+ if (error)
+ return (error);
+ cvtstat(&sb, &osb);
+ error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb));
+ return (error);
+}
+
+/*
+ * Convert from an old to a new stat structure.
+ */
+cvtstat(st, ost)
+ struct stat *st;
+ struct ostat *ost;
+{
+
+ ost->st_dev = st->st_dev;
+ ost->st_ino = st->st_ino;
+ ost->st_mode = st->st_mode;
+ ost->st_nlink = st->st_nlink;
+ ost->st_uid = st->st_uid;
+ ost->st_gid = st->st_gid;
+ ost->st_rdev = st->st_rdev;
+ if (st->st_size < (quad_t)1 << 32)
+ ost->st_size = st->st_size;
+ else
+ ost->st_size = -2;
+ ost->st_atime = st->st_atime;
+ ost->st_mtime = st->st_mtime;
+ ost->st_ctime = st->st_ctime;
+ ost->st_blksize = st->st_blksize;
+ ost->st_blocks = st->st_blocks;
+ ost->st_flags = st->st_flags;
+ ost->st_gen = st->st_gen;
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Get file status; this version follows links.
+ */
+struct stat_args {
+ char *path;
+ struct stat *ub;
+};
+/* ARGSUSED */
+stat(p, uap, retval)
+ struct proc *p;
+ register struct stat_args *uap;
+ int *retval;
+{
+ struct stat sb;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ error = vn_stat(nd.ni_vp, &sb, p);
+ vput(nd.ni_vp);
+ if (error)
+ return (error);
+ error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+ return (error);
+}
+
+/*
+ * Get file status; this version does not follow links.
+ */
+struct lstat_args {
+ char *path;
+ struct stat *ub;
+};
+/* ARGSUSED */
+lstat(p, uap, retval)
+ struct proc *p;
+ register struct lstat_args *uap;
+ int *retval;
+{
+ int error;
+ struct vnode *vp, *dvp;
+ struct stat sb, sb1;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE,
+ uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+	/*
+	 * For symbolic links, always return the attributes of the
+	 * containing directory, except for the mode, size, link count,
+	 * and block count, which are taken from the link itself.
+	 */
+ vp = nd.ni_vp;
+ dvp = nd.ni_dvp;
+ if (vp->v_type != VLNK) {
+ if (dvp == vp)
+ vrele(dvp);
+ else
+ vput(dvp);
+ error = vn_stat(vp, &sb, p);
+ vput(vp);
+ if (error)
+ return (error);
+ } else {
+ error = vn_stat(dvp, &sb, p);
+ vput(dvp);
+ if (error) {
+ vput(vp);
+ return (error);
+ }
+ error = vn_stat(vp, &sb1, p);
+ vput(vp);
+ if (error)
+ return (error);
+ sb.st_mode &= ~S_IFDIR;
+ sb.st_mode |= S_IFLNK;
+ sb.st_nlink = sb1.st_nlink;
+ sb.st_size = sb1.st_size;
+ sb.st_blocks = sb1.st_blocks;
+ }
+ error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+ return (error);
+}
+
+/*
+ * Get configurable pathname variables.
+ */
+struct pathconf_args {
+ char *path;
+ int name;
+};
+/* ARGSUSED */
+pathconf(p, uap, retval)
+ struct proc *p;
+ register struct pathconf_args *uap;
+ int *retval;
+{
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ error = VOP_PATHCONF(nd.ni_vp, uap->name, retval);
+ vput(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * Return target name of a symbolic link.
+ */
+struct readlink_args {
+ char *path;
+ char *buf;
+ int count;
+};
+/* ARGSUSED */
+readlink(p, uap, retval)
+ struct proc *p;
+ register struct readlink_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct iovec aiov;
+ struct uio auio;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+	if (vp->v_type != VLNK) {
+		/* Leave uio_resid valid so *retval below reports 0. */
+		auio.uio_resid = uap->count;
+		error = EINVAL;
+	} else {
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = 0;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ auio.uio_resid = uap->count;
+ error = VOP_READLINK(vp, &auio, p->p_ucred);
+ }
+ vput(vp);
+ *retval = uap->count - auio.uio_resid;
+ return (error);
+}
+
+/*
+ * Change flags of a file given a path name.
+ */
+struct chflags_args {
+ char *path;
+ int flags;
+};
+/* ARGSUSED */
+chflags(p, uap, retval)
+ struct proc *p;
+ register struct chflags_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_flags = uap->flags;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Change flags of a file given a file descriptor.
+ */
+struct fchflags_args {
+ int fd;
+ int flags;
+};
+/* ARGSUSED */
+fchflags(p, uap, retval)
+ struct proc *p;
+ register struct fchflags_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_flags = uap->flags;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * Change mode of a file given path name.
+ */
+struct chmod_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+chmod(p, uap, retval)
+ struct proc *p;
+ register struct chmod_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_mode = uap->mode & ALLPERMS;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Change mode of a file given a file descriptor.
+ */
+struct fchmod_args {
+ int fd;
+ int mode;
+};
+/* ARGSUSED */
+fchmod(p, uap, retval)
+ struct proc *p;
+ register struct fchmod_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_mode = uap->mode & ALLPERMS;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * Set ownership given a path name.
+ */
+struct chown_args {
+ char *path;
+ int uid;
+ int gid;
+};
+/* ARGSUSED */
+chown(p, uap, retval)
+ struct proc *p;
+ register struct chown_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_uid = uap->uid;
+ vattr.va_gid = uap->gid;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Set ownership given a file descriptor.
+ */
+struct fchown_args {
+ int fd;
+ int uid;
+ int gid;
+};
+/* ARGSUSED */
+fchown(p, uap, retval)
+ struct proc *p;
+ register struct fchown_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_uid = uap->uid;
+ vattr.va_gid = uap->gid;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * Set the access and modification times of a file.
+ */
+struct utimes_args {
+ char *path;
+ struct timeval *tptr;
+};
+/* ARGSUSED */
+utimes(p, uap, retval)
+ struct proc *p;
+ register struct utimes_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct timeval tv[2];
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ VATTR_NULL(&vattr);
+ if (uap->tptr == NULL) {
+ microtime(&tv[0]);
+ tv[1] = tv[0];
+ vattr.va_vaflags |= VA_UTIMES_NULL;
+ } else if (error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv)))
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ vattr.va_atime.ts_sec = tv[0].tv_sec;
+ vattr.va_atime.ts_nsec = tv[0].tv_usec * 1000;
+ vattr.va_mtime.ts_sec = tv[1].tv_sec;
+ vattr.va_mtime.ts_nsec = tv[1].tv_usec * 1000;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Truncate a file given its path name.
+ */
+struct truncate_args {
+ char *path;
+ int pad;
+ off_t length;
+};
+/* ARGSUSED */
+truncate(p, uap, retval)
+ struct proc *p;
+ register struct truncate_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_type == VDIR)
+ error = EISDIR;
+ else if ((error = vn_writechk(vp)) == 0 &&
+ (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
+ VATTR_NULL(&vattr);
+ vattr.va_size = uap->length;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ */
+struct ftruncate_args {
+ int fd;
+ int pad;
+ off_t length;
+};
+/* ARGSUSED */
+ftruncate(p, uap, retval)
+ struct proc *p;
+ register struct ftruncate_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FWRITE) == 0)
+ return (EINVAL);
+ vp = (struct vnode *)fp->f_data;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_type == VDIR)
+ error = EISDIR;
+ else if ((error = vn_writechk(vp)) == 0) {
+ VATTR_NULL(&vattr);
+ vattr.va_size = uap->length;
+ error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
+ }
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Truncate a file given its path name.
+ */
+struct otruncate_args {
+ char *path;
+ long length;
+};
+/* ARGSUSED */
+otruncate(p, uap, retval)
+ struct proc *p;
+ register struct otruncate_args *uap;
+ int *retval;
+{
+ struct truncate_args nuap;
+
+ nuap.path = uap->path;
+ nuap.length = uap->length;
+ return (truncate(p, &nuap, retval));
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ */
+struct oftruncate_args {
+ int fd;
+ long length;
+};
+/* ARGSUSED */
+oftruncate(p, uap, retval)
+ struct proc *p;
+ register struct oftruncate_args *uap;
+ int *retval;
+{
+ struct ftruncate_args nuap;
+
+ nuap.fd = uap->fd;
+ nuap.length = uap->length;
+ return (ftruncate(p, &nuap, retval));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Sync an open file.
+ */
+struct fsync_args {
+ int fd;
+};
+/* ARGSUSED */
+fsync(p, uap, retval)
+ struct proc *p;
+ struct fsync_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ VOP_LOCK(vp);
+ error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * Rename files. Source and destination must either both be directories,
+ * or both not be directories. If target is a directory, it must be empty.
+ */
+struct rename_args {
+ char *from;
+ char *to;
+};
+/* ARGSUSED */
+rename(p, uap, retval)
+ struct proc *p;
+ register struct rename_args *uap;
+ int *retval;
+{
+ register struct vnode *tvp, *fvp, *tdvp;
+ struct nameidata fromnd, tond;
+ int error;
+
+ NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
+ uap->from, p);
+ if (error = namei(&fromnd))
+ return (error);
+ fvp = fromnd.ni_vp;
+ NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART,
+ UIO_USERSPACE, uap->to, p);
+ if (error = namei(&tond)) {
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ goto out1;
+ }
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+ if (tvp != NULL) {
+ if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ }
+ }
+ if (fvp == tdvp)
+ error = EINVAL;
+	/*
+	 * If the source is the same as the destination (that is, the
+	 * same inode number with the same name in the same directory),
+	 * then there is nothing to do.
+	 */
+ if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+ fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+ !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+ fromnd.ni_cnd.cn_namelen))
+ error = -1;
+out:
+ if (!error) {
+ LEASE_CHECK(tdvp, p, p->p_ucred, LEASE_WRITE);
+ if (fromnd.ni_dvp != tdvp)
+ LEASE_CHECK(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ if (tvp)
+ LEASE_CHECK(tvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+ tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+ } else {
+ VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ }
+ vrele(tond.ni_startdir);
+ FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+ if (fromnd.ni_startdir)
+ vrele(fromnd.ni_startdir);
+ FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+ if (error == -1)
+ return (0);
+ return (error);
+}
+
+/*
+ * Make a directory file.
+ */
+struct mkdir_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+mkdir(p, uap, retval)
+ struct proc *p;
+ register struct mkdir_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ return (EEXIST);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VDIR;
+ vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+ if (!error)
+ vput(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * Remove a directory file.
+ */
+struct rmdir_args {
+ char *path;
+};
+/* ARGSUSED */
+rmdir(p, uap, retval)
+ struct proc *p;
+ struct rmdir_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ /*
+ * No rmdir "." please.
+ */
+ if (nd.ni_dvp == vp) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+out:
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ return (error);
+}
+
+#ifdef COMPAT_43
+/*
+ * Read a block of directory entries in a file system independent format.
+ */
+struct ogetdirentries_args {
+ int fd;
+ char *buf;
+ u_int count;
+ long *basep;
+};
+ogetdirentries(p, uap, retval)
+ struct proc *p;
+ register struct ogetdirentries_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct file *fp;
+ struct uio auio, kuio;
+ struct iovec aiov, kiov;
+ struct dirent *dp, *edp;
+ caddr_t dirbuf;
+ int error, readcnt;
+ long loff;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+ if (vp->v_type != VDIR)
+ return (EINVAL);
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ auio.uio_resid = uap->count;
+ VOP_LOCK(vp);
+ loff = auio.uio_offset = fp->f_offset;
+# if (BYTE_ORDER != LITTLE_ENDIAN)
+ if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+ error = VOP_READDIR(vp, &auio, fp->f_cred);
+ fp->f_offset = auio.uio_offset;
+ } else
+# endif
+ {
+ kuio = auio;
+ kuio.uio_iov = &kiov;
+ kuio.uio_segflg = UIO_SYSSPACE;
+ kiov.iov_len = uap->count;
+ MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
+ kiov.iov_base = dirbuf;
+ error = VOP_READDIR(vp, &kuio, fp->f_cred);
+ fp->f_offset = kuio.uio_offset;
+ if (error == 0) {
+ readcnt = uap->count - kuio.uio_resid;
+ edp = (struct dirent *)&dirbuf[readcnt];
+ for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+ /*
+ * The expected low byte of
+ * dp->d_namlen is our dp->d_type.
+ * The high MBZ byte of dp->d_namlen
+ * is our dp->d_namlen.
+ */
+ dp->d_type = dp->d_namlen;
+ dp->d_namlen = 0;
+# else
+ /*
+ * The dp->d_type is the high byte
+ * of the expected dp->d_namlen,
+ * so must be zero'ed.
+ */
+ dp->d_type = 0;
+# endif
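+				/*
+				 * Layout sketch, for clarity: both the
+				 * old and new dirent begin with a 32-bit
+				 * inode number and a 16-bit d_reclen.
+				 * The old format then carries a 16-bit
+				 * d_namlen where the new format packs
+				 * the d_type and d_namlen bytes, so the
+				 * fixups above merely rearrange that
+				 * byte pair to suit the byte order.
+				 */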
+ if (dp->d_reclen > 0) {
+ dp = (struct dirent *)
+ ((char *)dp + dp->d_reclen);
+ } else {
+ error = EIO;
+ break;
+ }
+ }
+ if (dp >= edp)
+ error = uiomove(dirbuf, readcnt, &auio);
+ }
+ FREE(dirbuf, M_TEMP);
+ }
+ VOP_UNLOCK(vp);
+ if (error)
+ return (error);
+ error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+ *retval = uap->count - auio.uio_resid;
+ return (error);
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ */
+struct getdirentries_args {
+ int fd;
+ char *buf;
+ u_int count;
+ long *basep;
+};
+getdirentries(p, uap, retval)
+ struct proc *p;
+ register struct getdirentries_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct file *fp;
+ struct uio auio;
+ struct iovec aiov;
+ long loff;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+unionread:
+ if (vp->v_type != VDIR)
+ return (EINVAL);
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ auio.uio_resid = uap->count;
+ VOP_LOCK(vp);
+ loff = auio.uio_offset = fp->f_offset;
+ error = VOP_READDIR(vp, &auio, fp->f_cred);
+ fp->f_offset = auio.uio_offset;
+ VOP_UNLOCK(vp);
+ if (error)
+ return (error);
+
+#ifdef UNION
+{
+ extern int (**union_vnodeop_p)();
+ extern struct vnode *union_lowervp __P((struct vnode *));
+
+ if ((uap->count == auio.uio_resid) &&
+ (vp->v_op == union_vnodeop_p)) {
+ struct vnode *tvp = vp;
+
+ vp = union_lowervp(vp);
+ if (vp != NULLVP) {
+ VOP_LOCK(vp);
+ error = VOP_OPEN(vp, FREAD);
+ VOP_UNLOCK(vp);
+
+ if (error) {
+ vrele(vp);
+ return (error);
+ }
+ fp->f_data = (caddr_t) vp;
+ fp->f_offset = 0;
+ error = vn_close(tvp, FREAD, fp->f_cred, p);
+ if (error)
+ return (error);
+ goto unionread;
+ }
+ }
+}
+#endif
+
+ if ((uap->count == auio.uio_resid) &&
+ (vp->v_flag & VROOT) &&
+ (vp->v_mount->mnt_flag & MNT_UNION)) {
+ struct vnode *tvp = vp;
+ vp = vp->v_mount->mnt_vnodecovered;
+ VREF(vp);
+ fp->f_data = (caddr_t) vp;
+ fp->f_offset = 0;
+ vrele(tvp);
+ goto unionread;
+ }
+ error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+ *retval = uap->count - auio.uio_resid;
+ return (error);
+}
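+
+/*
+ * Illustrative userland sketch (not part of this file): callers read a
+ * directory by looping until getdirentries(2) returns 0, stepping through
+ * each buffer by d_reclen; fd is an open directory descriptor:
+ *
+ *	struct dirent *dp;
+ *	char buf[8192], *cp;
+ *	long base;
+ *	int n;
+ *
+ *	while ((n = getdirentries(fd, buf, sizeof(buf), &base)) > 0)
+ *		for (cp = buf; cp < buf + n; cp += dp->d_reclen) {
+ *			dp = (struct dirent *)cp;
+ *			printf("%s\n", dp->d_name);
+ *		}
+ */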
+
+/*
+ * Set the mode mask for creation of filesystem nodes.
+ */
+struct umask_args {
+ int newmask;
+};
+mode_t /* XXX */
+umask(p, uap, retval)
+ struct proc *p;
+ struct umask_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp;
+
+ fdp = p->p_fd;
+ *retval = fdp->fd_cmask;
+ fdp->fd_cmask = uap->newmask & ALLPERMS;
+ return (0);
+}
+
+/*
+ * Void all references to a file by ripping the underlying filesystem
+ * away from the vnode.
+ */
+struct revoke_args {
+ char *path;
+};
+/* ARGSUSED */
+revoke(p, uap, retval)
+ struct proc *p;
+ register struct revoke_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VCHR && vp->v_type != VBLK) {
+ error = EINVAL;
+ goto out;
+ }
+ if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
+ goto out;
+ if (p->p_ucred->cr_uid != vattr.va_uid &&
+ (error = suser(p->p_ucred, &p->p_acflag)))
+ goto out;
+ if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
+ vgoneall(vp);
+out:
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Convert a user file descriptor to a kernel file entry.
+ */
+getvnode(fdp, fd, fpp)
+	struct filedesc *fdp;
+	int fd;
+	struct file **fpp;
+{
+ struct file *fp;
+
+ if ((u_int)fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[fd]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_VNODE)
+ return (EINVAL);
+ *fpp = fp;
+ return (0);
+}
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
new file mode 100644
index 0000000..2fe39eb
--- /dev/null
+++ b/sys/kern/vfs_mount.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_conf.c 8.8 (Berkeley) 3/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#ifdef FFS
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * This specifies the filesystem used to mount the root.
+ * This specification should be done by /etc/config.
+ */
+int (*mountroot)() = ffs_mountroot;
+#endif
+
+/*
+ * These define the root filesystem and device.
+ */
+struct mount *rootfs;
+struct vnode *rootvnode;
+
+/*
+ * Set up the filesystem operations for vnodes.
+ * The types are defined in mount.h.
+ */
+#ifdef FFS
+extern struct vfsops ufs_vfsops;
+#define UFS_VFSOPS &ufs_vfsops
+#else
+#define UFS_VFSOPS NULL
+#endif
+
+#ifdef LFS
+extern struct vfsops lfs_vfsops;
+#define LFS_VFSOPS &lfs_vfsops
+#else
+#define LFS_VFSOPS NULL
+#endif
+
+#ifdef MFS
+extern struct vfsops mfs_vfsops;
+#define MFS_VFSOPS &mfs_vfsops
+#else
+#define MFS_VFSOPS NULL
+#endif
+
+#ifdef NFS
+extern struct vfsops nfs_vfsops;
+#define NFS_VFSOPS &nfs_vfsops
+#else
+#define NFS_VFSOPS NULL
+#endif
+
+#ifdef FDESC
+extern struct vfsops fdesc_vfsops;
+#define FDESC_VFSOPS &fdesc_vfsops
+#else
+#define FDESC_VFSOPS NULL
+#endif
+
+#ifdef PORTAL
+extern struct vfsops portal_vfsops;
+#define PORTAL_VFSOPS &portal_vfsops
+#else
+#define PORTAL_VFSOPS NULL
+#endif
+
+#ifdef NULLFS
+extern struct vfsops null_vfsops;
+#define NULL_VFSOPS &null_vfsops
+#else
+#define NULL_VFSOPS NULL
+#endif
+
+#ifdef UMAPFS
+extern struct vfsops umap_vfsops;
+#define UMAP_VFSOPS &umap_vfsops
+#else
+#define UMAP_VFSOPS NULL
+#endif
+
+#ifdef KERNFS
+extern struct vfsops kernfs_vfsops;
+#define KERNFS_VFSOPS &kernfs_vfsops
+#else
+#define KERNFS_VFSOPS NULL
+#endif
+
+#ifdef PROCFS
+extern struct vfsops procfs_vfsops;
+#define PROCFS_VFSOPS &procfs_vfsops
+#else
+#define PROCFS_VFSOPS NULL
+#endif
+
+#ifdef AFS
+extern struct vfsops afs_vfsops;
+#define AFS_VFSOPS &afs_vfsops
+#else
+#define AFS_VFSOPS NULL
+#endif
+
+#ifdef CD9660
+extern struct vfsops cd9660_vfsops;
+#define CD9660_VFSOPS &cd9660_vfsops
+#else
+#define CD9660_VFSOPS NULL
+#endif
+
+#ifdef UNION
+extern struct vfsops union_vfsops;
+#define UNION_VFSOPS &union_vfsops
+#else
+#define UNION_VFSOPS NULL
+#endif
+
+struct vfsops *vfssw[] = {
+ NULL, /* 0 = MOUNT_NONE */
+ UFS_VFSOPS, /* 1 = MOUNT_UFS */
+ NFS_VFSOPS, /* 2 = MOUNT_NFS */
+ MFS_VFSOPS, /* 3 = MOUNT_MFS */
+ NULL, /* 4 = MOUNT_PC */
+ LFS_VFSOPS, /* 5 = MOUNT_LFS */
+ NULL, /* 6 = MOUNT_LOFS */
+ FDESC_VFSOPS, /* 7 = MOUNT_FDESC */
+ PORTAL_VFSOPS, /* 8 = MOUNT_PORTAL */
+ NULL_VFSOPS, /* 9 = MOUNT_NULL */
+ UMAP_VFSOPS, /* 10 = MOUNT_UMAP */
+ KERNFS_VFSOPS, /* 11 = MOUNT_KERNFS */
+ PROCFS_VFSOPS, /* 12 = MOUNT_PROCFS */
+ AFS_VFSOPS, /* 13 = MOUNT_AFS */
+ CD9660_VFSOPS, /* 14 = MOUNT_CD9660 */
+ UNION_VFSOPS, /* 15 = MOUNT_UNION */
+ 0
+};
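+
+/*
+ * The mount(2) implementation uses its type argument to index this
+ * table; in rough outline (a sketch, not the actual mount code):
+ *
+ *	if ((u_long)type > MOUNT_MAXTYPE || vfssw[type] == NULL)
+ *		return (ENODEV);
+ *	mp->mnt_op = vfssw[type];
+ */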
+
+
+/*
+ * vfs_opv_descs enumerates the list of vnode classes, each with its own
+ * vnode operation vector. It is consulted at system boot to build operation
+ * vectors. It is NULL terminated.
+ */
+extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
+extern struct vnodeopv_desc ffs_specop_opv_desc;
+extern struct vnodeopv_desc ffs_fifoop_opv_desc;
+extern struct vnodeopv_desc lfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc lfs_specop_opv_desc;
+extern struct vnodeopv_desc lfs_fifoop_opv_desc;
+extern struct vnodeopv_desc mfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc dead_vnodeop_opv_desc;
+extern struct vnodeopv_desc fifo_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_vnodeop_opv_desc;
+extern struct vnodeopv_desc nfsv2_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fdesc_vnodeop_opv_desc;
+extern struct vnodeopv_desc portal_vnodeop_opv_desc;
+extern struct vnodeopv_desc null_vnodeop_opv_desc;
+extern struct vnodeopv_desc umap_vnodeop_opv_desc;
+extern struct vnodeopv_desc kernfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc procfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_specop_opv_desc;
+extern struct vnodeopv_desc cd9660_fifoop_opv_desc;
+extern struct vnodeopv_desc union_vnodeop_opv_desc;
+
+struct vnodeopv_desc *vfs_opv_descs[] = {
+ &ffs_vnodeop_opv_desc,
+ &ffs_specop_opv_desc,
+#ifdef FIFO
+ &ffs_fifoop_opv_desc,
+#endif
+ &dead_vnodeop_opv_desc,
+#ifdef FIFO
+ &fifo_vnodeop_opv_desc,
+#endif
+ &spec_vnodeop_opv_desc,
+#ifdef LFS
+ &lfs_vnodeop_opv_desc,
+ &lfs_specop_opv_desc,
+#ifdef FIFO
+ &lfs_fifoop_opv_desc,
+#endif
+#endif
+#ifdef MFS
+ &mfs_vnodeop_opv_desc,
+#endif
+#ifdef NFS
+ &nfsv2_vnodeop_opv_desc,
+ &spec_nfsv2nodeop_opv_desc,
+#ifdef FIFO
+ &fifo_nfsv2nodeop_opv_desc,
+#endif
+#endif
+#ifdef FDESC
+ &fdesc_vnodeop_opv_desc,
+#endif
+#ifdef PORTAL
+ &portal_vnodeop_opv_desc,
+#endif
+#ifdef NULLFS
+ &null_vnodeop_opv_desc,
+#endif
+#ifdef UMAPFS
+ &umap_vnodeop_opv_desc,
+#endif
+#ifdef KERNFS
+ &kernfs_vnodeop_opv_desc,
+#endif
+#ifdef PROCFS
+ &procfs_vnodeop_opv_desc,
+#endif
+#ifdef CD9660
+ &cd9660_vnodeop_opv_desc,
+ &cd9660_specop_opv_desc,
+#ifdef FIFO
+ &cd9660_fifoop_opv_desc,
+#endif
+#endif
+#ifdef UNION
+ &union_vnodeop_opv_desc,
+#endif
+ NULL
+};
diff --git a/sys/kern/vnode_if.pl b/sys/kern/vnode_if.pl
new file mode 100644
index 0000000..e190fa0
--- /dev/null
+++ b/sys/kern/vnode_if.pl
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+# (where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written; specifically, they
+# don't use arrays well and recompute the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk. Note
+# that they use nawk extensions and gawk's toupper.
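+#
+# For reference, an entry in vnode_if.src has roughly this shape (this
+# particular entry is illustrative, not quoted from the file):
+#
+#	vop_lookup {
+#		IN struct vnode *dvp;
+#		INOUT struct vnode **vpp;
+#		IN struct componentname *cnp;
+#	};
+#
+# from which the scripts below emit a struct vop_lookup_args and an inline
+# VOP_LOOKUP() wrapper into vnode_if.h, and a vop_lookup_desc into
+# vnode_if.c.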
+
+if [ $# -ne 1 ] ; then
+ echo 'usage: vnode_if.sh srcfile'
+ exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+
+# Names of the created files.
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ name = $1;
+ uname = toupper(name);
+
+ # Get the function arguments.
+ for (c1 = 0;; ++c1) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ a[c1] = $0;
+ }
+
+ # Print out the vop_F_args structure.
+ printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n",
+ name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%sa_%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("};\n");
+
+ # Print out extern declaration.
+ printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+ # Print out inline struct.
+ printf("static inline int %s(", uname);
+ sep = ", ";
+ for (c2 = 0; c2 < c1; ++c2) {
+ if (c2 == c1 - 1)
+ sep = ")\n";
+ c3 = split(a[c2], t);
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("%s%s", substr(t[c3], beg, end - beg), sep);
+ }
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%s%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("{\n\tstruct %s_args a;\n\n", name);
+ printf("\ta.a_desc = VDESC(%s);\n", name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("a.a_%s = %s\n",
+ substr(t[c3], beg, end - beg), substr(t[c3], beg));
+ }
+ c1 = split(a[0], t);
+ beg = match(t[c1], "[^*]");
+ end = match(t[c1], ";");
+ printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+ substr(t[c1], beg, end - beg), name);
+ }' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+ 0,
+ "default",
+ 0,
+ NULL,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+ sub (/^[ \t]*/, "", s);
+ sub (/[ \t]*$/, "", s);
+ return s;
+ }
+
+ function read_args() {
+ numargs = 0;
+ while (getline ln) {
+ if (ln ~ /}/) {
+ break;
+ };
+
+ # Delete comments, if any.
+ gsub (/\/\*.*\*\//, "", ln);
+
+ # Delete leading/trailing space.
+ ln = kill_surrounding_ws(ln);
+
+ # Pick off direction.
+ if (1 == sub(/^INOUT[ \t]+/, "", ln))
+ dir = "INOUT";
+ else if (1 == sub(/^IN[ \t]+/, "", ln))
+ dir = "IN";
+ else if (1 == sub(/^OUT[ \t]+/, "", ln))
+ dir = "OUT";
+ else
+ bail("No IN/OUT direction for \"" ln "\".");
+
+ # check for "WILLRELE"
+ if (1 == sub(/^WILLRELE[ \t]+/, "", ln)) {
+ rele = "WILLRELE";
+ } else {
+ rele = "WONTRELE";
+ };
+
+ # kill trailing ;
+ if (1 != sub (/;$/, "", ln)) {
+ bail("Missing end-of-line ; in \"" ln "\".");
+ };
+
+ # pick off variable name
+ if (!(i = match(ln, /[A-Za-z0-9_]+$/))) {
+ bail("Missing var name \"a_foo\" in \"" ln "\".");
+ };
+ arg = substr (ln, i);
+ # Want to <<substr(ln, i) = "";>>, but nawk cannot.
+ # Hack around this.
+ ln = substr(ln, 1, i-1);
+
+			# what is left must be the type
+			# (but clean it up some)
+ type = ln;
+ gsub (/[ \t]+/, " ", type); # condense whitespace
+ type = kill_surrounding_ws(type);
+
+ # (boy this was easier in Perl)
+
+ numargs++;
+ dirs[numargs] = dir;
+ reles[numargs] = rele;
+ types[numargs] = type;
+ args[numargs] = arg;
+ };
+ }
+
+ function generate_operation_vp_offsets() {
+ printf ("int %s_vp_offsets[] = {\n", name);
+ # as a side effect, figure out the releflags
+ releflags = "";
+ vpnum = 0;
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode *") {
+ printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+ name, args[i]);
+ if (reles[i] == "WILLRELE") {
+ releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+ };
+ vpnum++;
+ };
+ };
+ sub (/^\|/, "", releflags);
+ print "\tVDESC_NO_OFFSET";
+ print "};";
+ }
+
+ function find_arg_with_type (type) {
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == type) {
+ return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+ };
+ };
+ return "VDESC_NO_OFFSET";
+ }
+
+ function generate_operation_desc() {
+ printf ("struct vnodeop_desc %s_desc = {\n", name);
+ # offset
+ printf ("\t0,\n");
+ # printable name
+ printf ("\t\"%s\",\n", name);
+ # flags
+ vppwillrele = "";
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode **" &&
+ (reles[i] == "WILLRELE")) {
+ vppwillrele = "|VDESC_VPP_WILLRELE";
+ };
+ };
+ if (releflags == "") {
+ printf ("\t0%s,\n", vppwillrele);
+ } else {
+ printf ("\t%s%s,\n", releflags, vppwillrele);
+ };
+ # vp offsets
+ printf ("\t%s_vp_offsets,\n", name);
+ # vpp (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+ # cred (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+ # proc (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+ # componentname
+ printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+ # transport layer information
+ printf ("\tNULL,\n};\n");
+ }
+
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # get the function name
+ name = $1;
+
+ # get the function arguments
+ read_args();
+
+ # Print out the vop_F_vp_offsets structure. This all depends
+ # on naming conventions and nothing else.
+ generate_operation_vp_offsets();
+
+ # Print out the vnodeop_desc structure.
+ generate_operation_desc();
+
+ printf "\n";
+
+ }' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+#
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments. This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+ struct buf *bp;
+{
+ struct vop_strategy_args a;
+
+ a.a_desc = VDESC(vop_strategy);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+ struct buf *bp;
+{
+ struct vop_bwrite_args a;
+
+ a.a_desc = VDESC(vop_bwrite);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+ 0,
+ "vop_strategy",
+ 0,
+ vop_strategy_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+ 0,
+ "vop_bwrite",
+ 0,
+ vop_bwrite_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Add the vfs_op_descs array to the C file.
+$AWK '
+ BEGIN {
+ printf("\nstruct vnodeop_desc *vfs_op_descs[] = {\n");
+ printf("\t&vop_default_desc, /* MUST BE FIRST */\n");
+ printf("\t&vop_strategy_desc, /* XXX: SPECIAL CASE */\n");
+ printf("\t&vop_bwrite_desc, /* XXX: SPECIAL CASE */\n");
+ }
+ END {
+ printf("\tNULL\n};\n");
+ }
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ printf("\t&%s_desc,\n", $1);
+
+ # Skip the function arguments.
+ for (;;) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ }
+ }' < $SRC >> $CFILE
+