summaryrefslogtreecommitdiffstats
path: root/share/doc/papers/memfs
diff options
context:
space:
mode:
Diffstat (limited to 'share/doc/papers/memfs')
-rw-r--r--share/doc/papers/memfs/0.t86
-rw-r--r--share/doc/papers/memfs/1.t392
-rw-r--r--share/doc/papers/memfs/A.t173
-rw-r--r--share/doc/papers/memfs/Makefile22
-rw-r--r--share/doc/papers/memfs/ref.bib49
-rw-r--r--share/doc/papers/memfs/spell.ok18
-rw-r--r--share/doc/papers/memfs/tmac.srefs177
7 files changed, 917 insertions, 0 deletions
diff --git a/share/doc/papers/memfs/0.t b/share/doc/papers/memfs/0.t
new file mode 100644
index 0000000..d476f17
--- /dev/null
+++ b/share/doc/papers/memfs/0.t
@@ -0,0 +1,86 @@
+.\" Copyright (c) 1990 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 5.1 (Berkeley) 4/16/91
+.\"
+.rm CM
+.nr PO 1.25i
+.ds CH "
+.ds CF "%
+.nr Fn 0 1
+.ds b3 4.3\s-1BSD\s+1
+.de KI
+.ds Lb "Fig. \\n+(Fn
+.KF
+.ce 1
+Figure \\n(Fn - \\$1.
+..
+.de SM
+\\s-1\\$1\\s+1\\$2
+..
+.de NM
+\&\fI\\$1\fP\\$2
+..
+.de RN
+\&\fI\\$1\fP\^(\^)\\$2
+..
+.de PN
+\&\fB\\$1\fP\\$2
+..
+.TL
+A Pageable Memory Based Filesystem
+.AU
+Marshall Kirk McKusick
+.AU
+Michael J. Karels
+.AU
+Keith Bostic
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.sp
+email: mckusick@cs.Berkeley.EDU
+telephone: 415-642-4948
+.AB
+This paper describes the motivations for memory-based filesystems.
+It compares techniques used to implement them and
+describes the drawbacks of using dedicated memory to
+support such filesystems.
+To avoid the drawbacks of using dedicated memory,
+it discusses building a simple memory-based
+filesystem in pageable memory.
+It details the performance characteristics of this filesystem
+and concludes with areas for future work.
+.AE
+.LP
diff --git a/share/doc/papers/memfs/1.t b/share/doc/papers/memfs/1.t
new file mode 100644
index 0000000..a065844
--- /dev/null
+++ b/share/doc/papers/memfs/1.t
@@ -0,0 +1,392 @@
+.\" Copyright (c) 1990 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 5.1 (Berkeley) 4/16/91
+.\"
+.nr PS 11
+.nr VS 13
+.SH
+Introduction
+.PP
+This paper describes the motivation for and implementation of
+a memory-based filesystem.
+Memory-based filesystems have existed for a long time;
+they have generally been marketed as RAM disks or sometimes
+as software packages that use the machine's general purpose memory.
+.[
+white
+.]
+.PP
+A RAM disk is designed to appear like any other disk peripheral
+connected to a machine.
+It is normally interfaced to the processor through the I/O bus
+and is accessed through a device driver similar or sometimes identical
+to the device driver used for a normal magnetic disk.
+The device driver sends requests for blocks of data to the device
+and the requested data is then DMA'ed to or from the requested block.
+Instead of storing its data on a rotating magnetic disk,
+the RAM disk stores its data in a large array of random access memory
+or bubble memory.
+Thus, the latency of accessing the RAM disk is nearly zero
+compared to the 15-50 milliseconds of latency incurred when
+access rotating magnetic media.
+RAM disks also have the benefit of being able to transfer data at
+the maximum DMA rate of the system,
+while disks are typically limited by the rate that the data passes
+under the disk head.
+.PP
+Software packages simulating RAM disks operate by allocating
+a fixed partition of the system memory.
+The software then provides a device driver interface similar
+to the one described for hardware RAM disks,
+except that it uses memory-to-memory copy instead of DMA to move
+the data between the RAM disk and the system buffers,
+or it maps the contents of the RAM disk into the system buffers.
+Because the memory used by the RAM disk is not available for
+other purposes, software RAM-disk solutions are used primarily
+for machines with limited addressing capabilities such as PC's
+that do not have an effective way of using the extra memory anyway.
+.PP
+Most software RAM disks lose their contents when the system is powered
+down or rebooted.
+The contents can be saved by using battery backed-up memory,
+by storing critical filesystem data structures in the filesystem,
+and by running a consistency check program after each reboot.
+These conditions increase the hardware cost
+and potentially slow down the speed of the disk.
+Thus, RAM-disk filesystems are not typically
+designed to survive power failures;
+because of their volatility, their usefulness is limited to transient
+or easily recreated information such as might be found in
+.PN /tmp .
+Their primary benefit is that they have higher throughput
+than disk based filesystems.
+.[
+smith
+.]
+This improved throughput is particularly useful for utilities that
+make heavy use of temporary files, such as compilers.
+On fast processors, nearly half of the elapsed time for a compilation
+is spent waiting for synchronous operations required for file
+creation and deletion.
+The use of the memory-based filesystem nearly eliminates this waiting time.
+.PP
+Using dedicated memory to exclusively support a RAM disk
+is a poor use of resources.
+The overall throughput of the system can be improved
+by using the memory where it is getting the highest access rate.
+These needs may shift between supporting process virtual address spaces
+and caching frequently used disk blocks.
+If the memory is dedicated to the filesystem,
+it is better used in a buffer cache.
+The buffer cache permits faster access to the data
+because it requires only a single memory-to-memory copy
+from the kernel to the user process.
+The use of memory is used in a RAM-disk configuration may require two
+memory-to-memory copies, one from the RAM disk
+to the buffer cache,
+then another copy from the buffer cache to the user process.
+.PP
+The new work being presented in this paper is building a prototype
+RAM-disk filesystem in pageable memory instead of dedicated memory.
+The goal is to provide the speed benefits of a RAM disk
+without paying the performance penalty inherent in dedicating
+part of the physical memory on the machine to the RAM disk.
+By building the filesystem in pageable memory,
+it competes with other processes for the available memory.
+When memory runs short, the paging system pushes its
+least-recently-used pages to backing store.
+Being pageable also allows the filesystem to be much larger than
+would be practical if it were limited by the amount of physical
+memory that could be dedicated to that purpose.
+We typically operate our
+.PN /tmp
+with 30 to 60 megabytes of space
+which is larger than the amount of memory on the machine.
+This configuration allows small files to be accessed quickly,
+while still allowing
+.PN /tmp
+to be used for big files,
+although at a speed more typical of normal, disk-based filesystems.
+.PP
+An alternative to building a memory-based filesystem would be to have
+a filesystem that never did operations synchronously and never
+flushed its dirty buffers to disk.
+However, we believe that such a filesystem would either
+use a disproportionately large percentage of the buffer
+cache space, to the detriment of other filesystems,
+or would require the paging system to flush its dirty pages.
+Waiting for other filesystems to push dirty pages
+subjects them to delays while waiting for the pages to be written.
+We await the results of others trying this approach.
+.[
+Ohta
+.]
+.SH
+Implementation
+.PP
+The current implementation took less time to write than did this paper.
+It consists of 560 lines of kernel code (1.7K text + data)
+and some minor modifications to the program that builds
+disk based filesystems, \fInewfs\fP.
+A condensed version of the kernel code for the
+memory-based filesystem are reproduced in Appendix 1.
+.PP
+A filesystem is created by invoking the modified \fInewfs\fP, with
+an option telling it to create a memory-based filesystem.
+It allocates a section of virtual address space of the requested
+size and builds a filesystem in the memory
+instead of on a disk partition.
+When built, it does a \fImount\fP system call specifying a filesystem type of
+.SM MFS
+(Memory File System).
+The auxiliary data parameter to the mount call specifies a pointer
+to the base of the memory in which it has built the filesystem.
+(The auxiliary data parameter used by the local filesystem, \fIufs\fP,
+specifies the block device containing the filesystem.)
+.PP
+The mount system call allocates and initializes a mount table
+entry and then calls the filesystem-specific mount routine.
+The filesystem-specific routine is responsible for doing
+the mount and initializing the filesystem-specific
+portion of the mount table entry.
+The memory-based filesystem-specific mount routine,
+.RN mfs_mount ,
+is shown in Appendix 1.
+It allocates a block-device vnode to represent the memory disk device.
+In the private area of this vnode it stores the base address of
+the filesystem and the process identifier of the \fInewfs\fP process
+for later reference when doing I/O.
+It also initializes an I/O list that it
+uses to record outstanding I/O requests.
+It can then call the \fIufs\fP filesystem mount routine,
+passing the special block-device vnode that it has created
+instead of the usual disk block-device vnode.
+The mount proceeds just as any other local mount, except that
+requests to read from the block device are vectored through
+.RN mfs_strategy
+(described below) instead of the usual
+.RN spec_strategy
+block device I/O function.
+When the mount is completed,
+.RN mfs_mount
+does not return as most other filesystem mount functions do;
+instead it sleeps in the kernel awaiting I/O requests.
+Each time an I/O request is posted for the filesystem,
+a wakeup is issued for the corresponding \fInewfs\fP process.
+When awakened, the process checks for requests on its buffer list.
+A read request is serviced by copying data from the section of the
+\fInewfs\fP address space corresponding to the requested disk block
+to the kernel buffer.
+Similarly a write request is serviced by copying data to the section of the
+\fInewfs\fP address space corresponding to the requested disk block
+from the kernel buffer.
+When all the requests have been serviced, the \fInewfs\fP
+process returns to sleep to await more requests.
+.PP
+Once mounted,
+all operations on files in the memory-based filesystem are handled
+by the \fIufs\fP filesystem code until they get to the point where the
+filesystem needs to do I/O on the device.
+Here, the filesystem encounters the second piece of the
+memory-based filesystem.
+Instead of calling the special-device strategy routine,
+it calls the memory-based strategy routine,
+.RN mfs_strategy .
+Usually,
+the request is serviced by linking the buffer onto the
+I/O list for the memory-based filesystem
+vnode and sending a wakeup to the \fInewfs\fP process.
+This wakeup results in a context-switch to the \fInewfs\fP
+process, which does a copyin or copyout as described above.
+The strategy routine must be careful to check whether
+the I/O request is coming from the \fInewfs\fP process itself, however.
+Such requests happen during mount and unmount operations,
+when the kernel is reading and writing the superblock.
+Here,
+.RN mfs_strategy
+must do the I/O itself to avoid deadlock.
+.PP
+The final piece of kernel code to support the
+memory-based filesystem is the close routine.
+After the filesystem has been successfully unmounted,
+the device close routine is called.
+For a memory-based filesystem, the device close routine is
+.RN mfs_close .
+This routine flushes any pending I/O requests,
+then sets the I/O list head to a special value
+that is recognized by the I/O servicing loop in
+.RN mfs_mount
+as an indication that the filesystem is unmounted.
+The
+.RN mfs_mount
+routine exits, in turn causing the \fInewfs\fP process
+to exit, resulting in the filesystem vanishing in a cloud of dirty pages.
+.PP
+The paging of the filesystem does not require any additional
+code beyond that already in the kernel to support virtual memory.
+The \fInewfs\fP process competes with other processes on an equal basis
+for the machine's available memory.
+Data pages of the filesystem that have not yet been used
+are zero-fill-on-demand pages that do not occupy memory,
+although they currently allocate space in backing store.
+As long as memory is plentiful, the entire contents of the filesystem
+remain memory resident.
+When memory runs short, the oldest pages of \fInewfs\fP will be
+pushed to backing store as part of the normal paging activity.
+The pages that are pushed usually hold the contents of
+files that have been created in the memory-based filesystem
+but have not been recently accessed (or have been deleted).
+.[
+leffler
+.]
+.SH
+Performance
+.PP
+The performance of the current memory-based filesystem is determined by
+the memory-to-memory copy speed of the processor.
+Empirically we find that the throughput is about 45% of this
+memory-to-memory copy speed.
+The basic set of steps for each block written is:
+.IP 1)
+memory-to-memory copy from the user process doing the write to a kernel buffer
+.IP 2)
+context-switch to the \fInewfs\fP process
+.IP 3)
+memory-to-memory copy from the kernel buffer to the \fInewfs\fP address space
+.IP 4)
+context switch back to the writing process
+.LP
+Thus each write requires at least two memory-to-memory copies
+accounting for about 90% of the
+.SM CPU
+time.
+The remaining 10% is consumed in the context switches and
+the filesystem allocation and block location code.
+The actual context switch count is really only about half
+of the worst case outlined above because
+read-ahead and write-behind allow multiple blocks
+to be handled with each context switch.
+.PP
+On the six-\c
+.SM "MIPS CCI"
+Power 6/32 machine,
+the raw reading and writing speed is only about twice that of
+a regular disk-based filesystem.
+However, for processes that create and delete many files,
+the speedup is considerably greater.
+The reason for the speedup is that the filesystem
+must do two synchronous operations to create a file,
+first writing the allocated inode to disk, then creating the
+directory entry.
+Deleting a file similarly requires at least two synchronous
+operations.
+Here, the low latency of the memory-based filesystem is
+noticeable compared to the disk-based filesystem,
+as a synchronous operation can be done with
+just two context switches instead of incurring the disk latency.
+.SH
+Future Work
+.PP
+The most obvious shortcoming of the current implementation
+is that filesystem blocks are copied twice, once between the \fInewfs\fP
+process' address space and the kernel buffer cache,
+and once between the kernel buffer and the requesting process.
+These copies are done in different process contexts, necessitating
+two context switches per group of I/O requests.
+These problems arise because of the current inability of the kernel
+to do page-in operations
+for an address space other than that of the currently-running process,
+and the current inconvenience of mapping process-owned pages into the kernel
+buffer cache.
+Both of these problems are expected to be solved in the next version
+of the virtual memory system,
+and thus we chose not to address them in the current implementation.
+With the new version of the virtual memory system, we expect to use
+any part of physical memory as part of the buffer cache,
+even though it will not be entirely addressable at once within the kernel.
+In that system, the implementation of a memory-based filesystem
+that avoids the double copy and context switches will be much easier.
+.PP
+Ideally part of the kernel's address space would reside in pageable memory.
+Once such a facility is available it would be most efficient to
+build a memory-based filesystem within the kernel.
+One potential problem with such a scheme is that many kernels
+are limited to a small address space (usually a few megabytes).
+This restriction limits the size of memory-based
+filesystem that such a machine can support.
+On such a machine, the kernel can describe a memory-based filesystem
+that is larger than its address space and use a ``window''
+to map the larger filesystem address space into its limited address space.
+The window would maintain a cache of recently accessed pages.
+The problem with this scheme is that if the working set of
+active pages is greater than the size of the window, then
+much time is spent remapping pages and invalidating
+translation buffers.
+Alternatively, a separate address space could be constructed for each
+memory-based filesystem as in the current implementation,
+and the memory-resident pages of that address space could be mapped
+exactly as other cached pages are accessed.
+.PP
+The current system uses the existing local filesystem structures
+and code to implement the memory-based filesystem.
+The major advantages of this approach are the sharing of code
+and the simplicity of the approach.
+There are several disadvantages, however.
+One is that the size of the filesystem is fixed at mount time.
+This means that a fixed number of inodes (files) and data blocks
+can be supported.
+Currently, this approach requires enough swap space for the entire
+filesystem, and prevents expansion and contraction of the filesystem on demand.
+The current design also prevents the filesystem from taking advantage
+of the memory-resident character of the filesystem.
+It would be interesting to explore other filesystem implementations
+that would be less expensive to execute and that would make better
+use of the space.
+For example, the current filesystem structure is optimized for magnetic
+disks.
+It includes replicated control structures, ``cylinder groups''
+with separate allocation maps and control structures,
+and data structures that optimize rotational layout of files.
+None of this is useful in a memory-based filesystem (at least when the
+backing store for the filesystem is dynamically allocated and not
+contiguous on a single disk type).
+On the other hand,
+directories could be implemented using dynamically-allocated
+memory organized as linked lists or trees rather than as files stored
+in ``disk'' blocks.
+Allocation and location of pages for file data might use virtual memory
+primitives and data structures rather than direct and indirect blocks.
+A reimplementation along these lines will be considered when the virtual
+memory system in the current system has been replaced.
+.[
+$LIST$
+.]
diff --git a/share/doc/papers/memfs/A.t b/share/doc/papers/memfs/A.t
new file mode 100644
index 0000000..c1938c8
--- /dev/null
+++ b/share/doc/papers/memfs/A.t
@@ -0,0 +1,173 @@
+.\" Copyright (c) 1990 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)A.t 5.1 (Berkeley) 4/16/91
+.\"
+.bp
+.nr PS 10
+.nr VS 12
+.SH
+Appendix A - Implementation Details
+.LP
+.nf
+.vS
+/*
+ * This structure defines the control data for the memory
+ * based file system.
+ */
+struct mfsnode {
+ struct vnode *mfs_vnode; /* vnode associated with this mfsnode */
+ caddr_t mfs_baseoff; /* base of file system in memory */
+ long mfs_size; /* size of memory file system */
+ pid_t mfs_pid; /* supporting process pid */
+ struct buf *mfs_buflist; /* list of I/O requests */
+};
+
+/*
+ * Convert between mfsnode pointers and vnode pointers
+ */
+#define VTOMFS(vp) ((struct mfsnode *)(vp)->v_data)
+#define MFSTOV(mfsp) ((mfsp)->mfs_vnode)
+#define MFS_EXIT (struct buf *)-1
+
+/*
+ * Arguments to mount MFS
+ */
+struct mfs_args {
+ char *name; /* name to export for statfs */
+ caddr_t base; /* base address of file system in memory */
+ u_long size; /* size of file system */
+};
+.bp
+/*
+ * Mount an MFS filesystem.
+ */
+mfs_mount(mp, path, data)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+{
+ struct vnode *devvp;
+ struct mfsnode *mfsp;
+ struct buf *bp;
+ struct mfs_args args;
+
+ /*
+ * Create a block device to represent the disk.
+ */
+ devvp = getnewvnode(VT_MFS, VBLK, &mfs_vnodeops);
+ mfsp = VTOMFS(devvp);
+ /*
+ * Save base address of the filesystem from the supporting process.
+ */
+ copyin(data, &args, (sizeof mfs_args));
+ mfsp->mfs_baseoff = args.base;
+ mfsp->mfs_size = args.size;
+ /*
+ * Record the process identifier of the supporting process.
+ */
+ mfsp->mfs_pid = u.u_procp->p_pid;
+ /*
+ * Mount the filesystem.
+ */
+ mfsp->mfs_buflist = NULL;
+ mountfs(devvp, mp);
+ /*
+ * Loop processing I/O requests.
+ */
+ while (mfsp->mfs_buflist != MFS_EXIT) {
+ while (mfsp->mfs_buflist != NULL) {
+ bp = mfsp->mfs_buflist;
+ mfsp->mfs_buflist = bp->av_forw;
+ offset = mfsp->mfs_baseoff + (bp->b_blkno * DEV_BSIZE);
+ if (bp->b_flags & B_READ)
+ copyin(offset, bp->b_un.b_addr, bp->b_bcount);
+ else /* write_request */
+ copyout(bp->b_un.b_addr, offset, bp->b_bcount);
+ biodone(bp);
+ }
+ sleep((caddr_t)devvp, PWAIT);
+ }
+}
+.bp
+/*
+ * If the MFS process requests the I/O then we must do it directly.
+ * Otherwise put the request on the list and request the MFS process
+ * to be run.
+ */
+mfs_strategy(bp)
+ struct buf *bp;
+{
+ struct vnode *devvp;
+ struct mfsnode *mfsp;
+ off_t offset;
+
+ devvp = bp->b_vp;
+ mfsp = VTOMFS(devvp);
+ if (mfsp->mfs_pid == u.u_procp->p_pid) {
+ offset = mfsp->mfs_baseoff + (bp->b_blkno * DEV_BSIZE);
+ if (bp->b_flags & B_READ)
+ copyin(offset, bp->b_un.b_addr, bp->b_bcount);
+ else /* write_request */
+ copyout(bp->b_un.b_addr, offset, bp->b_bcount);
+ biodone(bp);
+ } else {
+ bp->av_forw = mfsp->mfs_buflist;
+ mfsp->mfs_buflist = bp;
+ wakeup((caddr_t)bp->b_vp);
+ }
+}
+
+/*
+ * The close routine is called by unmount after the filesystem
+ * has been successfully unmounted.
+ */
+mfs_close(devvp)
+ struct vnode *devvp;
+{
+ struct mfsnode *mfsp = VTOMFS(vp);
+ struct buf *bp;
+
+ /*
+ * Finish any pending I/O requests.
+ */
+ while (bp = mfsp->mfs_buflist) {
+ mfsp->mfs_buflist = bp->av_forw;
+ mfs_doio(bp, mfsp->mfs_baseoff);
+ wakeup((caddr_t)bp);
+ }
+ /*
+ * Send a request to the filesystem server to exit.
+ */
+ mfsp->mfs_buflist = MFS_EXIT;
+ wakeup((caddr_t)vp);
+}
+.vE
diff --git a/share/doc/papers/memfs/Makefile b/share/doc/papers/memfs/Makefile
new file mode 100644
index 0000000..3e67998
--- /dev/null
+++ b/share/doc/papers/memfs/Makefile
@@ -0,0 +1,22 @@
+# @(#)Makefile 1.8 (Berkeley) 6/8/93
+
+DIR= papers/memfs
+SRCS= 0.t 1.t
+MACROS= -ms
+REFER= refer -n -e -l -s -p ref.bib
+EXTRA= ref.bib A.t tmac.srefs
+CLEANFILES=ref.bib.i A.gt paper.t
+
+paper.ps: paper.t
+ ${ROFF} tmac.srefs paper.t > ${.TARGET}
+
+paper.t: ${SRCS} ref.bib.i A.gt
+ ${REFER} ${SRCS} A.gt > ${.TARGET}
+
+ref.bib.i: ref.bib
+ ${INDXBIB} ref.bib
+
+A.gt: A.t
+ ${GRIND} < A.t > A.gt
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/memfs/ref.bib b/share/doc/papers/memfs/ref.bib
new file mode 100644
index 0000000..89ae507
--- /dev/null
+++ b/share/doc/papers/memfs/ref.bib
@@ -0,0 +1,49 @@
+%A M. K. McKusick
+%A J. M. Bloom
+%A M. J. Karels
+%T Bug Fixes and Changes in 4.3BSD
+%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
+%I \s-1USENIX\s0 Association
+%C Berkeley, CA
+%P 12:1\-22
+%D 1986
+
+%A M. J. Karels
+%T Changes to the Kernel in 4.3BSD
+%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
+%I \s-1USENIX\s0 Association
+%C Berkeley, CA
+%P 13:1\-32
+%D 1986
+
+%A S. J. Leffler
+%A M. K. McKusick
+%A M. J. Karels
+%A J. S. Quarterman
+%T The Design and Implementation of the 4.3BSD UNIX Operating System
+%I Addison-Wesley
+%C Reading, MA
+%D 1989
+
+%A R. M. White
+%T Disk Storage Technology
+%J Scientific American
+%V 243
+%N 2
+%P 138\-148
+%D August 1980
+
+%A A. J. Smith
+%T Bibliography on file and I/O system optimizations and related topics
+%J Operating Systems Review
+%V 14
+%N 4
+%P 39\-54
+%D October 1981
+
+%A Masataka Ohta
+%A Hiroshi Tezuka
+%T A Fast /tmp File System by Async Mount Option
+%J \s-1USENIX\s0 Association Conference Proceedings
+%P ???\-???
+%D June 1990
diff --git a/share/doc/papers/memfs/spell.ok b/share/doc/papers/memfs/spell.ok
new file mode 100644
index 0000000..7aa465f
--- /dev/null
+++ b/share/doc/papers/memfs/spell.ok
@@ -0,0 +1,18 @@
+Berkeley.EDU
+Bostic
+CH
+CM
+Fn
+Karels
+Lb
+MFS
+McKusick
+Pageable
+copyin
+copyout
+email
+filesystem
+filesystems
+mckusick
+pageable
+tmp
diff --git a/share/doc/papers/memfs/tmac.srefs b/share/doc/papers/memfs/tmac.srefs
new file mode 100644
index 0000000..6245118
--- /dev/null
+++ b/share/doc/papers/memfs/tmac.srefs
@@ -0,0 +1,177 @@
+.\" @(#)tmac.srefs 1.14 11/2/88
+.\" REFER macros .... citations
+.de []
+.][ \\$1
+..
+.de ][
+.if \\$1>5 .tm Bad arg to []
+.[\\$1
+..
+.if n .ds [. [
+.\".if t .ds [. \s-2\v'-.4m'\f1
+.if t .ds [. [
+.if n .ds .] ]
+.\".if t .ds .] \v'.4m'\s+2\fP
+.if t .ds .] ]
+.ds (. \& [
+.ds .) ]
+.if n .ds [o ""
+.if n .ds [c ""
+.if t .ds [o ``
+.if t .ds [c ''
+.ds [e \\fIet al.\\fP
+.\" for author list in reference:
+.ds &1 &
+.\" for -m signal (auth1 and auth2, year):
+.ds &2 &
+.\" the next lines deal with the problem of .[1] or [1].
+.\" refer will write "linexxx\*(<.[1]\*(>.
+.\" and either "<." or ">." should produce the .;
+.\" similarly for , and ;
+.rm <. <, <;
+.if n .ds >. .
+.if t .ds >. .
+.if n .ds >, ,
+.if t .ds >, ,
+.if n .ds >; ;
+.if t .ds >; ;
+.de [5 \" tm style
+.FS
+.IP "\\*([F.\0"
+\\*([A, \\f2\\*([T\\f1,
+.ie \\n(TN \\*([M.
+.el Bell Laboratories internal memorandum (\\*([D).
+.RT
+.FE
+..
+.de [0 \" other
+.FS
+.nr [: 0
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \{.nr [: 1
+\\*([A\c\}
+.if !"\\*([T"" \{.if \\n([:>0 ,
+.nr [: 1
+\\f2\\*([T\\f1\c\}
+.if !"\\*([O""\{.if \\n([:>0 ,
+.nr [: 1
+.if \\n([O>0 .nr [: 0
+\\*([O\c
+.if \\n([O>0 \& \c\}
+.ie !"\\*([D"" \{.if \\n([:>0 ,
+.nr [: 1
+\\*([D\c\}
+.if \\n([:>0 \&.
+.RT
+.FE
+..
+.de [1 \" journal article
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \\*([A,
+.if !"\\*([T"" \\*([o\\*([T,\\*([c
+\\f2\\*([J\\f1\c
+.if !"\\*([V"" .if n \& Vol.\&\c
+.if !"\\*([V"" \& \\f3\\*([V\\f1\c
+.if !"\\*([N"" (\\*([N)\c
+.if !"\\*([P"" \{\
+.ie \\n([P>0 , pp. \c
+.el , p. \c
+\\*([P\c\}
+.if !"\\*([I"" .if "\\*([R"" , \\*([I\c
+.if !"\\*([O"" .if \\n([O=0 , \\*([O\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([O"" .if \\n([O>0 \\*([O
+.RT
+.FE
+..
+.de [2 \" book
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \\*([A,
+.if !"\\*([T"" \\f2\\*([T,\\f1
+\\*([I\c
+.if !"\\*([C"" , \\*([C\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([G"" Gov't. ordering no. \\*([G.
+.if !"\\*([O"" \\*([O
+.RT
+.FE
+..
+.de [4 \" report
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+\\*([A, \\*([o\\*([T,\\*([c
+\\*([R\c
+.if !"\\*([G"" \& (\\*([G)\c
+.if !"\\*([I"" , \\*([I\c
+.if !"\\*([C"" , \\*([C\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([O"" \\*([O
+.RT
+.FE
+..
+.de [3 \" article in book
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \\*([A,
+.if !"\\*([T"" \\*([o\\*([T,\\*([c
+.if !"\\*([P"" pp. \\*([P
+in \\f2\\*([B\\f1\c
+.if !"\\*([E"" , ed. \\*([E\c
+.if !"\\*([I"" , \\*([I\c
+.if !"\\*([C"" , \\*([C\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([O"" \\*([O
+.RT
+.FE
+..
+.de ]<
+.[<
+..
+.de [<
+.RT
+.ne 62p
+.ie \\n(rS \{\
+. rs
+. sp 4p
+.\}
+.el .sp 27p
+.Li 2 30.5P
+\fBReferences\fP
+.br
+.if \\n(Ns<2 \{\
+. nr Ns 1
+. ds ST References
+.\}
+.\"nr Tt 7
+.sp 8p
+.rm FS FE
+.\"sy echo '.T3 "\\\\t\\\\tReferences" \\n%' >>Toc
+.ns
+..
+.de [>
+.]>
+..
+.de ]>
+.sp
+..
+.de ]-
+.[-
+..
+.de [-
+.rm [V [P [A [T
+.rm [N [C [B [O
+.rm [R [I [E [D
+..
+.de ]]
+this is never
+executed
+and just
+uses up an end-of-file
+bug.
+..
OpenPOWER on IntegriCloud