diff options
author | rmacklem <rmacklem@FreeBSD.org> | 2009-05-04 15:23:58 +0000 |
---|---|---|
committer | rmacklem <rmacklem@FreeBSD.org> | 2009-05-04 15:23:58 +0000 |
commit | e3d34903b6fb9cb09f7e616bde59d97341958fa2 (patch) | |
tree | 0246ff14527b554e60f1c9212be00ee8c1128197 /sys/fs/nfsclient/nfs_clnfsiod.c | |
parent | fb2908c8ff440e0985013b83071bd8dfecb11371 (diff) | |
download | FreeBSD-src-e3d34903b6fb9cb09f7e616bde59d97341958fa2.zip FreeBSD-src-e3d34903b6fb9cb09f7e616bde59d97341958fa2.tar.gz |
Add the experimental nfs subtree to the kernel, which includes
support for NFSv4 as well as NFSv2 and 3.
It lives in 3 subdirs under sys/fs:
nfs - functions that are common to the client and server
nfsclient - a mutation of sys/nfsclient that calls generic functions
to do RPCs and handle state. As such, it retains the
buffer cache handling characteristics and vnode semantics that
are found in sys/nfsclient, for the most part.
nfsserver - the server. It includes a DRC designed specifically for
NFSv4, that is used instead of the generic DRC in sys/rpc.
The build glue will be checked in later, so at this point, it
consists of 3 new subdirs that should not affect kernel building.
Approved by: kib (mentor)
Diffstat (limited to 'sys/fs/nfsclient/nfs_clnfsiod.c')
-rw-r--r-- | sys/fs/nfsclient/nfs_clnfsiod.c | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/sys/fs/nfsclient/nfs_clnfsiod.c b/sys/fs/nfsclient/nfs_clnfsiod.c new file mode 100644 index 0000000..f38aed8 --- /dev/null +++ b/sys/fs/nfsclient/nfs_clnfsiod.c @@ -0,0 +1,308 @@ +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from nfs_syscalls.c 8.5 (Berkeley) 3/30/95 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysproto.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/namei.h> +#include <sys/unistd.h> +#include <sys/kthread.h> +#include <sys/fcntl.h> +#include <sys/lockf.h> +#include <sys/mutex.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> + +#include <fs/nfs/nfsport.h> +#include <fs/nfsclient/nfsmount.h> +#include <fs/nfsclient/nfs.h> +#include <fs/nfsclient/nfsnode.h> +#include <fs/nfsclient/nfs_lock.h> + +extern struct mtx ncl_iod_mutex; + +int ncl_numasync; +struct proc *ncl_iodwant[NFS_MAXRAHEAD]; +struct nfsmount *ncl_iodmount[NFS_MAXRAHEAD]; + +static void nfssvc_iod(void *); + +static int nfs_asyncdaemon[NFS_MAXRAHEAD]; + +SYSCTL_DECL(_vfs_newnfs); + +/* Maximum number of seconds a nfsiod kthread will sleep before exiting */ +static unsigned int ncl_iodmaxidle = 120; +SYSCTL_UINT(_vfs_newnfs, OID_AUTO, iodmaxidle, CTLFLAG_RW, &ncl_iodmaxidle, 0, ""); + +/* Maximum number of nfsiod kthreads */ +unsigned int ncl_iodmax = NFS_MAXRAHEAD; + +/* Minimum number of nfsiod kthreads to keep as spares */ +static unsigned int nfs_iodmin = 0; + +static int +sysctl_iodmin(SYSCTL_HANDLER_ARGS) +{ + int error, i; + int newmin; + + newmin = nfs_iodmin; + error = sysctl_handle_int(oidp, &newmin, 0, req); + if (error || (req->newptr == NULL)) + return (error); + mtx_lock(&ncl_iod_mutex); + if (newmin > ncl_iodmax) { + error = EINVAL; + goto out; + } + nfs_iodmin = newmin; + if (ncl_numasync >= nfs_iodmin) + goto out; + /* + * If the current number of nfsiod is lower + * than 
the new minimum, create some more. + */ + for (i = nfs_iodmin - ncl_numasync; i > 0; i--) + ncl_nfsiodnew(); +out: + mtx_unlock(&ncl_iod_mutex); + return (0); +} +SYSCTL_PROC(_vfs_newnfs, OID_AUTO, iodmin, CTLTYPE_UINT | CTLFLAG_RW, 0, + sizeof (nfs_iodmin), sysctl_iodmin, "IU", ""); + + +static int +sysctl_iodmax(SYSCTL_HANDLER_ARGS) +{ + int error, i; + int iod, newmax; + + newmax = ncl_iodmax; + error = sysctl_handle_int(oidp, &newmax, 0, req); + if (error || (req->newptr == NULL)) + return (error); + if (newmax > NFS_MAXRAHEAD) + return (EINVAL); + mtx_lock(&ncl_iod_mutex); + ncl_iodmax = newmax; + if (ncl_numasync <= ncl_iodmax) + goto out; + /* + * If there are some asleep nfsiods that should + * exit, wakeup() them so that they check ncl_iodmax + * and exit. Those who are active will exit as + * soon as they finish I/O. + */ + iod = ncl_numasync - 1; + for (i = 0; i < ncl_numasync - ncl_iodmax; i++) { + if (ncl_iodwant[iod]) + wakeup(&ncl_iodwant[iod]); + iod--; + } +out: + mtx_unlock(&ncl_iod_mutex); + return (0); +} +SYSCTL_PROC(_vfs_newnfs, OID_AUTO, iodmax, CTLTYPE_UINT | CTLFLAG_RW, 0, + sizeof (ncl_iodmax), sysctl_iodmax, "IU", ""); + +int +ncl_nfsiodnew(void) +{ + int error, i; + int newiod; + + if (ncl_numasync >= ncl_iodmax) + return (-1); + newiod = -1; + for (i = 0; i < ncl_iodmax; i++) + if (nfs_asyncdaemon[i] == 0) { + nfs_asyncdaemon[i]++; + newiod = i; + break; + } + if (newiod == -1) + return (-1); + mtx_unlock(&ncl_iod_mutex); + error = kproc_create(nfssvc_iod, nfs_asyncdaemon + i, NULL, RFHIGHPID, + 0, "nfsiod %d", newiod); + mtx_lock(&ncl_iod_mutex); + if (error) + return (-1); + ncl_numasync++; + return (newiod); +} + +static void +nfsiod_setup(void *dummy) +{ + int i; + int error; + + TUNABLE_INT_FETCH("vfs.newnfs.iodmin", &nfs_iodmin); + nfscl_init(); + mtx_lock(&ncl_iod_mutex); + /* Silently limit the start number of nfsiod's */ + if (nfs_iodmin > NFS_MAXRAHEAD) + nfs_iodmin = NFS_MAXRAHEAD; + + for (i = 0; i < nfs_iodmin; i++) { + 
error = ncl_nfsiodnew(); + if (error == -1) + panic("newnfsiod_setup: ncl_nfsiodnew failed"); + } + mtx_unlock(&ncl_iod_mutex); +} +SYSINIT(newnfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL); + +static int nfs_defect = 0; +SYSCTL_INT(_vfs_newnfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, ""); + +/* + * Asynchronous I/O daemons for client nfs. + * They do read-ahead and write-behind operations on the block I/O cache. + * Returns if we hit the timeout defined by the iodmaxidle sysctl. + */ +static void +nfssvc_iod(void *instance) +{ + struct buf *bp; + struct nfsmount *nmp; + int myiod, timo; + int error = 0; + + mtx_lock(&ncl_iod_mutex); + myiod = (int *)instance - nfs_asyncdaemon; + /* + * Main loop + */ + for (;;) { + while (((nmp = ncl_iodmount[myiod]) == NULL) + || !TAILQ_FIRST(&nmp->nm_bufq)) { + if (myiod >= ncl_iodmax) + goto finish; + if (nmp) + nmp->nm_bufqiods--; + ncl_iodwant[myiod] = curthread->td_proc; + ncl_iodmount[myiod] = NULL; + /* + * Always keep at least nfs_iodmin kthreads. + */ + timo = (myiod < nfs_iodmin) ? 
0 : ncl_iodmaxidle * hz; + error = msleep(&ncl_iodwant[myiod], &ncl_iod_mutex, PWAIT | PCATCH, + "-", timo); + if (error) { + nmp = ncl_iodmount[myiod]; + /* + * Rechecking the nm_bufq closes a rare race where the + * nfsiod is woken up at the exact time the idle timeout + * fires + */ + if (nmp && TAILQ_FIRST(&nmp->nm_bufq)) + error = 0; + break; + } + } + if (error) + break; + while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) { + + /* Take one off the front of the list */ + TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist); + nmp->nm_bufqlen--; + if (nmp->nm_bufqwant && nmp->nm_bufqlen <= ncl_numasync) { + nmp->nm_bufqwant = 0; + wakeup(&nmp->nm_bufq); + } + mtx_unlock(&ncl_iod_mutex); + if (bp->b_flags & B_DIRECT) { + KASSERT((bp->b_iocmd == BIO_WRITE), ("nfscvs_iod: BIO_WRITE not set")); + (void)ncl_doio_directwrite(bp); + } else { + if (bp->b_iocmd == BIO_READ) + (void) ncl_doio(bp->b_vp, bp, bp->b_rcred, NULL); + else + (void) ncl_doio(bp->b_vp, bp, bp->b_wcred, NULL); + } + mtx_lock(&ncl_iod_mutex); + /* + * If there are more than one iod on this mount, then defect + * so that the iods can be shared out fairly between the mounts + */ + if (nfs_defect && nmp->nm_bufqiods > 1) { + NFS_DPF(ASYNCIO, + ("nfssvc_iod: iod %d defecting from mount %p\n", + myiod, nmp)); + ncl_iodmount[myiod] = NULL; + nmp->nm_bufqiods--; + break; + } + } + } +finish: + nfs_asyncdaemon[myiod] = 0; + if (nmp) + nmp->nm_bufqiods--; + ncl_iodwant[myiod] = NULL; + ncl_iodmount[myiod] = NULL; + /* Someone may be waiting for the last nfsiod to terminate. */ + if (--ncl_numasync == 0) + wakeup(&ncl_numasync); + mtx_unlock(&ncl_iod_mutex); + if ((error == 0) || (error == EWOULDBLOCK)) + kproc_exit(0); + /* Abnormal termination */ + kproc_exit(1); +} |