Diffstat (limited to 'sys/nfs')
-rw-r--r--  sys/nfs/bootp_subr.c    | 1257
-rw-r--r--  sys/nfs/krpc.h          |   31
-rw-r--r--  sys/nfs/krpc_subr.c     |  481
-rw-r--r--  sys/nfs/nfs.h           |  739
-rw-r--r--  sys/nfs/nfs_bio.c       | 1555
-rw-r--r--  sys/nfs/nfs_common.c    | 2272
-rw-r--r--  sys/nfs/nfs_common.h    |  564
-rw-r--r--  sys/nfs/nfs_node.c      |  390
-rw-r--r--  sys/nfs/nfs_nqlease.c   | 1307
-rw-r--r--  sys/nfs/nfs_serv.c      | 4068
-rw-r--r--  sys/nfs/nfs_socket.c    | 2284
-rw-r--r--  sys/nfs/nfs_srvcache.c  |  356
-rw-r--r--  sys/nfs/nfs_subs.c      | 2272
-rw-r--r--  sys/nfs/nfs_syscalls.c  | 1199
-rw-r--r--  sys/nfs/nfs_vfsops.c    | 1078
-rw-r--r--  sys/nfs/nfs_vnops.c     | 3354
-rw-r--r--  sys/nfs/nfsdiskless.h   |  124
-rw-r--r--  sys/nfs/nfsm_subs.h     |  564
-rw-r--r--  sys/nfs/nfsmount.h      |  106
-rw-r--r--  sys/nfs/nfsnode.h       |  210
-rw-r--r--  sys/nfs/nfsproto.h      |  439
-rw-r--r--  sys/nfs/nfsrtt.h        |  104
-rw-r--r--  sys/nfs/nfsrvcache.h    |   91
-rw-r--r--  sys/nfs/nfsv2.h         |   40
-rw-r--r--  sys/nfs/nqnfs.h         |  218
-rw-r--r--  sys/nfs/rpcv2.h         |  143
-rw-r--r--  sys/nfs/xdr_subs.h      |   95
27 files changed, 25341 insertions, 0 deletions
diff --git a/sys/nfs/bootp_subr.c b/sys/nfs/bootp_subr.c
new file mode 100644
index 0000000..3295248
--- /dev/null
+++ b/sys/nfs/bootp_subr.c
@@ -0,0 +1,1257 @@
+/* $FreeBSD$ */
+
+/*
+ * Copyright (c) 1995 Gordon Ross, Adam Glass
+ * Copyright (c) 1992 Regents of the University of California.
+ * All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * based on:
+ * nfs/krpc_subr.c
+ * $NetBSD: krpc_subr.c,v 1.10 1995/08/08 20:43:43 gwr Exp $
+ */
+
+#include "opt_bootp.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sockio.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/uio.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <net/if_types.h>
+#include <net/if_dl.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsdiskless.h>
+#include <nfs/krpc.h>
+#include <nfs/xdr_subs.h>
+
+
+#define BOOTP_MIN_LEN 300 /* Minimum size of bootp udp packet */
+
+/*
+ * What is the longest we will wait before re-sending a request?
+ * Note this is also the frequency of "RPC timeout" messages.
+ * The re-send loop counts up linearly to this maximum, so the
+ * first complaint will happen after (1+2+3+4+5)=15 seconds.
+ */
+#define MAX_RESEND_DELAY 5 /* seconds */
+
+/* Definitions from RFC951 */
+struct bootp_packet {
+ u_int8_t op;
+ u_int8_t htype;
+ u_int8_t hlen;
+ u_int8_t hops;
+ u_int32_t xid;
+ u_int16_t secs;
+ u_int16_t flags;
+ struct in_addr ciaddr;
+ struct in_addr yiaddr;
+ struct in_addr siaddr;
+ struct in_addr giaddr;
+ unsigned char chaddr[16];
+ char sname[64];
+ char file[128];
+ unsigned char vend[256];
+};
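+
+/*
+ * Illustrative note (added for clarity, not part of the original commit):
+ * the vend[] field carries RFC 1048-style vendor extensions.  Its first
+ * four bytes are the magic cookie 99.130.83.99 (0x63825363), followed by
+ * tag/length/value options and terminated by tag 255; the option-parsing
+ * loop in bootpc_init() below depends on exactly this layout.
+ */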
+
+#define IPPORT_BOOTPC 68
+#define IPPORT_BOOTPS 67
+
+extern int nfs_diskless_valid;
+extern struct nfsv3_diskless nfsv3_diskless;
+
+/* mountd RPC */
+static int md_mount __P((struct sockaddr_in *mdsin, char *path,
+ u_char *fhp, int *fhsizep, struct nfs_args *args,struct proc *procp));
+static int md_lookup_swap __P((struct sockaddr_in *mdsin,char *path,
+ u_char *fhp, int *fhsizep,
+ struct nfs_args *args,
+ struct proc *procp));
+static int setfs __P((struct sockaddr_in *addr, char *path, char *p));
+static int getdec __P((char **ptr));
+static char *substr __P((char *a,char *b));
+static void mountopts __P((struct nfs_args *args, char *p));
+static int xdr_opaque_decode __P((struct mbuf **ptr,u_char *buf,
+ int len));
+static int xdr_int_decode __P((struct mbuf **ptr,int *iptr));
+static void printip __P((char *prefix,struct in_addr addr));
+
+#ifdef BOOTP_DEBUG
+void bootpboot_p_sa(struct sockaddr *sa,struct sockaddr *ma);
+void bootpboot_p_ma(struct sockaddr *ma);
+void bootpboot_p_rtentry(struct rtentry *rt);
+void bootpboot_p_tree(struct radix_node *rn);
+void bootpboot_p_rtlist(void);
+void bootpboot_p_iflist(void);
+#endif
+
+static int bootpc_call(struct bootp_packet *call,
+ struct bootp_packet *reply,
+ struct proc *procp);
+
+static int bootpc_fakeup_interface(struct ifreq *ireq,
+ struct socket *so,
+ struct proc *procp);
+
+static int
+bootpc_adjust_interface(struct ifreq *ireq,struct socket *so,
+ struct sockaddr_in *myaddr,
+ struct sockaddr_in *netmask,
+ struct sockaddr_in *gw,
+ struct proc *procp);
+
+void bootpc_init(void);
+
+#ifdef BOOTP_DEBUG
+void bootpboot_p_sa(sa,ma)
+ struct sockaddr *sa;
+ struct sockaddr *ma;
+{
+ if (!sa) {
+ printf("(sockaddr *) <null>");
+ return;
+ }
+ switch (sa->sa_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *sin = (struct sockaddr_in *) sa;
+ printf("inet %x",ntohl(sin->sin_addr.s_addr));
+ if (ma) {
+ struct sockaddr_in *sin = (struct sockaddr_in *) ma;
+ printf(" mask %x",ntohl(sin->sin_addr.s_addr));
+ }
+ }
+ break;
+ case AF_LINK:
+ {
+ struct sockaddr_dl *sli = (struct sockaddr_dl *) sa;
+ int i;
+ printf("link %.*s ",sli->sdl_nlen,sli->sdl_data);
+ for (i=0;i<sli->sdl_alen;i++) {
+ if (i>0)
+ printf(":");
+ printf("%x",(unsigned char) sli->sdl_data[i+sli->sdl_nlen]);
+ }
+ }
+ break;
+ default:
+ printf("af%d",sa->sa_family);
+ }
+}
+
+void bootpboot_p_ma(ma)
+ struct sockaddr *ma;
+{
+ if (!ma) {
+ printf("<null>");
+ return;
+ }
+ printf("%x",*(int*)ma);
+}
+
+void bootpboot_p_rtentry(rt)
+ struct rtentry *rt;
+{
+ bootpboot_p_sa(rt_key(rt),rt_mask(rt));
+ printf(" ");
+ bootpboot_p_ma(rt->rt_genmask);
+ printf(" ");
+ bootpboot_p_sa(rt->rt_gateway,NULL);
+ printf(" ");
+ printf("flags %x",(unsigned short) rt->rt_flags);
+ printf(" %d",rt->rt_rmx.rmx_expire);
+ printf(" %s%d\n",rt->rt_ifp->if_name,rt->rt_ifp->if_unit);
+}
+void bootpboot_p_tree(rn)
+ struct radix_node *rn;
+{
+ while (rn) {
+ if (rn->rn_b < 0) {
+ if (rn->rn_flags & RNF_ROOT) {
+ } else {
+ bootpboot_p_rtentry((struct rtentry *) rn);
+ }
+ rn = rn->rn_dupedkey;
+ } else {
+ bootpboot_p_tree(rn->rn_l);
+ bootpboot_p_tree(rn->rn_r);
+ return;
+ }
+
+ }
+}
+
+void bootpboot_p_rtlist(void)
+{
+ printf("Routing table:\n");
+ bootpboot_p_tree(rt_tables[AF_INET]->rnh_treetop);
+}
+
+void bootpboot_p_iflist(void)
+{
+ struct ifnet *ifp;
+ struct ifaddr *ifa;
+ printf("Interface list:\n");
+ for (ifp = TAILQ_FIRST(&ifnet); ifp != 0; ifp = TAILQ_NEXT(ifp,if_link))
+ {
+ for (ifa = TAILQ_FIRST(&ifp->if_addrhead) ;ifa;
+ ifa=TAILQ_NEXT(ifa,ifa_link))
+ if (ifa->ifa_addr->sa_family == AF_INET ) {
+ printf("%s%d flags %x, addr %x, bcast %x, net %x\n",
+ ifp->if_name,ifp->if_unit,
+ (unsigned short) ifp->if_flags,
+ ntohl(((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr),
+ ntohl(((struct sockaddr_in *) ifa->ifa_dstaddr)->sin_addr.s_addr),
+ ntohl(((struct sockaddr_in *) ifa->ifa_netmask)->sin_addr.s_addr)
+ );
+ }
+ }
+}
+#endif
+
+static int
+bootpc_call(call,reply,procp)
+ struct bootp_packet *call;
+ struct bootp_packet *reply; /* output */
+ struct proc *procp;
+{
+ struct socket *so;
+ struct sockaddr_in *sin, sa;
+ struct uio auio;
+ struct sockopt sopt;
+ struct iovec aio;
+ struct timeval tv;
+ int error, on, len, rcvflg, secs, timo;
+
+ /*
+ * Create socket and set its receive timeout.
+ */
+ if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0,procp)))
+ goto out;
+
+ tv.tv_sec = 1;
+ tv.tv_usec = 0;
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_RCVTIMEO;
+ sopt.sopt_val = &tv;
+ sopt.sopt_valsize = sizeof tv;
+
+ if ((error = sosetopt(so, &sopt)) != 0)
+ goto out;
+
+ /*
+ * Enable broadcast.
+ */
+ on = 1;
+ sopt.sopt_val = &on;
+ sopt.sopt_valsize = sizeof on;
+ sopt.sopt_name = SO_BROADCAST;
+ if ((error = sosetopt(so, &sopt)) != 0)
+ goto out;
+
+ /*
+ * Bind the local endpoint to a bootp client port.
+ */
+ sin = &sa;
+ bzero(sin, sizeof *sin);
+ sin->sin_len = sizeof(*sin);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ sin->sin_port = htons(IPPORT_BOOTPC);
+ error = sobind(so, (struct sockaddr *)sin, procp);
+ if (error) {
+ printf("bind failed\n");
+ goto out;
+ }
+
+ /*
+ * Setup socket address for the server.
+ */
+ sin = &sa;
+ bzero(sin, sizeof *sin);
+ sin->sin_len = sizeof(*sin);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_BROADCAST;
+ sin->sin_port = htons(IPPORT_BOOTPS);
+
+ /*
+ * Send it, repeatedly, until a reply is received,
+ * but delay each re-send by an increasing amount.
+ * If the delay hits the maximum, start complaining.
+ */
+ timo = 0;
+ for (;;) {
+ /* Send BOOTP request (or re-send). */
+
+ aio.iov_base = (caddr_t) call;
+ aio.iov_len = sizeof(*call);
+
+ auio.uio_iov = &aio;
+ auio.uio_iovcnt = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_offset = 0;
+ auio.uio_resid = sizeof(*call);
+ auio.uio_procp = procp;
+
+ error = sosend(so, (struct sockaddr *)sin, &auio, NULL,
+ NULL, 0, procp);
+ if (error) {
+ printf("bootpc_call: sosend: %d state %08x\n", error, (int)so->so_state);
+ goto out;
+ }
+
+ /* Determine new timeout. */
+ if (timo < MAX_RESEND_DELAY)
+ timo++;
+ else
+ printf("BOOTP timeout for server 0x%lx\n",
+ (u_long)ntohl(sin->sin_addr.s_addr));
+
+ /*
+ * Wait for up to timo seconds for a reply.
+ * The socket receive timeout was set to 1 second.
+ */
+ secs = timo;
+ while (secs > 0) {
+ aio.iov_base = (caddr_t) reply;
+ aio.iov_len = sizeof(*reply);
+
+ auio.uio_iov = &aio;
+ auio.uio_iovcnt = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_offset = 0;
+ auio.uio_resid = sizeof(*reply);
+ auio.uio_procp = procp;
+
+ rcvflg = 0;
+ error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg);
+ if (error == EWOULDBLOCK) {
+ secs--;
+ call->secs=htons(ntohs(call->secs)+1);
+ continue;
+ }
+ if (error)
+ goto out;
+ len = sizeof(*reply) - auio.uio_resid;
+
+ /* Do we have the required number of bytes ? */
+ if (len < BOOTP_MIN_LEN)
+ continue;
+
+ /* Is it the right reply? */
+ if (reply->op != 2)
+ continue;
+
+ if (reply->xid != call->xid)
+ continue;
+
+ if (reply->hlen != call->hlen)
+ continue;
+
+ if (bcmp(reply->chaddr,call->chaddr,call->hlen))
+ continue;
+
+ goto gotreply; /* break two levels */
+
+ } /* while secs */
+ } /* forever send/receive */
+
+ error = ETIMEDOUT;
+ goto out;
+
+ gotreply:
+ out:
+ soclose(so);
+ return error;
+}
+
+static int
+bootpc_fakeup_interface(struct ifreq *ireq,struct socket *so,
+ struct proc *procp)
+{
+ struct sockaddr_in *sin;
+ int error;
+ struct sockaddr_in dst;
+ struct sockaddr_in gw;
+ struct sockaddr_in mask;
+
+ /*
+ * Bring up the interface.
+ *
+ * Get the old interface flags and OR IFF_UP into them; if
+ * IFF_UP is set blindly, interface selection can be clobbered.
+ */
+ error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, procp);
+ if (error)
+ panic("bootpc_fakeup_interface: GIFFLAGS, error=%d", error);
+ ireq->ifr_flags |= IFF_UP;
+ error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, procp);
+ if (error)
+ panic("bootpc_fakeup_interface: SIFFLAGS, error=%d", error);
+
+ /*
+ * Do enough of ifconfig(8) so that the chosen interface
+ * can talk to the servers. (just set the address)
+ */
+
+ /* addr is 0.0.0.0 */
+
+ sin = (struct sockaddr_in *)&ireq->ifr_addr;
+ bzero((caddr_t)sin, sizeof(*sin));
+ sin->sin_len = sizeof(*sin);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ error = ifioctl(so, SIOCSIFADDR, (caddr_t)ireq, procp);
+ if (error)
+ panic("bootpc_fakeup_interface: set if addr, error=%d", error);
+
+ /* netmask is 0.0.0.0 */
+
+ sin = (struct sockaddr_in *)&ireq->ifr_addr;
+ bzero((caddr_t)sin, sizeof(*sin));
+ sin->sin_len = sizeof(*sin);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, procp);
+ if (error)
+ panic("bootpc_fakeup_interface: set if net addr, error=%d", error);
+
+ /* Broadcast is 255.255.255.255 */
+
+ sin = (struct sockaddr_in *)&ireq->ifr_addr;
+ bzero((caddr_t)sin, sizeof(*sin));
+ sin->sin_len = sizeof(*sin);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_BROADCAST;
+ error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, procp);
+ if (error)
+ panic("bootpc_fakeup_interface: set if broadcast addr, error=%d", error);
+
+ /* Add default route to 0.0.0.0 so we can send data */
+
+ bzero((caddr_t) &dst, sizeof(dst));
+ dst.sin_len=sizeof(dst);
+ dst.sin_family=AF_INET;
+ dst.sin_addr.s_addr = htonl(0);
+
+ bzero((caddr_t) &gw, sizeof(gw));
+ gw.sin_len=sizeof(gw);
+ gw.sin_family=AF_INET;
+ gw.sin_addr.s_addr = htonl(0x0);
+
+ bzero((caddr_t) &mask, sizeof(mask));
+ mask.sin_len=sizeof(mask);
+ mask.sin_family=AF_INET;
+ mask.sin_addr.s_addr = htonl(0);
+
+ error = rtrequest(RTM_ADD,
+ (struct sockaddr *) &dst,
+ (struct sockaddr *) &gw,
+ (struct sockaddr *) &mask,
+ RTF_UP | RTF_STATIC
+ , NULL);
+ if (error)
+ printf("bootpc_fakeup_interface: add default route, error=%d\n", error);
+ return error;
+}
+
+static int
+bootpc_adjust_interface(struct ifreq *ireq,struct socket *so,
+ struct sockaddr_in *myaddr,
+ struct sockaddr_in *netmask,
+ struct sockaddr_in *gw,
+ struct proc *procp)
+{
+ int error;
+ struct sockaddr_in oldgw;
+ struct sockaddr_in olddst;
+ struct sockaddr_in oldmask;
+ struct sockaddr_in *sin;
+
+ /* Remove old default route to 0.0.0.0 */
+
+ bzero((caddr_t) &olddst, sizeof(olddst));
+ olddst.sin_len=sizeof(olddst);
+ olddst.sin_family=AF_INET;
+ olddst.sin_addr.s_addr = INADDR_ANY;
+
+ bzero((caddr_t) &oldgw, sizeof(oldgw));
+ oldgw.sin_len=sizeof(oldgw);
+ oldgw.sin_family=AF_INET;
+ oldgw.sin_addr.s_addr = INADDR_ANY;
+
+ bzero((caddr_t) &oldmask, sizeof(oldmask));
+ oldmask.sin_len=sizeof(oldmask);
+ oldmask.sin_family=AF_INET;
+ oldmask.sin_addr.s_addr = INADDR_ANY;
+
+ error = rtrequest(RTM_DELETE,
+ (struct sockaddr *) &olddst,
+ (struct sockaddr *) &oldgw,
+ (struct sockaddr *) &oldmask,
+ (RTF_UP | RTF_STATIC), NULL);
+ if (error) {
+ printf("nfs_boot: del default route, error=%d\n", error);
+ return error;
+ }
+
+ /*
+ * Do enough of ifconfig(8) so that the chosen interface
+ * can talk to the servers. (just set the address)
+ */
+ bcopy(netmask,&ireq->ifr_addr,sizeof(*netmask));
+ error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, procp);
+ if (error)
+ panic("nfs_boot: set if netmask, error=%d", error);
+
+ /* Broadcast address is the IP address with the host part all 1's */
+
+ sin = (struct sockaddr_in *)&ireq->ifr_addr;
+ bzero((caddr_t)sin, sizeof(*sin));
+ sin->sin_len = sizeof(*sin);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = myaddr->sin_addr.s_addr | ~ netmask->sin_addr.s_addr;
+ error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, procp);
+ if (error)
+ panic("bootpc_call: set if broadcast addr, error=%d", error);
+
+ bcopy(myaddr,&ireq->ifr_addr,sizeof(*myaddr));
+ error = ifioctl(so, SIOCSIFADDR, (caddr_t)ireq, procp);
+ if (error)
+ panic("nfs_boot: set if addr, error=%d", error);
+
+ /* Add new default route */
+
+ error = rtrequest(RTM_ADD,
+ (struct sockaddr *) &olddst,
+ (struct sockaddr *) gw,
+ (struct sockaddr *) &oldmask,
+ (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL);
+ if (error) {
+ printf("nfs_boot: add net route, error=%d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static int setfs(addr, path, p)
+ struct sockaddr_in *addr;
+ char *path;
+ char *p;
+{
+ unsigned ip = 0;
+ int val;
+
+ if (((val = getdec(&p)) < 0) || (val > 255)) return(0);
+ ip = val << 24;
+ if (*p != '.') return(0);
+ p++;
+ if (((val = getdec(&p)) < 0) || (val > 255)) return(0);
+ ip |= (val << 16);
+ if (*p != '.') return(0);
+ p++;
+ if (((val = getdec(&p)) < 0) || (val > 255)) return(0);
+ ip |= (val << 8);
+ if (*p != '.') return(0);
+ p++;
+ if (((val = getdec(&p)) < 0) || (val > 255)) return(0);
+ ip |= val;
+ if (*p != ':') return(0);
+ p++;
+
+ addr->sin_addr.s_addr = htonl(ip);
+ addr->sin_len = sizeof(struct sockaddr_in);
+ addr->sin_family = AF_INET;
+
+ strncpy(path,p,MNAMELEN-1);
+ return(1);
+}
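+
+/*
+ * Example (illustrative, not from the original source): given the string
+ * "128.32.0.1:/export/root", setfs() fills addr with 128.32.0.1 and copies
+ * "/export/root" into path; a string without the ':' separator, or with an
+ * octet outside 0-255, is rejected and 0 is returned.
+ */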
+
+static int getdec(ptr)
+ char **ptr;
+{
+ char *p = *ptr;
+ int ret=0;
+ if ((*p < '0') || (*p > '9')) return(-1);
+ while ((*p >= '0') && (*p <= '9')) {
+ ret = ret*10 + (*p - '0');
+ p++;
+ }
+ *ptr = p;
+ return(ret);
+}
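+
+/*
+ * Note (added for clarity, not in the original source): despite its name,
+ * substr(a, b) does not return the start of the match; it returns a pointer
+ * to the character in a immediately following the first occurrence of b,
+ * or NULL if b does not occur.  mountopts() relies on this to read the
+ * digits right after "rsize=" and "wsize=".
+ */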
+
+static char *substr(a,b)
+ char *a,*b;
+{
+ char *loc1;
+ char *loc2;
+
+ while (*a != '\0') {
+ loc1 = a;
+ loc2 = b;
+ while (*loc1 == *loc2++) {
+ if (*loc1 == '\0') return (0);
+ loc1++;
+ if (*loc2 == '\0') return (loc1);
+ }
+ a++;
+ }
+ return (0);
+}
+
+static void mountopts(args,p)
+ struct nfs_args *args;
+ char *p;
+{
+ char *tmp;
+
+ args->flags = NFSMNT_RSIZE | NFSMNT_WSIZE | NFSMNT_RESVPORT;
+ args->sotype = SOCK_DGRAM;
+ if ((tmp = (char *)substr(p,"rsize=")))
+ args->rsize=getdec(&tmp);
+ if ((tmp = (char *)substr(p,"wsize=")))
+ args->wsize=getdec(&tmp);
+ if ((tmp = (char *)substr(p,"intr")))
+ args->flags |= NFSMNT_INT;
+ if ((tmp = (char *)substr(p,"soft")))
+ args->flags |= NFSMNT_SOFT;
+ if ((tmp = (char *)substr(p,"noconn")))
+ args->flags |= NFSMNT_NOCONN;
+ if ((tmp = (char *)substr(p, "tcp")))
+ args->sotype = SOCK_STREAM;
+}
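+
+/*
+ * Example (illustrative, not from the original source): an option-130
+ * string such as "rsize=8192,wsize=8192,intr,tcp" leaves rsize and wsize
+ * at 8192, adds NFSMNT_INT to args->flags and switches args->sotype to
+ * SOCK_STREAM; text that matches none of the keywords is simply ignored.
+ */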
+
+static int xdr_opaque_decode(mptr,buf,len)
+ struct mbuf **mptr;
+ u_char *buf;
+ int len;
+{
+ struct mbuf *m;
+ int alignedlen;
+
+ m = *mptr;
+ alignedlen = ( len + 3 ) & ~3;
+
+ if (m->m_len < alignedlen) {
+ m = m_pullup(m,alignedlen);
+ if (m == NULL) {
+ *mptr = NULL;
+ return EBADRPC;
+ }
+ }
+ bcopy(mtod(m,u_char *),buf,len);
+ m_adj(m,alignedlen);
+ *mptr = m;
+ return 0;
+}
+
+static int xdr_int_decode(mptr,iptr)
+ struct mbuf **mptr;
+ int *iptr;
+{
+ u_int32_t i;
+ if (xdr_opaque_decode(mptr,(u_char *) &i,sizeof(u_int32_t)))
+ return EBADRPC;
+ *iptr = fxdr_unsigned(u_int32_t,i);
+ return 0;
+}
+
+static void printip(char *prefix,struct in_addr addr)
+{
+ unsigned int ip;
+
+ ip = ntohl(addr.s_addr);
+
+ printf("%s is %d.%d.%d.%d\n",prefix,
+ ip >> 24, (ip >> 16) & 255 ,(ip >> 8) & 255 ,ip & 255 );
+}
+
+void
+bootpc_init(void)
+{
+ struct bootp_packet call;
+ struct bootp_packet reply;
+ static u_int32_t xid = ~0xFF;
+
+ struct ifreq ireq;
+ struct ifnet *ifp;
+ struct socket *so;
+ int error;
+ int code,ncode,len;
+ int j;
+ char *p;
+ unsigned int ip;
+
+ struct sockaddr_in myaddr;
+ struct sockaddr_in netmask;
+ struct sockaddr_in gw;
+ int gotgw=0;
+ int gotnetmask=0;
+ int gotrootpath=0;
+ int gotswappath=0;
+ char lookup_path[24];
+
+#define EALEN 6
+ struct ifaddr *ifa;
+ struct sockaddr_dl *sdl = NULL;
+ char *delim;
+
+ struct nfsv3_diskless *nd = &nfsv3_diskless;
+ struct proc *procp = curproc;
+
+ /*
+ * If already filled in, don't touch it here
+ */
+ if (nfs_diskless_valid)
+ return;
+
+ /*
+ * Wait until arp entries can be handled.
+ */
+ while (time_second == 0)
+ tsleep(&time_second, PZERO+8, "arpkludge", 10);
+
+ /*
+ * Find a network interface.
+ */
+#ifdef BOOTP_WIRED_TO
+ printf("bootpc_init: wired to interface '%s'\n",
+ __XSTRING(BOOTP_WIRED_TO));
+#endif
+ bzero(&ireq, sizeof(ireq));
+ for (ifp = TAILQ_FIRST(&ifnet); ifp != 0; ifp = TAILQ_NEXT(ifp,if_link))
+ {
+ snprintf(ireq.ifr_name, sizeof(ireq.ifr_name),
+ "%s%d", ifp->if_name, ifp->if_unit);
+#ifdef BOOTP_WIRED_TO
+ if (strcmp(ireq.ifr_name, __XSTRING(BOOTP_WIRED_TO)) == 0)
+ break;
+#else
+ if ((ifp->if_flags &
+ (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
+ break;
+#endif
+ }
+ if (ifp == NULL)
+ panic("bootpc_init: no suitable interface");
+ strcpy(nd->myif.ifra_name,ireq.ifr_name);
+ printf("bootpc_init: using network interface '%s'\n",
+ ireq.ifr_name);
+
+ if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0,procp)) != 0)
+ panic("nfs_boot: socreate, error=%d", error);
+
+ bootpc_fakeup_interface(&ireq,so,procp);
+
+ printf("Bootpc testing starting\n");
+
+ /* Get HW address */
+
+ for (ifa = TAILQ_FIRST(&ifp->if_addrhead) ;ifa;
+ ifa=TAILQ_NEXT(ifa,ifa_link))
+ if (ifa->ifa_addr->sa_family == AF_LINK &&
+ (sdl = ((struct sockaddr_dl *) ifa->ifa_addr)) &&
+ sdl->sdl_type == IFT_ETHER)
+ break;
+
+ if (!sdl)
+ panic("bootpc: Unable to find HW address");
+ if (sdl->sdl_alen != EALEN )
+ panic("bootpc: HW address len is %d, expected value is %d",
+ sdl->sdl_alen,EALEN);
+
+ printf("bootpc hw address is ");
+ delim="";
+ for (j=0;j<sdl->sdl_alen;j++) {
+ printf("%s%x",delim,((unsigned char *)LLADDR(sdl))[j]);
+ delim=":";
+ }
+ printf("\n");
+
+#if 0
+ bootpboot_p_iflist();
+ bootpboot_p_rtlist();
+#endif
+
+ bzero((caddr_t) &call, sizeof(call));
+
+ /* bootpc part */
+ call.op = 1; /* BOOTREQUEST */
+ call.htype= 1; /* 10mb ethernet */
+ call.hlen=sdl->sdl_alen; /* Hardware address length */
+ call.hops=0;
+ xid++;
+ call.xid = txdr_unsigned(xid);
+ bcopy(LLADDR(sdl),&call.chaddr,sdl->sdl_alen);
+
+ call.vend[0]=99;
+ call.vend[1]=130;
+ call.vend[2]=83;
+ call.vend[3]=99;
+ call.vend[4]=255;
+
+ call.secs = 0;
+ call.flags = htons(0x8000); /* We need a broadcast answer */
+
+ error = bootpc_call(&call,&reply,procp);
+
+ if (error) {
+#ifdef BOOTP_NFSROOT
+ panic("BOOTP call failed");
+#endif
+ return;
+ }
+
+ bzero(&myaddr,sizeof(myaddr));
+ bzero(&netmask,sizeof(netmask));
+ bzero(&gw,sizeof(gw));
+
+ myaddr.sin_len = sizeof(myaddr);
+ myaddr.sin_family = AF_INET;
+
+ netmask.sin_len = sizeof(netmask);
+ netmask.sin_family = AF_INET;
+
+ gw.sin_len = sizeof(gw);
+ gw.sin_family= AF_INET;
+
+ nd->root_args.version = NFS_ARGSVERSION;
+ nd->root_args.rsize = 8192;
+ nd->root_args.wsize = 8192;
+ nd->root_args.sotype = SOCK_DGRAM;
+ nd->root_args.flags = (NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_RESVPORT);
+
+ nd->swap_saddr.sin_len = sizeof(gw);
+ nd->swap_saddr.sin_family = AF_INET;
+
+ nd->swap_args.version = NFS_ARGSVERSION;
+ nd->swap_args.rsize = 8192;
+ nd->swap_args.wsize = 8192;
+ nd->swap_args.sotype = SOCK_DGRAM;
+ nd->swap_args.flags = (NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_RESVPORT);
+
+ myaddr.sin_addr = reply.yiaddr;
+
+ ip = ntohl(myaddr.sin_addr.s_addr);
+ snprintf(lookup_path, sizeof(lookup_path), "swap.%d.%d.%d.%d",
+ ip >> 24, (ip >> 16) & 255 ,(ip >> 8) & 255 ,ip & 255 );
+
+ printip("My ip address",myaddr.sin_addr);
+
+ printip("Server ip address",reply.siaddr);
+
+ gw.sin_addr = reply.giaddr;
+ printip("Gateway ip address",reply.giaddr);
+
+ if (reply.sname[0])
+ printf("Server name is %s\n",reply.sname);
+ if (reply.file[0])
+ printf("boot file is %s\n",reply.file);
+ if (reply.vend[0]==99 && reply.vend[1]==130 &&
+ reply.vend[2]==83 && reply.vend[3]==99) {
+ j=4;
+ ncode = reply.vend[j];
+ while (j<sizeof(reply.vend)) {
+ code = reply.vend[j] = ncode;
+ if (code==255)
+ break;
+ if (code==0) {
+ j++;
+ continue;
+ }
+ len = reply.vend[j+1];
+ j+=2;
+ if (len+j>=sizeof(reply.vend)) {
+ printf("Truncated field");
+ break;
+ }
+ ncode = reply.vend[j+len];
+ reply.vend[j+len]='\0';
+ p = &reply.vend[j];
+ switch (code) {
+ case 1:
+ if (len!=4)
+ panic("bootpc: subnet mask len is %d",len);
+ bcopy(&reply.vend[j],&netmask.sin_addr,4);
+ gotnetmask=1;
+ printip("Subnet mask",netmask.sin_addr);
+ break;
+ case 6: /* Domain Name servers. Unused */
+ case 16: /* Swap server IP address. unused */
+ case 2:
+ /* Time offset */
+ break;
+ case 3:
+ /* Routers */
+ if (len % 4)
+ panic("bootpc: Router Len is %d",len);
+ if (len > 0) {
+ bcopy(&reply.vend[j],&gw.sin_addr,4);
+ printip("Router",gw.sin_addr);
+ gotgw=1;
+ }
+ break;
+ case 17:
+ if (setfs(&nd->root_saddr, nd->root_hostnam, p)) {
+ printf("rootfs is %s\n",p);
+ gotrootpath=1;
+ } else
+ panic("Failed to set rootfs to %s",p);
+ break;
+ case 12:
+ if (len>=MAXHOSTNAMELEN)
+ panic("bootpc: hostname >=%d bytes",MAXHOSTNAMELEN);
+ strncpy(nd->my_hostnam,&reply.vend[j],len);
+ nd->my_hostnam[len]=0;
+ strncpy(hostname,&reply.vend[j],len);
+ hostname[len]=0;
+ printf("Hostname is %s\n",hostname);
+ break;
+ case 128:
+ if (setfs(&nd->swap_saddr, nd->swap_hostnam, p)) {
+ gotswappath=1;
+ printf("swapfs is %s\n",p);
+ } else
+ panic("Failed to set swapfs to %s",p);
+ break;
+ case 129:
+ {
+ int swaplen;
+ if (len!=4)
+ panic("bootpc: Expected 4 bytes for swaplen, not %d bytes",len);
+ bcopy(&reply.vend[j],&swaplen,4);
+ nd->swap_nblks = ntohl(swaplen);
+ printf("bootpc: Swap size is %d KB\n",nd->swap_nblks);
+ }
+ break;
+ case 130: /* root mount options */
+ mountopts(&nd->root_args,p);
+ break;
+ case 131: /* swap mount options */
+ mountopts(&nd->swap_args,p);
+ break;
+ default:
+ printf("Ignoring field type %d\n",code);
+ }
+ j+=len;
+ }
+ }
+
+ if (!gotswappath)
+ nd->swap_nblks = 0;
+#ifdef BOOTP_NFSROOT
+ if (!gotrootpath)
+ panic("bootpc: No root path offered");
+#endif
+
+ if (!gotnetmask) {
+ if (IN_CLASSA(ntohl(myaddr.sin_addr.s_addr)))
+ netmask.sin_addr.s_addr = htonl(IN_CLASSA_NET);
+ else if (IN_CLASSB(ntohl(myaddr.sin_addr.s_addr)))
+ netmask.sin_addr.s_addr = htonl(IN_CLASSB_NET);
+ else
+ netmask.sin_addr.s_addr = htonl(IN_CLASSC_NET);
+ }
+ if (!gotgw) {
+ /* Use proxyarp */
+ gw.sin_addr.s_addr = myaddr.sin_addr.s_addr;
+ }
+
+#if 0
+ bootpboot_p_iflist();
+ bootpboot_p_rtlist();
+#endif
+ error = bootpc_adjust_interface(&ireq,so,
+ &myaddr,&netmask,&gw,procp);
+
+ soclose(so);
+
+#if 0
+ bootpboot_p_iflist();
+ bootpboot_p_rtlist();
+#endif
+
+ if (gotrootpath) {
+
+ error = md_mount(&nd->root_saddr, nd->root_hostnam,
+ nd->root_fh, &nd->root_fhsize,
+ &nd->root_args,procp);
+ if (error)
+ panic("nfs_boot: mountd root, error=%d", error);
+
+ if (gotswappath) {
+
+ error = md_mount(&nd->swap_saddr,
+ nd->swap_hostnam,
+ nd->swap_fh, &nd->swap_fhsize,&nd->swap_args,procp);
+ if (error)
+ panic("nfs_boot: mountd swap, error=%d", error);
+
+ error = md_lookup_swap(&nd->swap_saddr,lookup_path,nd->swap_fh,
+ &nd->swap_fhsize, &nd->swap_args,procp);
+ if (error)
+ panic("nfs_boot: lookup swap, error=%d", error);
+ }
+ nfs_diskless_valid = 3;
+ }
+
+
+ bcopy(&myaddr,&nd->myif.ifra_addr,sizeof(myaddr));
+ bcopy(&myaddr,&nd->myif.ifra_broadaddr,sizeof(myaddr));
+ ((struct sockaddr_in *) &nd->myif.ifra_broadaddr)->sin_addr.s_addr =
+ myaddr.sin_addr.s_addr | ~ netmask.sin_addr.s_addr;
+ bcopy(&netmask,&nd->myif.ifra_mask,sizeof(netmask));
+
+#if 0
+ bootpboot_p_iflist();
+ bootpboot_p_rtlist();
+#endif
+ return;
+}
+
+/*
+ * RPC: mountd/mount
+ * Given a server pathname, get an NFS file handle.
+ * Also, sets mdsin->sin_port to the NFS service port.
+ */
+static int
+md_mount(mdsin, path, fhp, fhsizep, args, procp)
+ struct sockaddr_in *mdsin; /* mountd server address */
+ char *path;
+ u_char *fhp;
+ int *fhsizep;
+ struct nfs_args *args;
+ struct proc *procp;
+{
+ struct mbuf *m;
+ int error;
+ int authunixok;
+ int authcount;
+ int authver;
+
+#ifdef BOOTP_NFSV3
+ /* First try NFS v3 */
+ /* Get port number for MOUNTD. */
+ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER3,
+ &mdsin->sin_port, procp);
+ if (!error) {
+ m = xdr_string_encode(path, strlen(path));
+
+ /* Do RPC to mountd. */
+ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER3,
+ RPCMNT_MOUNT, &m, NULL, curproc);
+ }
+ if (!error) {
+ args->flags |= NFSMNT_NFSV3;
+ } else {
+#endif
+ /* Fallback to NFS v2 */
+
+ /* Get port number for MOUNTD. */
+ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER1,
+ &mdsin->sin_port, procp);
+ if (error) return error;
+
+ m = xdr_string_encode(path, strlen(path));
+
+ /* Do RPC to mountd. */
+ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER1,
+ RPCMNT_MOUNT, &m, NULL, curproc);
+ if (error)
+ return error; /* message already freed */
+
+#ifdef BOOTP_NFSV3
+ }
+#endif
+
+ if (xdr_int_decode(&m,&error) || error)
+ goto bad;
+
+ if (args->flags & NFSMNT_NFSV3) {
+ if (xdr_int_decode(&m,fhsizep) ||
+ *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0 )
+ goto bad;
+ } else
+ *fhsizep = NFSX_V2FH;
+
+ if (xdr_opaque_decode(&m,fhp,*fhsizep))
+ goto bad;
+
+ if (args->flags & NFSMNT_NFSV3) {
+ if (xdr_int_decode(&m,&authcount))
+ goto bad;
+ authunixok = 0;
+ if (authcount<0 || authcount>100)
+ goto bad;
+ while (authcount>0) {
+ if (xdr_int_decode(&m,&authver))
+ goto bad;
+ if (authver == RPCAUTH_UNIX)
+ authunixok = 1;
+ authcount--;
+ }
+ if (!authunixok)
+ goto bad;
+ }
+
+ /* Set port number for NFS use. */
+ error = krpc_portmap(mdsin, NFS_PROG,
+ (args->flags & NFSMNT_NFSV3)?NFS_VER3:NFS_VER2,
+ &mdsin->sin_port, procp);
+
+ goto out;
+
+bad:
+ error = EBADRPC;
+
+out:
+ m_freem(m);
+ return error;
+}
+
+
+static int md_lookup_swap(mdsin, path, fhp, fhsizep, args, procp)
+ struct sockaddr_in *mdsin; /* mountd server address */
+ char *path;
+ u_char *fhp;
+ int *fhsizep;
+ struct nfs_args *args;
+ struct proc *procp;
+{
+ struct mbuf *m;
+ int error;
+ int size = -1;
+ int attribs_present;
+ int status;
+ union {
+ u_int32_t v2[17];
+ u_int32_t v3[21];
+ } fattribs;
+
+ m = m_get(M_WAIT,MT_DATA);
+ if (!m)
+ return ENOBUFS;
+
+ if (args->flags & NFSMNT_NFSV3) {
+ *mtod(m,u_int32_t *) = txdr_unsigned(*fhsizep);
+ bcopy(fhp,mtod(m,u_char *)+sizeof(u_int32_t),*fhsizep);
+ m->m_len = *fhsizep + sizeof(u_int32_t);
+ } else {
+ bcopy(fhp,mtod(m,u_char *),NFSX_V2FH);
+ m->m_len = NFSX_V2FH;
+ }
+
+ m->m_next = xdr_string_encode(path, strlen(path));
+ if (!m->m_next) {
+ error = ENOBUFS;
+ goto out;
+ }
+
+ /* Do RPC to nfsd. */
+ if (args->flags & NFSMNT_NFSV3)
+ error = krpc_call(mdsin, NFS_PROG, NFS_VER3,
+ NFSPROC_LOOKUP, &m, NULL, procp);
+ else
+ error = krpc_call(mdsin, NFS_PROG, NFS_VER2,
+ NFSV2PROC_LOOKUP, &m, NULL, procp);
+ if (error)
+ return error; /* message already freed */
+
+ if (xdr_int_decode(&m,&status))
+ goto bad;
+ if (status) {
+ error = ENOENT;
+ goto out;
+ }
+
+ if (args->flags & NFSMNT_NFSV3) {
+ if (xdr_int_decode(&m,fhsizep) ||
+ *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0 )
+ goto bad;
+ } else
+ *fhsizep = NFSX_V2FH;
+
+ if (xdr_opaque_decode(&m, fhp, *fhsizep))
+ goto bad;
+
+ if (args->flags & NFSMNT_NFSV3) {
+ if (xdr_int_decode(&m,&attribs_present))
+ goto bad;
+ if (attribs_present) {
+ if (xdr_opaque_decode(&m,(u_char *) &fattribs.v3,
+ sizeof(u_int32_t)*21))
+ goto bad;
+ size = fxdr_unsigned(u_int32_t, fattribs.v3[6]);
+ }
+ } else {
+ if (xdr_opaque_decode(&m,(u_char *) &fattribs.v2,
+ sizeof(u_int32_t)*17))
+ goto bad;
+ size = fxdr_unsigned(u_int32_t, fattribs.v2[5]);
+ }
+
+ if (!nfsv3_diskless.swap_nblks && size!= -1) {
+ nfsv3_diskless.swap_nblks = size/1024;
+ printf("md_lookup_swap: Swap size is %d KB\n",
+ nfsv3_diskless.swap_nblks);
+ }
+
+ goto out;
+
+bad:
+ error = EBADRPC;
+
+out:
+ m_freem(m);
+ return error;
+}
diff --git a/sys/nfs/krpc.h b/sys/nfs/krpc.h
new file mode 100644
index 0000000..6c889b5
--- /dev/null
+++ b/sys/nfs/krpc.h
@@ -0,0 +1,31 @@
+/* $NetBSD: krpc.h,v 1.4 1995/12/19 23:07:11 cgd Exp $ */
+/* $FreeBSD$ */
+
+#include <sys/cdefs.h>
+
+struct mbuf;
+struct proc;
+struct sockaddr;
+struct sockaddr_in;
+
+int krpc_call __P((struct sockaddr_in *_sin,
+ u_int prog, u_int vers, u_int func,
+ struct mbuf **data, struct sockaddr **from, struct proc *procp));
+
+int krpc_portmap __P((struct sockaddr_in *_sin,
+ u_int prog, u_int vers, u_int16_t *portp,struct proc *procp));
+
+struct mbuf *xdr_string_encode __P((char *str, int len));
+
+/*
+ * RPC definitions for the portmapper
+ */
+#define PMAPPORT 111
+#define PMAPPROG 100000
+#define PMAPVERS 2
+#define PMAPPROC_NULL 0
+#define PMAPPROC_SET 1
+#define PMAPPROC_UNSET 2
+#define PMAPPROC_GETPORT 3
+#define PMAPPROC_DUMP 4
+#define PMAPPROC_CALLIT 5
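+
+/*
+ * Typical use (illustrative sketch, not part of the original header): a
+ * caller first resolves the server's UDP port with krpc_portmap(), then
+ * issues the request with krpc_call(), passing arguments built with
+ * xdr_string_encode(); md_mount() in nfs/bootp_subr.c follows this pattern
+ * for the MOUNT protocol.
+ */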
diff --git a/sys/nfs/krpc_subr.c b/sys/nfs/krpc_subr.c
new file mode 100644
index 0000000..2127d46
--- /dev/null
+++ b/sys/nfs/krpc_subr.c
@@ -0,0 +1,481 @@
+/* $NetBSD: krpc_subr.c,v 1.12.4.1 1996/06/07 00:52:26 cgd Exp $ */
+/* $FreeBSD$ */
+
+/*
+ * Copyright (c) 1995 Gordon Ross, Adam Glass
+ * Copyright (c) 1992 Regents of the University of California.
+ * All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * partially based on:
+ * libnetboot/rpc.c
+ * @(#) Header: rpc.c,v 1.12 93/09/28 08:31:56 leres Exp (LBL)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/uio.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/krpc.h>
+#include <nfs/xdr_subs.h>
+
+/*
+ * Kernel support for Sun RPC
+ *
+ * Used currently for bootstrapping in nfs diskless configurations.
+ */
+
+/*
+ * Generic RPC headers
+ */
+
+struct auth_info {
+ u_int32_t authtype; /* auth type */
+ u_int32_t authlen; /* auth length */
+};
+
+struct auth_unix {
+ int32_t ua_time;
+ int32_t ua_hostname; /* null */
+ int32_t ua_uid;
+ int32_t ua_gid;
+ int32_t ua_gidlist; /* null */
+};
+
+struct rpc_call {
+ u_int32_t rp_xid; /* request transaction id */
+ int32_t rp_direction; /* call direction (0) */
+ u_int32_t rp_rpcvers; /* rpc version (2) */
+ u_int32_t rp_prog; /* program */
+ u_int32_t rp_vers; /* version */
+ u_int32_t rp_proc; /* procedure */
+ struct auth_info rpc_auth;
+ struct auth_unix rpc_unix;
+ struct auth_info rpc_verf;
+};
+
+struct rpc_reply {
+ u_int32_t rp_xid; /* request transaction id */
+ int32_t rp_direction; /* call direction (1) */
+ int32_t rp_astatus; /* accept status (0: accepted) */
+ union {
+ u_int32_t rpu_errno;
+ struct {
+ struct auth_info rok_auth;
+ u_int32_t rok_status;
+ } rpu_rok;
+ } rp_u;
+};
+#define rp_errno rp_u.rpu_errno
+#define rp_auth rp_u.rpu_rok.rok_auth
+#define rp_status rp_u.rpu_rok.rok_status
+
+#define MIN_REPLY_HDR 16 /* xid, dir, astat, errno */
+
+/*
+ * What is the longest we will wait before re-sending a request?
+ * Note this is also the frequency of "RPC timeout" messages.
+ * The re-send loop counts up linearly to this maximum, so the
+ * first complaint will happen after (1+2+3+4+5)=15 seconds.
+ */
+#define MAX_RESEND_DELAY 5 /* seconds */
+
+/*
+ * Call portmap to lookup a port number for a particular rpc program
+ * Returns non-zero error on failure.
+ */
+int
+krpc_portmap(sin, prog, vers, portp, procp)
+ struct sockaddr_in *sin; /* server address */
+ u_int prog, vers; /* host order */
+ u_int16_t *portp; /* network order */
+ struct proc *procp;
+{
+ struct sdata {
+ u_int32_t prog; /* call program */
+ u_int32_t vers; /* call version */
+ u_int32_t proto; /* call protocol */
+ u_int32_t port; /* call port (unused) */
+ } *sdata;
+ struct rdata {
+ u_int16_t pad;
+ u_int16_t port;
+ } *rdata;
+ struct mbuf *m;
+ int error;
+
+ /* The portmapper port is fixed. */
+ if (prog == PMAPPROG) {
+ *portp = htons(PMAPPORT);
+ return 0;
+ }
+
+ m = m_get(M_WAIT, MT_DATA);
+ if (m == NULL)
+ return ENOBUFS;
+ sdata = mtod(m, struct sdata *);
+ m->m_len = sizeof(*sdata);
+
+ /* Do the RPC to get it. */
+ sdata->prog = txdr_unsigned(prog);
+ sdata->vers = txdr_unsigned(vers);
+ sdata->proto = txdr_unsigned(IPPROTO_UDP);
+ sdata->port = 0;
+
+ sin->sin_port = htons(PMAPPORT);
+ error = krpc_call(sin, PMAPPROG, PMAPVERS,
+ PMAPPROC_GETPORT, &m, NULL, procp);
+ if (error)
+ return error;
+
+ if (m->m_len < sizeof(*rdata)) {
+ m = m_pullup(m, sizeof(*rdata));
+ if (m == NULL)
+ return ENOBUFS;
+ }
+ rdata = mtod(m, struct rdata *);
+ *portp = rdata->port;
+
+ m_freem(m);
+ return 0;
+}
+
+/*
+ * Do a remote procedure call (RPC) and wait for its reply.
+ * If from_p is non-null, then we are doing broadcast, and
+ * the address from whence the response came is saved there.
+ */
+int
+krpc_call(sa, prog, vers, func, data, from_p, procp)
+ struct sockaddr_in *sa;
+ u_int prog, vers, func;
+ struct mbuf **data; /* input/output */
+ struct sockaddr **from_p; /* output */
+ struct proc *procp;
+{
+ struct socket *so;
+ struct sockaddr_in *sin, ssin;
+ struct sockaddr *from;
+ struct mbuf *m, *nam, *mhead;
+ struct rpc_call *call;
+ struct rpc_reply *reply;
+ struct sockopt sopt;
+ struct timeval tv;
+ struct uio auio;
+ int error, rcvflg, timo, secs, len;
+ static u_int32_t xid = ~0xFF;
+ u_int16_t tport;
+
+ /*
+ * Validate address family.
+ * Sorry, this is INET specific...
+ */
+ if (sa->sin_family != AF_INET)
+ return (EAFNOSUPPORT);
+
+ /* Free at end if not null. */
+ nam = mhead = NULL;
+ from = NULL;
+
+ /*
+ * Create socket and set its receive timeout.
+ */
+ if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0, procp)))
+ goto out;
+
+ tv.tv_sec = 1;
+ tv.tv_usec = 0;
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_RCVTIMEO;
+ sopt.sopt_val = &tv;
+ sopt.sopt_valsize = sizeof tv;
+
+ if ((error = sosetopt(so, &sopt)) != 0)
+ goto out;
+
+ /*
+ * Enable broadcast if necessary.
+ */
+ if (from_p) {
+ int on = 1;
+ sopt.sopt_name = SO_BROADCAST;
+ sopt.sopt_val = &on;
+ sopt.sopt_valsize = sizeof on;
+ if ((error = sosetopt(so, &sopt)) != 0)
+ goto out;
+ }
+
+ /*
+ * Bind the local endpoint to a reserved port,
+ * because some NFS servers refuse requests from
+ * non-reserved (non-privileged) ports.
+ */
+ sin = &ssin;
+ bzero(sin, sizeof *sin);
+ sin->sin_len = sizeof(*sin);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ tport = IPPORT_RESERVED;
+ do {
+ tport--;
+ sin->sin_port = htons(tport);
+ error = sobind(so, (struct sockaddr *)sin, procp);
+ } while (error == EADDRINUSE &&
+ tport > IPPORT_RESERVED / 2);
+ if (error) {
+ printf("bind failed\n");
+ goto out;
+ }
+
+ /*
+ * Setup socket address for the server.
+ */
+
+ /*
+ * Prepend RPC message header.
+ */
+ mhead = m_gethdr(M_WAIT, MT_DATA);
+ mhead->m_next = *data;
+ call = mtod(mhead, struct rpc_call *);
+ mhead->m_len = sizeof(*call);
+ bzero((caddr_t)call, sizeof(*call));
+ /* rpc_call part */
+ xid++;
+ call->rp_xid = txdr_unsigned(xid);
+ /* call->rp_direction = 0; */
+ call->rp_rpcvers = txdr_unsigned(2);
+ call->rp_prog = txdr_unsigned(prog);
+ call->rp_vers = txdr_unsigned(vers);
+ call->rp_proc = txdr_unsigned(func);
+ /* rpc_auth part (auth_unix as root) */
+ call->rpc_auth.authtype = txdr_unsigned(RPCAUTH_UNIX);
+ call->rpc_auth.authlen = txdr_unsigned(sizeof(struct auth_unix));
+ /* rpc_verf part (auth_null) */
+ call->rpc_verf.authtype = 0;
+ call->rpc_verf.authlen = 0;
+
+ /*
+ * Setup packet header
+ */
+ len = 0;
+ m = mhead;
+ while (m) {
+ len += m->m_len;
+ m = m->m_next;
+ }
+ mhead->m_pkthdr.len = len;
+ mhead->m_pkthdr.rcvif = NULL;
+
+ /*
+ * Send it, repeatedly, until a reply is received,
+ * but delay each re-send by an increasing amount.
+ * If the delay hits the maximum, start complaining.
+ */
+ timo = 0;
+ for (;;) {
+ /* Send RPC request (or re-send). */
+ m = m_copym(mhead, 0, M_COPYALL, M_WAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto out;
+ }
+ error = sosend(so, (struct sockaddr *)sa, NULL, m,
+ NULL, 0, 0);
+ if (error) {
+ printf("krpc_call: sosend: %d\n", error);
+ goto out;
+ }
+ m = NULL;
+
+ /* Determine new timeout. */
+ if (timo < MAX_RESEND_DELAY)
+ timo++;
+ else
+ printf("RPC timeout for server 0x%lx\n",
+ (u_long)ntohl(sa->sin_addr.s_addr));
+
+ /*
+ * Wait for up to timo seconds for a reply.
+ * The socket receive timeout was set to 1 second.
+ */
+ secs = timo;
+ while (secs > 0) {
+ if (from) {
+ FREE(from, M_SONAME);
+ from = NULL;
+ }
+ if (m) {
+ m_freem(m);
+ m = NULL;
+ }
+ bzero(&auio,sizeof(auio));
+ auio.uio_resid = len = 1<<16;
+ rcvflg = 0;
+ error = soreceive(so, &from, &auio, &m, NULL, &rcvflg);
+ if (error == EWOULDBLOCK) {
+ secs--;
+ continue;
+ }
+ if (error)
+ goto out;
+ len -= auio.uio_resid;
+
+ /* Does the reply contain at least a header? */
+ if (len < MIN_REPLY_HDR)
+ continue;
+ if (m->m_len < MIN_REPLY_HDR)
+ continue;
+ reply = mtod(m, struct rpc_reply *);
+
+ /* Is it the right reply? */
+ if (reply->rp_direction != txdr_unsigned(RPC_REPLY))
+ continue;
+
+ if (reply->rp_xid != txdr_unsigned(xid))
+ continue;
+
+ /* Was RPC accepted? (authorization OK) */
+ if (reply->rp_astatus != 0) {
+ error = fxdr_unsigned(u_int32_t, reply->rp_errno);
+ printf("rpc denied, error=%d\n", error);
+ continue;
+ }
+
+ /* Did the call succeed? */
+ if (reply->rp_status != 0) {
+ error = fxdr_unsigned(u_int32_t, reply->rp_status);
+ if (error == RPC_PROGMISMATCH) {
+ error = EBADRPC;
+ goto out;
+ }
+ printf("rpc denied, status=%d\n", error);
+ continue;
+ }
+
+ goto gotreply; /* break two levels */
+
+ } /* while secs */
+ } /* forever send/receive */
+
+ error = ETIMEDOUT;
+ goto out;
+
+ gotreply:
+
+ /*
+ * Get RPC reply header into first mbuf,
+ * get its length, then strip it off.
+ */
+ len = sizeof(*reply);
+ if (m->m_len < len) {
+ m = m_pullup(m, len);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto out;
+ }
+ }
+ reply = mtod(m, struct rpc_reply *);
+ if (reply->rp_auth.authtype != 0) {
+ len += fxdr_unsigned(u_int32_t, reply->rp_auth.authlen);
+ len = (len + 3) & ~3; /* XXX? */
+ }
+ m_adj(m, len);
+
+ /* result */
+ *data = m;
+ if (from_p) {
+ *from_p = from;
+ from = NULL;
+ }
+
+ out:
+ if (mhead) m_freem(mhead);
+ if (from) free(from, M_SONAME);
+ soclose(so);
+ return error;
+}
+
+/*
+ * eXternal Data Representation routines.
+ * (but with non-standard args...)
+ */
+
+/*
+ * String representation for RPC.
+ */
+struct xdr_string {
+ u_int32_t len; /* length without null or padding */
+ char data[4]; /* data (longer, of course) */
+ /* data is padded to a long-word boundary */
+};
+
+struct mbuf *
+xdr_string_encode(str, len)
+ char *str;
+ int len;
+{
+ struct mbuf *m;
+ struct xdr_string *xs;
+ int dlen; /* padded string length */
+ int mlen; /* message length */
+
+ dlen = (len + 3) & ~3;
+ mlen = dlen + 4;
+
+ if (mlen > MCLBYTES) /* If too big, we just can't do it. */
+ return (NULL);
+
+ m = m_get(M_WAIT, MT_DATA);
+ if (mlen > MLEN) {
+ MCLGET(m, M_WAIT);
+ if ((m->m_flags & M_EXT) == 0) {
+ (void) m_free(m); /* There can be only one. */
+ return (NULL);
+ }
+ }
+ xs = mtod(m, struct xdr_string *);
+ m->m_len = mlen;
+ xs->len = txdr_unsigned(len);
+ bcopy(str, xs->data, len);
+ return (m);
+}
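+
+/*
+ * Worked example (illustrative, not from the original source): for a
+ * 5-byte string, dlen = (5 + 3) & ~3 = 8 and mlen = 12, so the resulting
+ * mbuf holds a 4-byte XDR length word followed by the string padded out
+ * to the next 4-byte boundary.
+ */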
diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h
new file mode 100644
index 0000000..75a3982
--- /dev/null
+++ b/sys/nfs/nfs.h
@@ -0,0 +1,739 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.4 (Berkeley) 5/1/95
+ * $FreeBSD$
+ */
+
+#ifndef _NFS_NFS_H_
+#define _NFS_NFS_H_
+
+#ifdef KERNEL
+#include "opt_nfs.h"
+#endif
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define NFS_MAXIOVEC 34
+#define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */
+#define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */
+#define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#ifndef NFS_MINATTRTIMO
+#define NFS_MINATTRTIMO 3 /* VREG attrib cache timeout in sec */
+#endif
+#ifndef NFS_MAXATTRTIMO
+#define NFS_MAXATTRTIMO 60
+#endif
+#ifndef NFS_MINDIRATTRTIMO
+#define NFS_MINDIRATTRTIMO 30 /* VDIR attrib cache timeout in sec */
+#endif
+#ifndef NFS_MAXDIRATTRTIMO
+#define NFS_MAXDIRATTRTIMO 60
+#endif
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_READDIRSIZE 8192 /* Def. readdir size */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */
+#define NFS_MAXGATHERDELAY 100 /* Max. write gather delay (msec) */
+#ifndef NFS_GATHERDELAY
+#define NFS_GATHERDELAY 10 /* Default write gather delay (msec) */
+#endif
+#define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */
+#ifdef KERNEL
+#define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */
+#endif
+
+/*
+ * Oddballs
+ */
+#define NMOD(a) ((a) % nfs_asyncdaemons)
+#define NFS_CMPFH(n, f, s) \
+ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s)))
+#define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3)
+#define NFS_SRVMAXDATA(n) \
+ (((n)->nd_flag & ND_NFSV3) ? (((n)->nd_nam2) ? \
+ NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA)
+
+/*
+ * XXX
+ * The B_INVAFTERWRITE flag should be set to whatever is required by the
+ * buffer cache code to say "Invalidate the block after it is written back".
+ */
+#define B_INVAFTERWRITE B_NOCACHE
+
+/*
+ * The IO_METASYNC flag should be implemented for local file systems.
+ * (Until then, it is nothing at all.)
+ */
+#ifndef IO_METASYNC
+#define IO_METASYNC 0
+#endif
+
+/*
+ * Expected allocation sizes for major data structures. If the actual size
+ * of the structure exceeds these sizes, then malloc() will be allocating
+ * almost twice the memory required. This is used in nfs_init() to warn
+ * the sysadmin that the size of a structure should be reduced.
+ * (These sizes are always a power of 2. If the kernel malloc() changes
+ * to one that does not allocate space in powers of 2 size, then this all
+ * becomes bunk!)
+ */
+#define NFS_SVCALLOC 256
+#define NFS_UIDALLOC 128
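+
+/*
+ * Illustrative example (not part of the original header): with a
+ * power-of-2 allocator, a structure that grew to 260 bytes would be carved
+ * from a 512-byte bucket, wasting nearly half of each allocation; the
+ * nfs_init() warning exists to catch exactly that situation.
+ */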
+
+/*
+ * Arguments to mount NFS
+ */
+#define NFS_ARGSVERSION 3 /* change when nfs_args changes */
+struct nfs_args {
+ int version; /* args structure version number */
+ struct sockaddr *addr; /* file server address */
+ int addrlen; /* length of address */
+ int sotype; /* Socket type */
+ int proto; /* and Protocol */
+ u_char *fh; /* File handle to be mounted */
+ int fhsize; /* Size, in bytes, of fh */
+ int flags; /* flags */
+ int wsize; /* write size in bytes */
+ int rsize; /* read size in bytes */
+ int readdirsize; /* readdir size in bytes */
+ int timeo; /* initial timeout in .1 secs */
+ int retrans; /* times to retry send */
+ int maxgrouplist; /* Max. size of group list */
+ int readahead; /* # of blocks to readahead */
+ int leaseterm; /* Term (sec) of lease */
+ int deadthresh; /* Retrans threshold */
+ char *hostname; /* server's name */
+ int acregmin; /* cache attrs for reg files min time */
+ int acregmax; /* cache attrs for reg files max time */
+ int acdirmin; /* cache attrs for dirs min time */
+ int acdirmax; /* cache attrs for dirs max time */
+};
+
+/*
+ * NFS mount option flags
+ */
+#define NFSMNT_SOFT 0x00000001 /* soft mount (hard is default) */
+#define NFSMNT_WSIZE 0x00000002 /* set write size */
+#define NFSMNT_RSIZE 0x00000004 /* set read size */
+#define NFSMNT_TIMEO 0x00000008 /* set initial timeout */
+#define NFSMNT_RETRANS 0x00000010 /* set number of request retries */
+#define NFSMNT_MAXGRPS 0x00000020 /* set maximum grouplist size */
+#define NFSMNT_INT 0x00000040 /* allow interrupts on hard mount */
+#define NFSMNT_NOCONN 0x00000080 /* Don't Connect the socket */
+#define NFSMNT_NQNFS 0x00000100 /* Use Nqnfs protocol */
+#define NFSMNT_NFSV3 0x00000200 /* Use NFS Version 3 protocol */
+#define NFSMNT_KERB 0x00000400 /* Use Kerberos authentication */
+#define NFSMNT_DUMBTIMR 0x00000800 /* Don't estimate rtt dynamically */
+#define NFSMNT_LEASETERM 0x00001000 /* set lease term (nqnfs) */
+#define NFSMNT_READAHEAD 0x00002000 /* set read ahead */
+#define NFSMNT_DEADTHRESH 0x00004000 /* set dead server retry thresh */
+#define NFSMNT_RESVPORT 0x00008000 /* Allocate a reserved port */
+#define NFSMNT_RDIRPLUS 0x00010000 /* Use Readdirplus for V3 */
+#define NFSMNT_READDIRSIZE 0x00020000 /* Set readdir size */
+#define NFSMNT_ACREGMIN 0x00040000
+#define NFSMNT_ACREGMAX 0x00080000
+#define NFSMNT_ACDIRMIN 0x00100000
+#define NFSMNT_ACDIRMAX 0x00200000
+
+#define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier for V3 */
+#define NFSSTA_GOTPATHCONF 0x00080000 /* Got the V3 pathconf info */
+#define NFSSTA_GOTFSINFO 0x00100000 /* Got the V3 fsinfo */
+#define NFSSTA_MNTD 0x00200000 /* Mnt server for mnt point */
+#define NFSSTA_DISMINPROG 0x00400000 /* Dismount in progress */
+#define NFSSTA_DISMNT 0x00800000 /* Dismounted */
+#define NFSSTA_SNDLOCK 0x01000000 /* Send socket lock */
+#define NFSSTA_WANTSND 0x02000000 /* Want above */
+#define NFSSTA_RCVLOCK 0x04000000 /* Rcv socket lock */
+#define NFSSTA_WANTRCV 0x08000000 /* Want above */
+#define NFSSTA_WAITAUTH 0x10000000 /* Wait for authentication */
+#define NFSSTA_HASAUTH 0x20000000 /* Has authenticator */
+#define NFSSTA_WANTAUTH 0x40000000 /* Wants an authenticator */
+#define NFSSTA_AUTHERR 0x80000000 /* Authentication error */
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try to use it.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client addr for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_int32_t nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ u_char *nsd_authstr; /* Auth string (ret) */
+ int nsd_verflen; /* and the verifier */
+ u_char *nsd_verfstr;
+ struct timeval nsd_timestamp; /* timestamp from verifier */
+ u_int32_t nsd_ttl; /* credential ttl (sec) */
+ NFSKERBKEY_T nsd_key; /* Session key */
+};
+
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ u_char *ncd_authstr; /* Authenticator string */
+ int ncd_verflen; /* and the verifier */
+ u_char *ncd_verfstr;
+ NFSKERBKEY_T ncd_key; /* Session key */
+};
+
+/*
+ * XXX to allow amd to include nfs.h without nfsproto.h
+ */
+#ifdef NFS_NPROCS
+/*
+ * Stats structure
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+ int srvvop_writes;
+ int accesscache_hits;
+ int accesscache_misses;
+};
+#endif
+
+/*
+ * Flags for nfssvc() system call.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * fs.nfs sysctl(3) identifiers
+ */
+#define NFS_NFSSTATS 1 /* struct: struct nfsstats */
+#define NFS_NFSPRIVPORT 2 /* int: restrict nfs clients to reserved ports */
+
+#define FS_NFS_NAMES { \
+ { 0, 0 }, \
+ { "nfsstats", CTLTYPE_STRUCT }, \
+ { "nfsprivport", CTLTYPE_INT }, \
+}
+
+#ifdef KERNEL
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_NFSREQ);
+MALLOC_DECLARE(M_NFSDIROFF);
+MALLOC_DECLARE(M_NFSRVDESC);
+MALLOC_DECLARE(M_NFSUID);
+MALLOC_DECLARE(M_NQLEASE);
+MALLOC_DECLARE(M_NFSD);
+MALLOC_DECLARE(M_NFSBIGFH);
+MALLOC_DECLARE(M_NFSHASH);
+#endif
+
+#ifdef ZONE_INTERRUPT
+extern vm_zone_t nfsmount_zone;
+#endif
+
+extern struct callout_handle nfs_timer_handle;
+
+struct uio; struct buf; struct vattr; struct nameidata; /* XXX */
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals, the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#define NFSINT_SIGMASK(set) \
+ (SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) || \
+ SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) || \
+ SIGISMEMBER(set, SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
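+
+/*
+ * Illustrative sketch (editorial addition, not part of the original header):
+ * the usual pattern is to clear soft errors on datagram sockets after a
+ * receive attempt, roughly as below; "so" and "error" are hypothetical
+ * locals, and the real uses are in nfs_socket.c.
+ *
+ *     if (error && NFSIGNORE_SOERROR(so->so_proto->pr_flags, error))
+ *             error = 0;
+ */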
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+ TAILQ_ENTRY(nfsreq) r_chain;
+ struct mbuf *r_mreq;
+ struct mbuf *r_mrep;
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_int32_t r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ u_int32_t r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/*
+ * Queue head for nfsreq's
+ */
+extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq;
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#ifndef NFS_UIDHASHSIZ
+#define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */
+#endif
+#define NUIDHASH(sock, uid) \
+ (&(sock)->ns_uidhashtbl[(uid) % NFS_UIDHASHSIZ])
+#ifndef NFS_WDELAYHASHSIZ
+#define NFS_WDELAYHASHSIZ 16 /* and with this */
+#endif
+#define NWDELAYHASH(sock, f) \
+ (&(sock)->ns_wdelayhashtbl[(*((u_int32_t *)(f))) % NFS_WDELAYHASHSIZ])
+#ifndef NFS_MUIDHASHSIZ
+#define NFS_MUIDHASHSIZ 63 /* Tune the size of nfsmount with this */
+#endif
+#define NMUIDHASH(nmp, uid) \
+ (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ])
+#define NFSNOHASH(fhsum) \
+ (&nfsnodehashtbl[(fhsum) & nfsnodehash])
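+
+/*
+ * Illustrative sketch (editorial addition, not part of the original header):
+ * each macro above just selects a hash chain head, so a lookup walks that
+ * chain and compares keys.  A server-side search for a cached uid mapping
+ * might look roughly like this, where "slp" and "uid" are hypothetical
+ * locals and struct nfsuid is defined below:
+ *
+ *     struct nfsuid *nuidp;
+ *
+ *     for (nuidp = NUIDHASH(slp, uid)->lh_first; nuidp != 0;
+ *         nuidp = nuidp->nu_hash.le_next)
+ *             if (nuidp->nu_cr.cr_uid == uid)
+ *                     break;
+ */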
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+ u_int32_t had_inetaddr;
+ struct sockaddr *had_nam;
+};
+
+struct nfsuid {
+ TAILQ_ENTRY(nfsuid) nu_lru; /* LRU chain */
+ LIST_ENTRY(nfsuid) nu_hash; /* Hash list */
+ int nu_flag; /* Flags */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+ int nu_expire; /* Expiry time (sec) */
+ struct timeval nu_timestamp; /* Kerb. timestamp */
+ u_int32_t nu_nickname; /* Nickname on server */
+ NFSKERBKEY_T nu_key; /* and session key */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+#define NU_NAM 0x2
+#define NU_NETFAM(u) (((u)->nu_flag & NU_INETADDR) ? AF_INET : AF_ISO)
+
+struct nfsrv_rec {
+ STAILQ_ENTRY(nfsrv_rec) nr_link;
+ struct sockaddr *nr_address;
+ struct mbuf *nr_packet;
+};
+
+struct nfssvc_sock {
+ TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */
+ TAILQ_HEAD(, nfsuid) ns_uidlruhead;
+ struct file *ns_fp;
+ struct socket *ns_so;
+ struct sockaddr *ns_nam;
+ struct mbuf *ns_raw;
+ struct mbuf *ns_rawend;
+ STAILQ_HEAD(, nfsrv_rec) ns_rec;
+ struct mbuf *ns_frag;
+ int ns_flag;
+ int ns_solock;
+ int ns_cc;
+ int ns_reclen;
+ int ns_numuids;
+ u_int32_t ns_sref;
+ LIST_HEAD(, nfsrv_descript) ns_tq; /* Write gather lists */
+ LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ];
+ LIST_HEAD(nfsrvw_delayhash, nfsrv_descript) ns_wdelayhashtbl[NFS_WDELAYHASHSIZ];
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_LASTFRAG 0x20
+#define SLP_ALLFLAGS 0xff
+
+extern TAILQ_HEAD(nfssvc_sockhead, nfssvc_sock) nfssvc_sockhead;
+extern int nfssvc_sockhead_flag;
+#define SLP_INIT 0x01
+#define SLP_WANTINIT 0x02
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+ TAILQ_ENTRY(nfsd) nfsd_chain; /* List of all nfsd's */
+ int nfsd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nfsd_slp; /* Current socket */
+ int nfsd_authlen; /* Authenticator len */
+ u_char nfsd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ int nfsd_verflen; /* and the Verifier */
+ u_char nfsd_verfstr[RPCVERF_MAXSIZ];
+ struct proc *nfsd_procp; /* Proc ptr */
+ struct nfsrv_descript *nfsd_nd; /* Associated nfsrv_descript */
+};
+
+/* Bits for "nfsd_flag" */
+#define NFSD_WAITING 0x01
+#define NFSD_REQINPROG 0x02
+#define NFSD_NEEDAUTH 0x04
+#define NFSD_AUTHFAIL 0x08
+
+/*
+ * This structure is used by the server for describing each request.
+ * Some fields are used only when write request gathering is performed.
+ */
+struct nfsrv_descript {
+ u_quad_t nd_time; /* Write deadline (usec) */
+ off_t nd_off; /* Start byte offset */
+ off_t nd_eoff; /* and end byte offset */
+ LIST_ENTRY(nfsrv_descript) nd_hash; /* Hash list */
+ LIST_ENTRY(nfsrv_descript) nd_tq; /* and timer list */
+ LIST_HEAD(,nfsrv_descript) nd_coalesce; /* coalesced writes */
+ struct mbuf *nd_mrep; /* Request mbuf list */
+ struct mbuf *nd_md; /* Current dissect mbuf */
+ struct mbuf *nd_mreq; /* Reply mbuf list */
+ struct sockaddr *nd_nam; /* and socket addr */
+ struct sockaddr *nd_nam2; /* return socket addr */
+ caddr_t nd_dpos; /* Current dissect pos */
+ u_int32_t nd_procnum; /* RPC # */
+ int nd_stable; /* storage type */
+ int nd_flag; /* nd_flag */
+ int nd_len; /* Length of this write */
+ int nd_repstat; /* Reply status */
+ u_int32_t nd_retxid; /* Reply xid */
+ u_int32_t nd_duration; /* Lease duration */
+ struct timeval nd_starttime; /* Time RPC initiated */
+ fhandle_t nd_fh; /* File handle */
+ struct ucred nd_cr; /* Credentials */
+};
+
+/* Bits for "nd_flag" */
+#define ND_READ LEASE_READ
+#define ND_WRITE LEASE_WRITE
+#define ND_CHECK 0x04
+#define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK)
+#define ND_NFSV3 0x08
+#define ND_NQNFS 0x10
+#define ND_KERBNICK 0x20
+#define ND_KERBFULL 0x40
+#define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL)
+
+extern TAILQ_HEAD(nfsd_head, nfsd) nfsd_head;
+extern int nfsd_head_flag;
+#define NFSD_CHECKSLP 0x01
+
+/*
+ * These macros compare nfsrv_descript structures.
+ */
+#define NFSW_CONTIG(o, n) \
+ ((o)->nd_eoff >= (n)->nd_off && \
+ !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, NFSX_V3FH))
+
+#define NFSW_SAMECRED(o, n) \
+ (((o)->nd_flag & ND_KERBAUTH) == ((n)->nd_flag & ND_KERBAUTH) && \
+ !bcmp((caddr_t)&(o)->nd_cr, (caddr_t)&(n)->nd_cr, \
+ sizeof (struct ucred)))
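+
+/*
+ * Illustrative sketch (editorial addition, not part of the original header):
+ * write gathering may merge a new request "nd" into an already queued
+ * request "owp" only when both tests above agree, roughly:
+ *
+ *     if (NFSW_CONTIG(owp, nd) && NFSW_SAMECRED(owp, nd)) {
+ *             if (nd->nd_eoff > owp->nd_eoff)
+ *                     owp->nd_eoff = nd->nd_eoff;
+ *             LIST_INSERT_HEAD(&owp->nd_coalesce, nd, nd_tq);
+ *     }
+ *
+ * The variable names are hypothetical; the actual coalescing is done by
+ * nfsrv_writegather() in nfs_serv.c.
+ */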
+
+/*
+ * Defines for WebNFS
+ */
+
+#define WEBNFS_ESC_CHAR '%'
+#define WEBNFS_SPECCHAR_START 0x80
+
+#define WEBNFS_NATIVE_CHAR 0x80
+/*
+ * ..
+ * Possibly more here in the future.
+ */
+
+/*
+ * Macro for converting escape characters in WebNFS pathnames.
+ * Should really be in libkern.
+ */
+
+#define HEXTOC(c) \
+ ((c) >= 'a' ? ((c) - ('a' - 10)) : \
+ ((c) >= 'A' ? ((c) - ('A' - 10)) : ((c) - '0')))
+#define HEXSTRTOI(p) \
+ ((HEXTOC(p[0]) << 4) + HEXTOC(p[1]))
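+
+/*
+ * Illustrative sketch (editorial addition, not part of the original header):
+ * decoding one WebNFS escape in a pathname.  For the three characters
+ * "%20", HEXSTRTOI() applied to the two hex digits yields 0x20, i.e. a
+ * space; "cp" is a hypothetical pointer into the pathname buffer.
+ *
+ *     u_char c;
+ *
+ *     if (*cp == WEBNFS_ESC_CHAR) {
+ *             c = HEXSTRTOI((cp + 1));
+ *             cp += 3;
+ *     }
+ */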
+
+#ifdef NFS_DEBUG
+
+extern int nfs_debug;
+#define NFS_DEBUG_ASYNCIO 1 /* asynchronous i/o */
+#define NFS_DEBUG_WG 2 /* server write gathering */
+#define NFS_DEBUG_RC 4 /* server request caching */
+
+#define NFS_DPF(cat, args) \
+ do { \
+ if (nfs_debug & NFS_DEBUG_##cat) printf args; \
+ } while (0)
+
+#else
+
+#define NFS_DPF(cat, args)
+
+#endif
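+
+/*
+ * Illustrative sketch (editorial addition, not part of the original header):
+ * NFS_DPF() takes the category name without the NFS_DEBUG_ prefix and a
+ * parenthesized printf-style argument list, e.g.
+ *
+ *     NFS_DPF(WG, ("coalescing write at offset %d\n", (int)nd->nd_off));
+ *
+ * which prints only when the kernel is built with NFS_DEBUG and the
+ * NFS_DEBUG_WG bit is set in nfs_debug.
+ */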
+
+u_quad_t nfs_curusec __P((void));
+int nfs_init __P((struct vfsconf *vfsp));
+int nfs_uninit __P((struct vfsconf *vfsp));
+int nfs_reply __P((struct nfsreq *));
+int nfs_getreq __P((struct nfsrv_descript *,struct nfsd *,int));
+int nfs_send __P((struct socket *, struct sockaddr *, struct mbuf *,
+ struct nfsreq *));
+int nfs_rephead __P((int, struct nfsrv_descript *, struct nfssvc_sock *,
+ int, int, u_quad_t *, struct mbuf **, struct mbuf **,
+ caddr_t *));
+int nfs_sndlock __P((struct nfsreq *));
+void nfs_sndunlock __P((struct nfsreq *));
+int nfs_slplock __P((struct nfssvc_sock *, int));
+void nfs_slpunlock __P((struct nfssvc_sock *));
+int nfs_disct __P((struct mbuf **, caddr_t *, int, int, caddr_t *));
+int nfs_vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *,
+ int));
+int nfs_readrpc __P((struct vnode *, struct uio *, struct ucred *));
+int nfs_writerpc __P((struct vnode *, struct uio *, struct ucred *, int *,
+ int *));
+int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
+ struct ucred *cred, struct proc *procp));
+int nfs_readdirrpc __P((struct vnode *, struct uio *, struct ucred *));
+int nfs_asyncio __P((struct buf *, struct ucred *, struct proc *));
+int nfs_doio __P((struct buf *, struct ucred *, struct proc *));
+int nfs_readlinkrpc __P((struct vnode *, struct uio *, struct ucred *));
+int nfs_sigintr __P((struct nfsmount *, struct nfsreq *, struct proc *));
+int nfs_readdirplusrpc __P((struct vnode *, struct uio *, struct ucred *));
+int nfsm_disct __P((struct mbuf **, caddr_t *, int, int, caddr_t *));
+void nfsm_srvfattr __P((struct nfsrv_descript *, struct vattr *,
+ struct nfs_fattr *));
+void nfsm_srvwcc __P((struct nfsrv_descript *, int, struct vattr *, int,
+ struct vattr *, struct mbuf **, char **));
+void nfsm_srvpostopattr __P((struct nfsrv_descript *, int, struct vattr *,
+ struct mbuf **, char **));
+int netaddr_match __P((int, union nethostaddr *, struct sockaddr *));
+int nfs_request __P((struct vnode *, struct mbuf *, int, struct proc *,
+ struct ucred *, struct mbuf **, struct mbuf **,
+ caddr_t *));
+int nfs_loadattrcache __P((struct vnode **, struct mbuf **, caddr_t *,
+ struct vattr *));
+int nfs_namei __P((struct nameidata *, fhandle_t *, int,
+ struct nfssvc_sock *, struct sockaddr *, struct mbuf **,
+ caddr_t *, struct vnode **, struct proc *, int, int));
+void nfsm_adj __P((struct mbuf *, int, int));
+int nfsm_mbuftouio __P((struct mbuf **, struct uio *, int, caddr_t *));
+void nfsrv_initcache __P((void));
+int nfs_getauth __P((struct nfsmount *, struct nfsreq *, struct ucred *,
+ char **, int *, char *, int *, NFSKERBKEY_T));
+int nfs_getnickauth __P((struct nfsmount *, struct ucred *, char **,
+ int *, char *, int));
+int nfs_savenickauth __P((struct nfsmount *, struct ucred *, int,
+ NFSKERBKEY_T, struct mbuf **, char **,
+ struct mbuf *));
+int nfs_adv __P((struct mbuf **, caddr_t *, int, int));
+void nfs_nhinit __P((void));
+void nfs_timer __P((void*));
+u_long nfs_hash __P((nfsfh_t *, int));
+int nfsrv_dorec __P((struct nfssvc_sock *, struct nfsd *,
+ struct nfsrv_descript **));
+int nfsrv_getcache __P((struct nfsrv_descript *, struct nfssvc_sock *,
+ struct mbuf **));
+void nfsrv_updatecache __P((struct nfsrv_descript *, int, struct mbuf *));
+void nfsrv_cleancache __P((void));
+int nfs_connect __P((struct nfsmount *, struct nfsreq *));
+void nfs_disconnect __P((struct nfsmount *));
+void nfs_safedisconnect __P((struct nfsmount *));
+int nfs_getattrcache __P((struct vnode *, struct vattr *));
+int nfsm_strtmbuf __P((struct mbuf **, char **, const char *, long));
+int nfs_bioread __P((struct vnode *, struct uio *, int, struct ucred *));
+int nfsm_uiotombuf __P((struct uio *, struct mbuf **, int, caddr_t *));
+void nfsrv_init __P((int));
+void nfs_clearcommit __P((struct mount *));
+int nfsrv_errmap __P((struct nfsrv_descript *, int));
+void nfsrvw_sort __P((gid_t *, int));
+void nfsrv_setcred __P((struct ucred *, struct ucred *));
+int nfs_writebp __P((struct buf *, int, struct proc *));
+int nfsrv_object_create __P((struct vnode *));
+void nfsrv_wakenfsd __P((struct nfssvc_sock *slp));
+int nfsrv_writegather __P((struct nfsrv_descript **, struct nfssvc_sock *,
+ struct proc *, struct mbuf **));
+int nfs_fsinfo __P((struct nfsmount *, struct vnode *, struct ucred *,
+ struct proc *p));
+
+int nfsrv3_access __P((struct nfsrv_descript *nfsd,
+ struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_commit __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_create __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_fhtovp __P((fhandle_t *, int, struct vnode **, struct ucred *,
+ struct nfssvc_sock *, struct sockaddr *, int *,
+ int, int));
+int nfsrv_setpublicfs __P((struct mount *, struct netexport *,
+ struct export_args *));
+int nfs_ispublicfh __P((fhandle_t *));
+int nfsrv_fsinfo __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_getattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_link __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_lookup __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_mkdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_mknod __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_noop __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_null __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_pathconf __P((struct nfsrv_descript *nfsd,
+ struct nfssvc_sock *slp, struct proc *procp,
+ struct mbuf **mrq));
+int nfsrv_read __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_readdir __P((struct nfsrv_descript *nfsd,
+ struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_readdirplus __P((struct nfsrv_descript *nfsd,
+ struct nfssvc_sock *slp, struct proc *procp,
+ struct mbuf **mrq));
+int nfsrv_readlink __P((struct nfsrv_descript *nfsd,
+ struct nfssvc_sock *slp, struct proc *procp,
+ struct mbuf **mrq));
+int nfsrv_remove __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_rename __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_rmdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_setattr __P((struct nfsrv_descript *nfsd,
+ struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_statfs __P((struct nfsrv_descript *nfsd,
+ struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_symlink __P((struct nfsrv_descript *nfsd,
+ struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
+ struct proc *procp, struct mbuf **mrq));
+void nfsrv_rcv __P((struct socket *so, void *arg, int waitflag));
+void nfsrv_slpderef __P((struct nfssvc_sock *slp));
+#endif /* KERNEL */
+
+#endif
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
new file mode 100644
index 0000000..8e99d98
--- /dev/null
+++ b/sys/nfs/nfs_bio.c
@@ -0,0 +1,1555 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+#include <vm/vnode_pager.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsnode.h>
+
+static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
+ struct proc *p));
+
+extern int nfs_numasync;
+extern int nfs_pbuf_freecnt;
+extern struct nfsstats nfsstats;
+
+/*
+ * Vnode op for VM getpages.
+ */
+int
+nfs_getpages(ap)
+ struct vop_getpages_args /* {
+ struct vnode *a_vp;
+ vm_page_t *a_m;
+ int a_count;
+ int a_reqpage;
+ vm_ooffset_t a_offset;
+ } */ *ap;
+{
+ int i, error, nextoff, size, toff, count, npages;
+ struct uio uio;
+ struct iovec iov;
+ vm_offset_t kva;
+ struct buf *bp;
+ struct vnode *vp;
+ struct proc *p;
+ struct ucred *cred;
+ struct nfsmount *nmp;
+ vm_page_t *pages;
+
+ vp = ap->a_vp;
+ p = curproc; /* XXX */
+ cred = curproc->p_ucred; /* XXX */
+ nmp = VFSTONFS(vp->v_mount);
+ pages = ap->a_m;
+ count = ap->a_count;
+
+ if (vp->v_object == NULL) {
+ printf("nfs_getpages: called with non-merged cache vnode??\n");
+ return VM_PAGER_ERROR;
+ }
+
+ if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (void)nfs_fsinfo(nmp, vp, cred, p);
+
+ npages = btoc(count);
+
+ /*
+ * If the requested page is partially valid, just return it and
+ * allow the pager to zero-out the blanks. Partially valid pages
+ * can only occur at the file EOF.
+ */
+
+ {
+ vm_page_t m = pages[ap->a_reqpage];
+
+ if (m->valid != 0) {
+ /* handled by vm_fault now */
+ /* vm_page_zero_invalid(m, TRUE); */
+ for (i = 0; i < npages; ++i) {
+ if (i != ap->a_reqpage)
+ vnode_pager_freepage(pages[i]);
+ }
+ return(0);
+ }
+ }
+
+ /*
+ * We use only the kva address for the buffer, but this is extremely
+ * convenient and fast.
+ */
+ bp = getpbuf(&nfs_pbuf_freecnt);
+
+ kva = (vm_offset_t) bp->b_data;
+ pmap_qenter(kva, pages, npages);
+
+ iov.iov_base = (caddr_t) kva;
+ iov.iov_len = count;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
+ uio.uio_resid = count;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_rw = UIO_READ;
+ uio.uio_procp = p;
+
+ error = nfs_readrpc(vp, &uio, cred);
+ pmap_qremove(kva, npages);
+
+ relpbuf(bp, &nfs_pbuf_freecnt);
+
+ if (error && (uio.uio_resid == count)) {
+ printf("nfs_getpages: error %d\n", error);
+ for (i = 0; i < npages; ++i) {
+ if (i != ap->a_reqpage)
+ vnode_pager_freepage(pages[i]);
+ }
+ return VM_PAGER_ERROR;
+ }
+
+ /*
+ * Calculate the number of bytes read and validate only that number
+ * of bytes. Note that due to pending writes, size may be 0. This
+ * does not mean that the remaining data is invalid!
+ */
+
+ size = count - uio.uio_resid;
+
+ for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
+ vm_page_t m;
+ nextoff = toff + PAGE_SIZE;
+ m = pages[i];
+
+ m->flags &= ~PG_ZERO;
+
+ if (nextoff <= size) {
+ /*
+ * Read operation filled an entire page
+ */
+ m->valid = VM_PAGE_BITS_ALL;
+ vm_page_undirty(m);
+ } else if (size > toff) {
+ /*
+ * Read operation filled a partial page.
+ */
+ m->valid = 0;
+ vm_page_set_validclean(m, 0, size - toff);
+ /* handled by vm_fault now */
+ /* vm_page_zero_invalid(m, TRUE); */
+ }
+
+ if (i != ap->a_reqpage) {
+ /*
+ * Whether or not to leave the page activated is up in
+ * the air, but we should put the page on a page queue
+ * somewhere (it already is in the object). Result:
+ * It appears that empirical results show that
+ * deactivating pages is best.
+ */
+
+ /*
+ * Just in case someone was asking for this page we
+ * now tell them that it is ok to use.
+ */
+ if (!error) {
+ if (m->flags & PG_WANTED)
+ vm_page_activate(m);
+ else
+ vm_page_deactivate(m);
+ vm_page_wakeup(m);
+ } else {
+ vnode_pager_freepage(m);
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * Vnode op for VM putpages.
+ */
+int
+nfs_putpages(ap)
+ struct vop_putpages_args /* {
+ struct vnode *a_vp;
+ vm_page_t *a_m;
+ int a_count;
+ int a_sync;
+ int *a_rtvals;
+ vm_ooffset_t a_offset;
+ } */ *ap;
+{
+ struct uio uio;
+ struct iovec iov;
+ vm_offset_t kva;
+ struct buf *bp;
+ int iomode, must_commit, i, error, npages, count;
+ off_t offset;
+ int *rtvals;
+ struct vnode *vp;
+ struct proc *p;
+ struct ucred *cred;
+ struct nfsmount *nmp;
+ struct nfsnode *np;
+ vm_page_t *pages;
+
+ vp = ap->a_vp;
+ np = VTONFS(vp);
+ p = curproc; /* XXX */
+ cred = curproc->p_ucred; /* XXX */
+ nmp = VFSTONFS(vp->v_mount);
+ pages = ap->a_m;
+ count = ap->a_count;
+ rtvals = ap->a_rtvals;
+ npages = btoc(count);
+ offset = IDX_TO_OFF(pages[0]->pindex);
+
+ if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (void)nfs_fsinfo(nmp, vp, cred, p);
+
+ for (i = 0; i < npages; i++) {
+ rtvals[i] = VM_PAGER_AGAIN;
+ }
+
+ /*
+ * When putting pages, do not extend file past EOF.
+ */
+
+ if (offset + count > np->n_size) {
+ count = np->n_size - offset;
+ if (count < 0)
+ count = 0;
+ }
+
+ /*
+ * We use only the kva address for the buffer, but this is extremely
+ * convenient and fast.
+ */
+ bp = getpbuf(&nfs_pbuf_freecnt);
+
+ kva = (vm_offset_t) bp->b_data;
+ pmap_qenter(kva, pages, npages);
+
+ iov.iov_base = (caddr_t) kva;
+ iov.iov_len = count;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = offset;
+ uio.uio_resid = count;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_rw = UIO_WRITE;
+ uio.uio_procp = p;
+
+ if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0)
+ iomode = NFSV3WRITE_UNSTABLE;
+ else
+ iomode = NFSV3WRITE_FILESYNC;
+
+ error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
+
+ pmap_qremove(kva, npages);
+ relpbuf(bp, &nfs_pbuf_freecnt);
+
+ if (!error) {
+ int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
+ for (i = 0; i < nwritten; i++) {
+ rtvals[i] = VM_PAGER_OK;
+ vm_page_undirty(pages[i]);
+ }
+ if (must_commit)
+ nfs_clearcommit(vp->v_mount);
+ }
+ return rtvals[0];
+}
+
+/*
+ * Vnode op for read using bio
+ */
+int
+nfs_bioread(vp, uio, ioflag, cred)
+ register struct vnode *vp;
+ register struct uio *uio;
+ int ioflag;
+ struct ucred *cred;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ register int biosize, i;
+ struct buf *bp = 0, *rabp;
+ struct vattr vattr;
+ struct proc *p;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ daddr_t lbn, rabn;
+ int bcount;
+ int seqcount;
+ int nra, error = 0, n = 0, on = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("nfs_read mode");
+#endif
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */
+ return (EINVAL);
+ p = uio->uio_procp;
+
+ if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (void)nfs_fsinfo(nmp, vp, cred, p);
+ if (vp->v_type != VDIR &&
+ (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
+ return (EFBIG);
+ biosize = vp->v_mount->mnt_stat.f_iosize;
+ seqcount = (int)((off_t)(ioflag >> 16) * biosize / BKVASIZE);
+ /*
+ * For nfs, cache consistency can only be maintained approximately.
+ * Although RFC1094 does not specify the criteria, the following is
+ * believed to be compatible with the reference port.
+ * For nqnfs, full cache consistency is maintained within the loop.
+ * For nfs:
+ * If the file's modify time on the server has changed since the
+ * last read rpc or you have written to the file,
+ * you may have lost data cache consistency with the
+ * server, so flush all of the file's data out of the cache.
+ * Then force a getattr rpc to ensure that you have up to date
+ * attributes.
+ * NB: This implies that cache data can be read when up to
+ * NFS_ATTRTIMEO seconds out of date. If you find that you need current
+ * attributes, this could be forced by setting n_attrstamp to 0 before
+ * the VOP_GETATTR() call.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
+ if (np->n_flag & NMODIFIED) {
+ if (vp->v_type != VREG) {
+ if (vp->v_type != VDIR)
+ panic("nfs: bioread, not dir");
+ nfs_invaldir(vp);
+ error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
+ if (error)
+ return (error);
+ }
+ np->n_attrstamp = 0;
+ error = VOP_GETATTR(vp, &vattr, cred, p);
+ if (error)
+ return (error);
+ np->n_mtime = vattr.va_mtime.tv_sec;
+ } else {
+ error = VOP_GETATTR(vp, &vattr, cred, p);
+ if (error)
+ return (error);
+ if (np->n_mtime != vattr.va_mtime.tv_sec) {
+ if (vp->v_type == VDIR)
+ nfs_invaldir(vp);
+ error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
+ if (error)
+ return (error);
+ np->n_mtime = vattr.va_mtime.tv_sec;
+ }
+ }
+ }
+ do {
+
+ /*
+ * Get a valid lease. If cached data is stale, flush it.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (NQNFS_CKINVALID(vp, np, ND_READ)) {
+ do {
+ error = nqnfs_getlease(vp, ND_READ, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE) ||
+ ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
+ if (vp->v_type == VDIR)
+ nfs_invaldir(vp);
+ error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
+ if (error)
+ return (error);
+ np->n_brev = np->n_lrev;
+ }
+ } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
+ nfs_invaldir(vp);
+ error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
+ if (error)
+ return (error);
+ }
+ }
+ if (np->n_flag & NQNFSNONCACHE) {
+ switch (vp->v_type) {
+ case VREG:
+ return (nfs_readrpc(vp, uio, cred));
+ case VLNK:
+ return (nfs_readlinkrpc(vp, uio, cred));
+ case VDIR:
+ break;
+ default:
+ printf(" NQNFSNONCACHE: type %x unexpected\n",
+ vp->v_type);
+ };
+ }
+ switch (vp->v_type) {
+ case VREG:
+ nfsstats.biocache_reads++;
+ lbn = uio->uio_offset / biosize;
+ on = uio->uio_offset & (biosize - 1);
+
+ /*
+ * Start the read ahead(s), as required.
+ */
+ if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
+ for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
+ (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
+ rabn = lbn + 1 + nra;
+ if (!incore(vp, rabn)) {
+ rabp = nfs_getcacheblk(vp, rabn, biosize, p);
+ if (!rabp)
+ return (EINTR);
+ if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
+ rabp->b_flags |= (B_READ | B_ASYNC);
+ vfs_busy_pages(rabp, 0);
+ if (nfs_asyncio(rabp, cred, p)) {
+ rabp->b_flags |= B_INVAL|B_ERROR;
+ vfs_unbusy_pages(rabp);
+ brelse(rabp);
+ }
+ } else
+ brelse(rabp);
+ }
+ }
+ }
+
+ /*
+ * Obtain the buffer cache block. Figure out the buffer size
+ * when we are at EOF. If we are modifying the size of the
+ * buffer based on an EOF condition we need to hold
+ * nfs_rslock() through obtaining the buffer to prevent
+ * a potential writer-appender from messing with n_size.
+ * Otherwise we may accidentally truncate the buffer and
+ * lose dirty data.
+ *
+ * Note that bcount is *not* DEV_BSIZE aligned.
+ */
+
+again:
+ bcount = biosize;
+ if ((off_t)lbn * biosize >= np->n_size) {
+ bcount = 0;
+ } else if ((off_t)(lbn + 1) * biosize > np->n_size) {
+ bcount = np->n_size - (off_t)lbn * biosize;
+ }
+ if (bcount != biosize && nfs_rslock(np, p) == ENOLCK)
+ goto again;
+
+ bp = nfs_getcacheblk(vp, lbn, bcount, p);
+
+ if (bcount != biosize)
+ nfs_rsunlock(np, p);
+ if (!bp)
+ return (EINTR);
+
+ /*
+ * If B_CACHE is not set, we must issue the read. If this
+ * fails, we return an error.
+ */
+
+ if ((bp->b_flags & B_CACHE) == 0) {
+ bp->b_flags |= B_READ;
+ vfs_busy_pages(bp, 0);
+ error = nfs_doio(bp, cred, p);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ }
+
+ /*
+ * on is the offset into the current bp. Figure out how many
+ * bytes we can copy out of the bp. Note that bcount is
+ * NOT DEV_BSIZE aligned.
+ *
+ * Then figure out how many bytes we can copy into the uio.
+ */
+
+ n = 0;
+ if (on < bcount)
+ n = min((unsigned)(bcount - on), uio->uio_resid);
+ break;
+ case VLNK:
+ nfsstats.biocache_readlinks++;
+ bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
+ if (!bp)
+ return (EINTR);
+ if ((bp->b_flags & B_CACHE) == 0) {
+ bp->b_flags |= B_READ;
+ vfs_busy_pages(bp, 0);
+ error = nfs_doio(bp, cred, p);
+ if (error) {
+ bp->b_flags |= B_ERROR;
+ brelse(bp);
+ return (error);
+ }
+ }
+ n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
+ on = 0;
+ break;
+ case VDIR:
+ nfsstats.biocache_readdirs++;
+ if (np->n_direofoffset
+ && uio->uio_offset >= np->n_direofoffset) {
+ return (0);
+ }
+ lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ;
+ on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
+ bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
+ if (!bp)
+ return (EINTR);
+ if ((bp->b_flags & B_CACHE) == 0) {
+ bp->b_flags |= B_READ;
+ vfs_busy_pages(bp, 0);
+ error = nfs_doio(bp, cred, p);
+ if (error) {
+ brelse(bp);
+ }
+ while (error == NFSERR_BAD_COOKIE) {
+ printf("got bad cookie vp %p bp %p\n", vp, bp);
+ nfs_invaldir(vp);
+ error = nfs_vinvalbuf(vp, 0, cred, p, 1);
+ /*
+ * Yuck! The directory has been modified on the
+ * server. The only way to get the block is by
+ * reading from the beginning to get all the
+ * offset cookies.
+ *
+ * Leave the last bp intact unless there is an error.
+ * Loop back up to the while if the error is another
+ * NFSERR_BAD_COOKIE (double yuck!).
+ */
+ for (i = 0; i <= lbn && !error; i++) {
+ if (np->n_direofoffset
+ && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
+ return (0);
+ bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
+ if (!bp)
+ return (EINTR);
+ if ((bp->b_flags & B_CACHE) == 0) {
+ bp->b_flags |= B_READ;
+ vfs_busy_pages(bp, 0);
+ error = nfs_doio(bp, cred, p);
+ /*
+ * no error + B_INVAL == directory EOF,
+ * use the block.
+ */
+ if (error == 0 && (bp->b_flags & B_INVAL))
+ break;
+ }
+ /*
+ * An error will throw away the block and the
+ * for loop will break out. If no error and this
+ * is not the block we want, we throw away the
+ * block and go for the next one via the for loop.
+ */
+ if (error || i < lbn)
+ brelse(bp);
+ }
+ }
+ /*
+ * The above while is repeated if we hit another cookie
+ * error. If we hit an error and it wasn't a cookie error,
+ * we give up.
+ */
+ if (error)
+ return (error);
+ }
+
+ /*
+ * If not eof and read aheads are enabled, start one.
+ * (You need the current block first, so that you have the
+ * directory offset cookie of the next block.)
+ */
+ if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+ (bp->b_flags & B_INVAL) == 0 &&
+ (np->n_direofoffset == 0 ||
+ (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
+ !(np->n_flag & NQNFSNONCACHE) &&
+ !incore(vp, lbn + 1)) {
+ rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
+ if (rabp) {
+ if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
+ rabp->b_flags |= (B_READ | B_ASYNC);
+ vfs_busy_pages(rabp, 0);
+ if (nfs_asyncio(rabp, cred, p)) {
+ rabp->b_flags |= B_INVAL|B_ERROR;
+ vfs_unbusy_pages(rabp);
+ brelse(rabp);
+ }
+ } else {
+ brelse(rabp);
+ }
+ }
+ }
+ /*
+ * Unlike VREG files, whose buffer size ( bp->b_bcount ) is
+ * chopped for the EOF condition, we cannot tell how large
+ * NFS directories are going to be until we hit EOF. So
+ * an NFS directory buffer is *not* chopped to its EOF. Now,
+ * it just so happens that b_resid will effectively chop it
+ * to EOF. *BUT* this information is lost if the buffer goes
+ * away and is reconstituted into a B_CACHE state ( due to
+ * being VMIO ) later. So we keep track of the directory eof
+ * in np->n_direofoffset and chop it off as an extra step
+ * right here.
+ */
+ n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
+ if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset)
+ n = np->n_direofoffset - uio->uio_offset;
+ break;
+ default:
+ printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
+ break;
+ };
+
+ if (n > 0) {
+ error = uiomove(bp->b_data + on, (int)n, uio);
+ }
+ switch (vp->v_type) {
+ case VREG:
+ break;
+ case VLNK:
+ n = 0;
+ break;
+ case VDIR:
+ /*
+ * Invalidate buffer if caching is disabled, forcing a
+ * re-read from the remote later.
+ */
+ if (np->n_flag & NQNFSNONCACHE)
+ bp->b_flags |= B_INVAL;
+ break;
+ default:
+ printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
+ }
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n > 0);
+ return (error);
+}
+
+/*
+ * Vnode op for write using bio
+ */
+int
+nfs_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ int biosize;
+ struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct ucred *cred = ap->a_cred;
+ int ioflag = ap->a_ioflag;
+ struct buf *bp;
+ struct vattr vattr;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ daddr_t lbn;
+ int bcount;
+ int n, on, error = 0, iomode, must_commit;
+ int haverslock = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE)
+ panic("nfs_write mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("nfs_write proc");
+#endif
+ if (vp->v_type != VREG)
+ return (EIO);
+ if (np->n_flag & NWRITEERR) {
+ np->n_flag &= ~NWRITEERR;
+ return (np->n_error);
+ }
+ if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (void)nfs_fsinfo(nmp, vp, cred, p);
+
+ /*
+ * Synchronously flush pending buffers if we are in synchronous
+ * mode or if we are appending.
+ */
+ if (ioflag & (IO_APPEND | IO_SYNC)) {
+ if (np->n_flag & NMODIFIED) {
+ np->n_attrstamp = 0;
+ error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
+ if (error)
+ return (error);
+ }
+ }
+
+ /*
+ * If IO_APPEND then load uio_offset. We restart here if we cannot
+ * get the append lock.
+ */
+restart:
+ if (ioflag & IO_APPEND) {
+ np->n_attrstamp = 0;
+ error = VOP_GETATTR(vp, &vattr, cred, p);
+ if (error)
+ return (error);
+ uio->uio_offset = np->n_size;
+ }
+
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
+ return (EFBIG);
+ if (uio->uio_resid == 0)
+ return (0);
+
+ /*
+ * We need to obtain the rslock if we intend to modify np->n_size
+ * in order to guarantee the append point with multiple contending
+ * writers, to guarantee that no other appenders modify n_size
+ * while we are trying to obtain a truncated buffer (i.e. to avoid
+ * accidentally truncating data written by another appender due to
+ * the race), and to ensure that the buffer is populated prior to
+ * our extending of the file. We hold rslock through the entire
+ * operation.
+ *
+ * Note that we do not synchronize the case where someone truncates
+ * the file while we are appending to it because attempting to lock
+ * this case may deadlock other parts of the system unexpectedly.
+ */
+ if ((ioflag & IO_APPEND) ||
+ uio->uio_offset + uio->uio_resid > np->n_size) {
+ if (nfs_rslock(np, p) == ENOLCK)
+ goto restart;
+ haverslock = 1;
+ }
+
+ /*
+ * Maybe this should be above the vnode op call, but so long as
+ * file servers have no limits, I don't think it matters.
+ */
+ if (p && uio->uio_offset + uio->uio_resid >
+ p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+ psignal(p, SIGXFSZ);
+ if (haverslock)
+ nfs_rsunlock(np, p);
+ return (EFBIG);
+ }
+
+ biosize = vp->v_mount->mnt_stat.f_iosize;
+
+ do {
+ /*
+ * Check for a valid write lease.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKINVALID(vp, np, ND_WRITE)) {
+ do {
+ error = nqnfs_getlease(vp, ND_WRITE, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ break;
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
+ if (error)
+ break;
+ np->n_brev = np->n_lrev;
+ }
+ }
+ if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
+ iomode = NFSV3WRITE_FILESYNC;
+ error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
+ if (must_commit)
+ nfs_clearcommit(vp->v_mount);
+ break;
+ }
+ nfsstats.biocache_writes++;
+ lbn = uio->uio_offset / biosize;
+ on = uio->uio_offset & (biosize-1);
+ n = min((unsigned)(biosize - on), uio->uio_resid);
+again:
+ /*
+ * Handle direct append and file extension cases, calculate
+ * unaligned buffer size.
+ */
+
+ if (uio->uio_offset == np->n_size && n) {
+ /*
+ * Get the buffer (in its pre-append state to maintain
+ * B_CACHE if it was previously set). Resize the
+ * nfsnode after we have locked the buffer to prevent
+ * readers from reading garbage.
+ */
+ bcount = on;
+ bp = nfs_getcacheblk(vp, lbn, bcount, p);
+
+ if (bp != NULL) {
+ long save;
+
+ np->n_size = uio->uio_offset + n;
+ np->n_flag |= NMODIFIED;
+ vnode_pager_setsize(vp, np->n_size);
+
+ save = bp->b_flags & B_CACHE;
+ bcount += n;
+ allocbuf(bp, bcount);
+ bp->b_flags |= save;
+ }
+ } else {
+ /*
+ * Obtain the locked cache block first, and then
+ * adjust the file's size as appropriate.
+ */
+ bcount = on + n;
+ if ((off_t)lbn * biosize + bcount < np->n_size) {
+ if ((off_t)(lbn + 1) * biosize < np->n_size)
+ bcount = biosize;
+ else
+ bcount = np->n_size - (off_t)lbn * biosize;
+ }
+
+ bp = nfs_getcacheblk(vp, lbn, bcount, p);
+
+ if (uio->uio_offset + n > np->n_size) {
+ np->n_size = uio->uio_offset + n;
+ np->n_flag |= NMODIFIED;
+ vnode_pager_setsize(vp, np->n_size);
+ }
+ }
+
+ if (!bp) {
+ error = EINTR;
+ break;
+ }
+
+ /*
+ * Issue a READ if B_CACHE is not set. In special-append
+ * mode, B_CACHE is based on the buffer prior to the write
+ * op and is typically set, avoiding the read. If a read
+ * is required in special append mode, the server will
+ * probably send us a short-read since we extended the file
+ * on our end, resulting in b_resid == 0 and, thus,
+ * B_CACHE getting set.
+ *
+ * We can also avoid issuing the read if the write covers
+ * the entire buffer. We have to make sure the buffer state
+ * is reasonable in this case since we will not be initiating
+ * I/O. See the comments in kern/vfs_bio.c's getblk() for
+ * more information.
+ *
+ * B_CACHE may also be set due to the buffer being cached
+ * normally.
+ */
+
+ if (on == 0 && n == bcount) {
+ bp->b_flags |= B_CACHE;
+ bp->b_flags &= ~(B_ERROR | B_INVAL);
+ }
+
+ if ((bp->b_flags & B_CACHE) == 0) {
+ bp->b_flags |= B_READ;
+ vfs_busy_pages(bp, 0);
+ error = nfs_doio(bp, cred, p);
+ if (error) {
+ brelse(bp);
+ break;
+ }
+ }
+ if (!bp) {
+ error = EINTR;
+ break;
+ }
+ if (bp->b_wcred == NOCRED) {
+ crhold(cred);
+ bp->b_wcred = cred;
+ }
+ np->n_flag |= NMODIFIED;
+
+ /*
+ * If dirtyend exceeds file size, chop it down. This should
+ * not normally occur but there is an append race where it
+ * might occur XXX, so we log it.
+ *
+ * If the chopping creates a reverse-indexed or degenerate
+ * situation with dirtyoff/end, we 0 both of them.
+ */
+
+ if (bp->b_dirtyend > bcount) {
+ printf("NFS append race @%lx:%d\n",
+ (long)bp->b_blkno * DEV_BSIZE,
+ bp->b_dirtyend - bcount);
+ bp->b_dirtyend = bcount;
+ }
+
+ if (bp->b_dirtyoff >= bp->b_dirtyend)
+ bp->b_dirtyoff = bp->b_dirtyend = 0;
+
+ /*
+ * If the new write will leave a contiguous dirty
+ * area, just update the b_dirtyoff and b_dirtyend,
+ * otherwise force a write rpc of the old dirty area.
+ *
+ * While it is possible to merge discontiguous writes due to
+ * our having a B_CACHE buffer ( and thus valid read data
+ * for the hole), we don't because it could lead to
+ * significant cache coherency problems with multiple clients,
+ * especially if locking is implemented later on.
+ *
+ * As an optimization we could theoretically maintain
+ * a linked list of discontinuous areas, but we would still
+ * have to commit them separately so there isn't much
+ * advantage to it except perhaps a bit of asynchronization.
+ */
+
+ if (bp->b_dirtyend > 0 &&
+ (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
+ if (VOP_BWRITE(bp->b_vp, bp) == EINTR)
+ return (EINTR);
+ goto again;
+ }
+
+ /*
+ * Check for valid write lease and get one as required.
+ * In case getblk() and/or bwrite() delayed us.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKINVALID(vp, np, ND_WRITE)) {
+ do {
+ error = nqnfs_getlease(vp, ND_WRITE, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error) {
+ brelse(bp);
+ break;
+ }
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ brelse(bp);
+ error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
+ if (error)
+ break;
+ np->n_brev = np->n_lrev;
+ goto again;
+ }
+ }
+
+ error = uiomove((char *)bp->b_data + on, n, uio);
+
+ /*
+ * Since this block is being modified, it must be written
+ * again and not just committed. Since write clustering does
+ * not work for the stage 1 data write, only the stage 2
+ * commit rpc, we have to clear B_CLUSTEROK as well.
+ */
+ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
+
+ if (error) {
+ bp->b_flags |= B_ERROR;
+ brelse(bp);
+ break;
+ }
+
+ /*
+ * Only update dirtyoff/dirtyend if not a degenerate
+ * condition.
+ */
+ if (n) {
+ if (bp->b_dirtyend > 0) {
+ bp->b_dirtyoff = min(on, bp->b_dirtyoff);
+ bp->b_dirtyend = max((on + n), bp->b_dirtyend);
+ } else {
+ bp->b_dirtyoff = on;
+ bp->b_dirtyend = on + n;
+ }
+ vfs_bio_set_validclean(bp, on, n);
+ }
+
+ /*
+ * If the lease is non-cacheable or IO_SYNC is set, do bwrite().
+ *
+ * IO_INVAL appears to be unused. The idea appears to be
+ * to turn off caching in this case. Very odd. XXX
+ */
+ if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
+ if (ioflag & IO_INVAL)
+ bp->b_flags |= B_NOCACHE;
+ error = VOP_BWRITE(bp->b_vp, bp);
+ if (error)
+ break;
+ if (np->n_flag & NQNFSNONCACHE) {
+ error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
+ if (error)
+ break;
+ }
+ } else if ((n + on) == biosize &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
+ bp->b_flags |= B_ASYNC;
+ (void)nfs_writebp(bp, 0, 0);
+ } else {
+ bdwrite(bp);
+ }
+ } while (uio->uio_resid > 0 && n > 0);
+
+ if (haverslock)
+ nfs_rsunlock(np, p);
+
+ return (error);
+}
+
+/*
+ * Get an nfs cache block.
+ *
+ * Allocate a new one if the block isn't currently in the cache
+ * and return the block marked busy. If the calling process is
+ * interrupted by a signal for an interruptible mount point, return
+ * NULL.
+ *
+ * The caller must carefully deal with the possible B_INVAL state of
+ * the buffer. nfs_doio() clears B_INVAL (and nfs_asyncio() clears it
+ * indirectly), so synchronous reads can be issued without worrying about
+ * the B_INVAL state. We have to be a little more careful when dealing
+ * with writes (see comments in nfs_write()) when extending a file past
+ * its EOF.
+ */
+static struct buf *
+nfs_getcacheblk(vp, bn, size, p)
+ struct vnode *vp;
+ daddr_t bn;
+ int size;
+ struct proc *p;
+{
+ register struct buf *bp;
+ struct mount *mp;
+ struct nfsmount *nmp;
+
+ mp = vp->v_mount;
+ nmp = VFSTONFS(mp);
+
+ if (nmp->nm_flag & NFSMNT_INT) {
+ bp = getblk(vp, bn, size, PCATCH, 0);
+ while (bp == (struct buf *)0) {
+ if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
+ return ((struct buf *)0);
+ bp = getblk(vp, bn, size, 0, 2 * hz);
+ }
+ } else {
+ bp = getblk(vp, bn, size, 0, 0);
+ }
+
+ if (vp->v_type == VREG) {
+ int biosize;
+
+ biosize = mp->mnt_stat.f_iosize;
+ bp->b_blkno = bn * (biosize / DEV_BSIZE);
+ }
+ return (bp);
+}
+
+/*
+ * Flush and invalidate all dirty buffers. If another process is already
+ * doing the flush, just wait for completion.
+ */
+int
+nfs_vinvalbuf(vp, flags, cred, p, intrflg)
+ struct vnode *vp;
+ int flags;
+ struct ucred *cred;
+ struct proc *p;
+ int intrflg;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ int error = 0, slpflag, slptimeo;
+
+ if (vp->v_flag & VXLOCK) {
+ return (0);
+ }
+
+ if ((nmp->nm_flag & NFSMNT_INT) == 0)
+ intrflg = 0;
+ if (intrflg) {
+ slpflag = PCATCH;
+ slptimeo = 2 * hz;
+ } else {
+ slpflag = 0;
+ slptimeo = 0;
+ }
+ /*
+ * First wait for any other process doing a flush to complete.
+ */
+ while (np->n_flag & NFLUSHINPROG) {
+ np->n_flag |= NFLUSHWANT;
+ error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
+ slptimeo);
+ if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
+ return (EINTR);
+ }
+
+ /*
+ * Now, flush as required.
+ */
+ np->n_flag |= NFLUSHINPROG;
+ error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
+ while (error) {
+ if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+ np->n_flag &= ~NFLUSHINPROG;
+ if (np->n_flag & NFLUSHWANT) {
+ np->n_flag &= ~NFLUSHWANT;
+ wakeup((caddr_t)&np->n_flag);
+ }
+ return (EINTR);
+ }
+ error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
+ }
+ np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
+ if (np->n_flag & NFLUSHWANT) {
+ np->n_flag &= ~NFLUSHWANT;
+ wakeup((caddr_t)&np->n_flag);
+ }
+ return (0);
+}
+
+/*
+ * Initiate asynchronous I/O. Return an error if no nfsiods are available.
+ * This is mainly to avoid queueing async I/O requests when the nfsiods
+ * are all hung on a dead server.
+ *
+ * Note: nfs_asyncio() does not clear (B_ERROR|B_INVAL) but when the bp
+ * is eventually dequeued by the async daemon, nfs_doio() *will*.
+ */
+int
+nfs_asyncio(bp, cred, procp)
+ register struct buf *bp;
+ struct ucred *cred;
+ struct proc *procp;
+{
+ struct nfsmount *nmp;
+ int i;
+ int gotiod;
+ int slpflag = 0;
+ int slptimeo = 0;
+ int error;
+
+ if (nfs_numasync == 0)
+ return (EIO);
+
+ nmp = VFSTONFS(bp->b_vp->v_mount);
+again:
+ if (nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+ gotiod = FALSE;
+
+ /*
+ * Find a free iod to process this request.
+ */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ if (nfs_iodwant[i]) {
+ /*
+ * Found one, so wake it up and tell it which
+ * mount to process.
+ */
+ NFS_DPF(ASYNCIO,
+ ("nfs_asyncio: waking iod %d for mount %p\n",
+ i, nmp));
+ nfs_iodwant[i] = (struct proc *)0;
+ nfs_iodmount[i] = nmp;
+ nmp->nm_bufqiods++;
+ wakeup((caddr_t)&nfs_iodwant[i]);
+ gotiod = TRUE;
+ break;
+ }
+
+ /*
+ * If none are free, we may already have an iod working on this mount
+ * point. If so, it will process our request.
+ */
+ if (!gotiod) {
+ if (nmp->nm_bufqiods > 0) {
+ NFS_DPF(ASYNCIO,
+ ("nfs_asyncio: %d iods are already processing mount %p\n",
+ nmp->nm_bufqiods, nmp));
+ gotiod = TRUE;
+ }
+ }
+
+ /*
+ * If we have an iod which can process the request, then queue
+ * the buffer.
+ */
+ if (gotiod) {
+ /*
+ * Ensure that the queue never grows too large.
+ */
+ while (nmp->nm_bufqlen >= 2*nfs_numasync) {
+ NFS_DPF(ASYNCIO,
+ ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
+ nmp->nm_bufqwant = TRUE;
+ error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
+ "nfsaio", slptimeo);
+ if (error) {
+ if (nfs_sigintr(nmp, NULL, procp))
+ return (EINTR);
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ /*
+ * We might have lost our iod while sleeping,
+ * so check and loop if necessary.
+ */
+ if (nmp->nm_bufqiods == 0) {
+ NFS_DPF(ASYNCIO,
+ ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
+ goto again;
+ }
+ }
+
+ if (bp->b_flags & B_READ) {
+ if (bp->b_rcred == NOCRED && cred != NOCRED) {
+ crhold(cred);
+ bp->b_rcred = cred;
+ }
+ } else {
+ bp->b_flags |= B_WRITEINPROG;
+ if (bp->b_wcred == NOCRED && cred != NOCRED) {
+ crhold(cred);
+ bp->b_wcred = cred;
+ }
+ }
+
+ BUF_KERNPROC(bp);
+ TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
+ nmp->nm_bufqlen++;
+ return (0);
+ }
+
+ /*
+ * All the iods are busy on other mounts, so return EIO to
+ * force the caller to process the i/o synchronously.
+ */
+ NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
+ return (EIO);
+}
+
+/*
+ * Do an I/O operation to/from a cache block. This may be called
+ * synchronously or from an nfsiod.
+ */
+int
+nfs_doio(bp, cr, p)
+ struct buf *bp;
+ struct ucred *cr;
+ struct proc *p;
+{
+ struct uio *uiop;
+ struct vnode *vp;
+ struct nfsnode *np;
+ struct nfsmount *nmp;
+ int error = 0, iomode, must_commit = 0;
+ struct uio uio;
+ struct iovec io;
+
+ vp = bp->b_vp;
+ np = VTONFS(vp);
+ nmp = VFSTONFS(vp->v_mount);
+ uiop = &uio;
+ uiop->uio_iov = &io;
+ uiop->uio_iovcnt = 1;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = p;
+
+ /*
+ * clear B_ERROR and B_INVAL state prior to initiating the I/O. We
+ * do this here so we do not have to do it in all the code that
+ * calls us.
+ */
+ bp->b_flags &= ~(B_ERROR | B_INVAL);
+
+ KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));
+
+ /*
+ * Historically, paging was done with physio, but no more.
+ */
+ if (bp->b_flags & B_PHYS) {
+ /*
+ * ...though reading /dev/drum still gets us here.
+ */
+ io.iov_len = uiop->uio_resid = bp->b_bcount;
+ /* mapping was done by vmapbuf() */
+ io.iov_base = bp->b_data;
+ uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
+ if (bp->b_flags & B_READ) {
+ uiop->uio_rw = UIO_READ;
+ nfsstats.read_physios++;
+ error = nfs_readrpc(vp, uiop, cr);
+ } else {
+ int com;
+
+ iomode = NFSV3WRITE_DATASYNC;
+ uiop->uio_rw = UIO_WRITE;
+ nfsstats.write_physios++;
+ error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
+ }
+ if (error) {
+ bp->b_flags |= B_ERROR;
+ bp->b_error = error;
+ }
+ } else if (bp->b_flags & B_READ) {
+ io.iov_len = uiop->uio_resid = bp->b_bcount;
+ io.iov_base = bp->b_data;
+ uiop->uio_rw = UIO_READ;
+ switch (vp->v_type) {
+ case VREG:
+ uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
+ nfsstats.read_bios++;
+ error = nfs_readrpc(vp, uiop, cr);
+ if (!error) {
+ if (uiop->uio_resid) {
+ /*
+ * If we had a short read with no error, we must have
+ * hit a file hole. We should zero-fill the remainder.
+ * This can also occur if the server hits the file EOF.
+ *
+ * Holes used to be able to occur due to pending
+ * writes, but that is not possible any longer.
+ */
+ int nread = bp->b_bcount - uiop->uio_resid;
+ int left = bp->b_bcount - nread;
+
+ if (left > 0)
+ bzero((char *)bp->b_data + nread, left);
+ uiop->uio_resid = 0;
+ }
+ }
+ if (p && (vp->v_flag & VTEXT) &&
+ (((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKINVALID(vp, np, ND_READ) &&
+ np->n_lrev != np->n_brev) ||
+ (!(nmp->nm_flag & NFSMNT_NQNFS) &&
+ np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
+ uprintf("Process killed due to text file modification\n");
+ psignal(p, SIGKILL);
+ PHOLD(p);
+ }
+ break;
+ case VLNK:
+ uiop->uio_offset = (off_t)0;
+ nfsstats.readlink_bios++;
+ error = nfs_readlinkrpc(vp, uiop, cr);
+ break;
+ case VDIR:
+ nfsstats.readdir_bios++;
+ uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
+ if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
+ error = nfs_readdirplusrpc(vp, uiop, cr);
+ if (error == NFSERR_NOTSUPP)
+ nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
+ }
+ if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
+ error = nfs_readdirrpc(vp, uiop, cr);
+ /*
+ * end-of-directory sets B_INVAL but does not generate an
+ * error.
+ */
+ if (error == 0 && uiop->uio_resid == bp->b_bcount)
+ bp->b_flags |= B_INVAL;
+ break;
+ default:
+ printf("nfs_doio: type %x unexpected\n",vp->v_type);
+ break;
+ };
+ if (error) {
+ bp->b_flags |= B_ERROR;
+ bp->b_error = error;
+ }
+ } else {
+ /*
+ * If we only need to commit, try to commit
+ */
+ if (bp->b_flags & B_NEEDCOMMIT) {
+ int retv;
+ off_t off;
+
+ off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
+ bp->b_flags |= B_WRITEINPROG;
+ retv = nfs_commit(
+ bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
+ bp->b_wcred, p);
+ bp->b_flags &= ~B_WRITEINPROG;
+ if (retv == 0) {
+ bp->b_dirtyoff = bp->b_dirtyend = 0;
+ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
+ bp->b_resid = 0;
+ biodone(bp);
+ return (0);
+ }
+ if (retv == NFSERR_STALEWRITEVERF) {
+ nfs_clearcommit(bp->b_vp->v_mount);
+ }
+ }
+
+ /*
+ * Setup for actual write
+ */
+
+ if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
+ bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
+
+ if (bp->b_dirtyend > bp->b_dirtyoff) {
+ io.iov_len = uiop->uio_resid = bp->b_dirtyend
+ - bp->b_dirtyoff;
+ uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
+ + bp->b_dirtyoff;
+ io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+ uiop->uio_rw = UIO_WRITE;
+ nfsstats.write_bios++;
+
+ if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
+ iomode = NFSV3WRITE_UNSTABLE;
+ else
+ iomode = NFSV3WRITE_FILESYNC;
+
+ bp->b_flags |= B_WRITEINPROG;
+ error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
+
+ /*
+ * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
+ * to cluster the buffers needing commit. This will allow
+ * the system to submit a single commit rpc for the whole
+ * cluster. We can do this even if the buffer is not 100%
+ * dirty (relative to the NFS blocksize), so we optimize the
+ * append-to-file-case.
+ *
+ * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be
+ * cleared because write clustering only works for commit
+ * rpc's, not for the data portion of the write).
+ */
+
+ if (!error && iomode == NFSV3WRITE_UNSTABLE) {
+ bp->b_flags |= B_NEEDCOMMIT;
+ if (bp->b_dirtyoff == 0
+ && bp->b_dirtyend == bp->b_bcount)
+ bp->b_flags |= B_CLUSTEROK;
+ } else {
+ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
+ }
+ bp->b_flags &= ~B_WRITEINPROG;
+
+ /*
+ * For an interrupted write, the buffer is still valid
+ * and the write hasn't been pushed to the server yet,
+ * so we can't set B_ERROR and report the interruption
+ * by setting B_EINTR. For the B_ASYNC case, B_EINTR
+ * is not relevant, so the rpc attempt is essentially
+ * a noop. For the case of a V3 write rpc not being
+ * committed to stable storage, the block is still
+ * dirty and requires either a commit rpc or another
+ * write rpc with iomode == NFSV3WRITE_FILESYNC before
+ * the block is reused. This is indicated by setting
+ * the B_DELWRI and B_NEEDCOMMIT flags.
+ *
+ * If the buffer is marked B_PAGING, it does not reside on
+ * the vp's paging queues so we cannot call bdirty(). The
+ * bp in this case is not an NFS cache block so we should
+ * be safe. XXX
+ */
+ if (error == EINTR
+ || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
+ int s;
+
+ s = splbio();
+ bp->b_flags &= ~(B_INVAL|B_NOCACHE);
+ if ((bp->b_flags & B_PAGING) == 0) {
+ bdirty(bp);
+ bp->b_flags &= ~B_DONE;
+ }
+ if (error && (bp->b_flags & B_ASYNC) == 0)
+ bp->b_flags |= B_EINTR;
+ splx(s);
+ } else {
+ if (error) {
+ bp->b_flags |= B_ERROR;
+ bp->b_error = np->n_error = error;
+ np->n_flag |= NWRITEERR;
+ }
+ bp->b_dirtyoff = bp->b_dirtyend = 0;
+ }
+ } else {
+ bp->b_resid = 0;
+ biodone(bp);
+ return (0);
+ }
+ }
+ bp->b_resid = uiop->uio_resid;
+ if (must_commit)
+ nfs_clearcommit(vp->v_mount);
+ biodone(bp);
+ return (error);
+}
diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c
new file mode 100644
index 0000000..e63ba63
--- /dev/null
+++ b/sys/nfs/nfs_common.c
@@ -0,0 +1,2272 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
+ * $FreeBSD$
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/malloc.h>
+#include <sys/sysent.h>
+#include <sys/syscall.h>
+#include <sys/conf.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+/*
+ * Data items converted to xdr at startup, since they are constant
+ * This is kinda hokey, but may save a little time doing byte swaps
+ */
+u_int32_t nfs_xdrneg1;
+u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+ rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
+ rpc_auth_kerb;
+u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false;
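For a sense of what the pre-conversion buys: the txdr_/fxdr_ helpers in xdr_subs.h are essentially byte-order conversions, so constants such as rpc_call and rpc_vers can be swapped once at startup rather than on every request. A minimal userland sketch of the same idea (names are illustrative; CALL = 0 and RPC version 2 follow RFC 1057):

    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>

    /* Pre-convert fixed protocol words to network (XDR) byte order once,
     * instead of calling htonl() for them on every outgoing request. */
    static uint32_t rpc_call_x;	/* CALL message type, cf. rpc_call */
    static uint32_t rpc_vers_x;	/* RPC protocol version 2, cf. rpc_vers */

    static void
    xdr_consts_init(void)
    {
    	rpc_call_x = htonl(0);	/* RPC_CALL */
    	rpc_vers_x = htonl(2);	/* RPC_VER2 */
    }

    int
    main(void)
    {
    	xdr_consts_init();
    	printf("%08x %08x\n", (unsigned)rpc_call_x, (unsigned)rpc_vers_x);
    	return (0);
    }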
+
+/* And other global data */
+static u_int32_t nfs_xid = 0;
+static enum vtype nv2tov_type[8]= {
+ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON
+};
+enum vtype nv3tov_type[8]= {
+ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO
+};
+
+int nfs_ticks;
+int nfs_pbuf_freecnt = -1; /* start out unlimited */
+
+struct nfs_reqq nfs_reqq;
+struct nfssvc_sockhead nfssvc_sockhead;
+int nfssvc_sockhead_flag;
+struct nfsd_head nfsd_head;
+int nfsd_head_flag;
+struct nfs_bufq nfs_bufq;
+struct nqtimerhead nqtimerhead;
+struct nqfhhashhead *nqfhhashtbl;
+u_long nqfhhash;
+
+static void (*nfs_prev_lease_updatetime) __P((int));
+static int nfs_prev_nfssvc_sy_narg;
+static sy_call_t *nfs_prev_nfssvc_sy_call;
+
+#ifndef NFS_NOSERVER
+
+static vop_t *nfs_prev_vop_lease_check;
+static int nfs_prev_getfh_sy_narg;
+static sy_call_t *nfs_prev_getfh_sy_call;
+
+/*
+ * Mapping of old NFS Version 2 RPC numbers to generic numbers.
+ */
+int nfsv3_procid[NFS_NPROCS] = {
+ NFSPROC_NULL,
+ NFSPROC_GETATTR,
+ NFSPROC_SETATTR,
+ NFSPROC_NOOP,
+ NFSPROC_LOOKUP,
+ NFSPROC_READLINK,
+ NFSPROC_READ,
+ NFSPROC_NOOP,
+ NFSPROC_WRITE,
+ NFSPROC_CREATE,
+ NFSPROC_REMOVE,
+ NFSPROC_RENAME,
+ NFSPROC_LINK,
+ NFSPROC_SYMLINK,
+ NFSPROC_MKDIR,
+ NFSPROC_RMDIR,
+ NFSPROC_READDIR,
+ NFSPROC_FSSTAT,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP
+};
+
+#endif /* NFS_NOSERVER */
+/*
+ * and the reverse mapping from generic to Version 2 procedure numbers
+ */
+int nfsv2_procid[NFS_NPROCS] = {
+ NFSV2PROC_NULL,
+ NFSV2PROC_GETATTR,
+ NFSV2PROC_SETATTR,
+ NFSV2PROC_LOOKUP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_READLINK,
+ NFSV2PROC_READ,
+ NFSV2PROC_WRITE,
+ NFSV2PROC_CREATE,
+ NFSV2PROC_MKDIR,
+ NFSV2PROC_SYMLINK,
+ NFSV2PROC_CREATE,
+ NFSV2PROC_REMOVE,
+ NFSV2PROC_RMDIR,
+ NFSV2PROC_RENAME,
+ NFSV2PROC_LINK,
+ NFSV2PROC_READDIR,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_STATFS,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+};
+
+#ifndef NFS_NOSERVER
+/*
+ * Maps errno values to nfs error numbers.
+ * Use NFSERR_IO as the catch all for ones not specifically defined in
+ * RFC 1094.
+ */
+static u_char nfsrv_v2errmap[ELAST] = {
+ NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR,
+ NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO,
+ NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO /* << Last is 86 */
+};
+
+/*
+ * Maps errno values to nfs error numbers.
+ * Although it is not obvious whether or not NFS clients really care if
+ * a returned error value is in the specified list for the procedure, the
+ * safest thing to do is filter them appropriately. For Version 2, the
+ * X/Open XNFS document is the only specification that defines error values
+ * for each RPC (The RFC simply lists all possible error values for all RPCs),
+ * so I have decided not to do this for Version 2.
+ * The first entry is the default error return and the rest are the valid
+ * errors for that RPC in increasing numeric order.
+ */
+static short nfsv3err_null[] = {
+ 0,
+ 0,
+};
+
+static short nfsv3err_getattr[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_setattr[] = {
+ NFSERR_IO,
+ NFSERR_PERM,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_INVAL,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOT_SYNC,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_lookup[] = {
+ NFSERR_IO,
+ NFSERR_NOENT,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_NOTDIR,
+ NFSERR_NAMETOL,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_access[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_readlink[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_INVAL,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_read[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_NXIO,
+ NFSERR_ACCES,
+ NFSERR_INVAL,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_write[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_INVAL,
+ NFSERR_FBIG,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_create[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_mkdir[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_symlink[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_mknod[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ NFSERR_BADTYPE,
+ 0,
+};
+
+static short nfsv3err_remove[] = {
+ NFSERR_IO,
+ NFSERR_NOENT,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_NOTDIR,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_rmdir[] = {
+ NFSERR_IO,
+ NFSERR_NOENT,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_INVAL,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_NOTEMPTY,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_rename[] = {
+ NFSERR_IO,
+ NFSERR_NOENT,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_XDEV,
+ NFSERR_NOTDIR,
+ NFSERR_ISDIR,
+ NFSERR_INVAL,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_MLINK,
+ NFSERR_NAMETOL,
+ NFSERR_NOTEMPTY,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_link[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_XDEV,
+ NFSERR_NOTDIR,
+ NFSERR_INVAL,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_MLINK,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_readdir[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_NOTDIR,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_BAD_COOKIE,
+ NFSERR_TOOSMALL,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_readdirplus[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_NOTDIR,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_BAD_COOKIE,
+ NFSERR_NOTSUPP,
+ NFSERR_TOOSMALL,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_fsstat[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_fsinfo[] = {
+ NFSERR_STALE,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_pathconf[] = {
+ NFSERR_STALE,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_commit[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short *nfsrv_v3errmap[] = {
+ nfsv3err_null,
+ nfsv3err_getattr,
+ nfsv3err_setattr,
+ nfsv3err_lookup,
+ nfsv3err_access,
+ nfsv3err_readlink,
+ nfsv3err_read,
+ nfsv3err_write,
+ nfsv3err_create,
+ nfsv3err_mkdir,
+ nfsv3err_symlink,
+ nfsv3err_mknod,
+ nfsv3err_remove,
+ nfsv3err_rmdir,
+ nfsv3err_rename,
+ nfsv3err_link,
+ nfsv3err_readdir,
+ nfsv3err_readdirplus,
+ nfsv3err_fsstat,
+ nfsv3err_fsinfo,
+ nfsv3err_pathconf,
+ nfsv3err_commit,
+};
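A rough illustration of how tables like the ones above are meant to be consumed (the actual consumer is nfsrv_errmap() later in this file): the first entry is the fallback, and the remainder is a zero-terminated, ascending list scanned until the errno is found or passed. The table values below are placeholder errno-style numbers, not the real NFSERR_* codes:

    #include <stdio.h>

    /* Mirrors the scan loop in nfsrv_errmap(): return err if it appears
     * in the table's ascending list, otherwise fall back to entry 0. */
    static int
    errmap_lookup(const short *tab, int err)
    {
    	const short *defaulterrp = tab;
    	const short *errp = tab;

    	while (*++errp) {
    		if (*errp == err)
    			return (err);		/* allowed for this RPC */
    		if (*errp > err)
    			break;			/* list is ascending; give up */
    	}
    	return ((int)*defaulterrp);		/* default error */
    }

    int
    main(void)
    {
    	/* default first, then valid errors in increasing order, 0-terminated */
    	static const short tab[] = { 5 /* EIO */, 2 /* ENOENT */, 13 /* EACCES */, 0 };

    	printf("%d\n", errmap_lookup(tab, 2));	/* 2: in the list */
    	printf("%d\n", errmap_lookup(tab, 28));	/* 5: not listed -> default */
    	return (0);
    }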
+
+#endif /* NFS_NOSERVER */
+
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+extern struct nfsstats nfsstats;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern nfstype nfsv2_type[9];
+extern nfstype nfsv3_type[9];
+extern struct nfsnodehashhead *nfsnodehashtbl;
+extern u_long nfsnodehash;
+
+struct getfh_args;
+extern int getfh(struct proc *, struct getfh_args *, int *);
+struct nfssvc_args;
+extern int nfssvc(struct proc *, struct nfssvc_args *, int *);
+
+LIST_HEAD(nfsnodehashhead, nfsnode);
+
+int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *));
+
+u_quad_t
+nfs_curusec()
+{
+ struct timeval tv;
+
+ getmicrotime(&tv);
+ return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec);
+}
+
+/*
+ * Create the header for an rpc request packet
+ * The hsiz is the size of the rest of the nfs request header.
+ * (just used to decide if a cluster is a good idea)
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+ struct vnode *vp;
+ u_long procid;
+ int hsiz;
+ caddr_t *bposp;
+{
+ register struct mbuf *mb;
+ register u_int32_t *tl;
+ register caddr_t bpos;
+ struct mbuf *mb2;
+ struct nfsmount *nmp;
+ int nqflag;
+
+ MGET(mb, M_WAIT, MT_DATA);
+ if (hsiz >= MINCLSIZE)
+ MCLGET(mb, M_WAIT);
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * For NQNFS, add lease request.
+ */
+ if (vp) {
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nqflag = NQNFS_NEEDLEASE(vp, procid);
+ if (nqflag) {
+ nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nqflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ } else {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ }
+ /* Finally, return values */
+ *bposp = bpos;
+ return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len,
+ verf_str, mrest, mrest_len, mbp, xidp)
+ register struct ucred *cr;
+ int nmflag;
+ int procid;
+ int auth_type;
+ int auth_len;
+ char *auth_str;
+ int verf_len;
+ char *verf_str;
+ struct mbuf *mrest;
+ int mrest_len;
+ struct mbuf **mbp;
+ u_int32_t *xidp;
+{
+ register struct mbuf *mb;
+ register u_int32_t *tl;
+ register caddr_t bpos;
+ register int i;
+ struct mbuf *mreq, *mb2;
+ int siz, grpsiz, authsiz;
+
+ authsiz = nfsm_rndup(auth_len);
+ MGETHDR(mb, M_WAIT, MT_DATA);
+ if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) {
+ MCLGET(mb, M_WAIT);
+ } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) {
+ MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED);
+ } else {
+ MH_ALIGN(mb, 8 * NFSX_UNSIGNED);
+ }
+ mb->m_len = 0;
+ mreq = mb;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * First the RPC header.
+ */
+ nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
+
+ /* Get a pretty random xid to start with */
+ if (!nfs_xid)
+ nfs_xid = random();
+ /*
+ * Skip zero xid if it should ever happen.
+ */
+ if (++nfs_xid == 0)
+ nfs_xid++;
+
+ *tl++ = *xidp = txdr_unsigned(nfs_xid);
+ *tl++ = rpc_call;
+ *tl++ = rpc_vers;
+ if (nmflag & NFSMNT_NQNFS) {
+ *tl++ = txdr_unsigned(NQNFS_PROG);
+ *tl++ = txdr_unsigned(NQNFS_VER3);
+ } else {
+ *tl++ = txdr_unsigned(NFS_PROG);
+ if (nmflag & NFSMNT_NFSV3)
+ *tl++ = txdr_unsigned(NFS_VER3);
+ else
+ *tl++ = txdr_unsigned(NFS_VER2);
+ }
+ if (nmflag & NFSMNT_NFSV3)
+ *tl++ = txdr_unsigned(procid);
+ else
+ *tl++ = txdr_unsigned(nfsv2_procid[procid]);
+
+ /*
+ * And then the authorization cred.
+ */
+ *tl++ = txdr_unsigned(auth_type);
+ *tl = txdr_unsigned(authsiz);
+ switch (auth_type) {
+ case RPCAUTH_UNIX:
+ nfsm_build(tl, u_int32_t *, auth_len);
+ *tl++ = 0; /* stamp ?? */
+ *tl++ = 0; /* NULL hostname */
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl++ = txdr_unsigned(cr->cr_groups[0]);
+ grpsiz = (auth_len >> 2) - 5;
+ *tl++ = txdr_unsigned(grpsiz);
+ for (i = 1; i <= grpsiz; i++)
+ *tl++ = txdr_unsigned(cr->cr_groups[i]);
+ break;
+ case RPCAUTH_KERB4:
+ siz = auth_len;
+ while (siz > 0) {
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(auth_str, bpos, i);
+ mb->m_len += i;
+ auth_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ break;
+ };
+
+ /*
+ * And the verifier...
+ */
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ if (verf_str) {
+ *tl++ = txdr_unsigned(RPCAUTH_KERB4);
+ *tl = txdr_unsigned(verf_len);
+ siz = verf_len;
+ while (siz > 0) {
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(verf_str, bpos, i);
+ mb->m_len += i;
+ verf_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) {
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ } else {
+ *tl++ = txdr_unsigned(RPCAUTH_NULL);
+ *tl = 0;
+ }
+ mb->m_next = mrest;
+ mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len;
+ mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+ *mbp = mb;
+ return (mreq);
+}
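For reference, the eight 32-bit words that nfsm_rpchead() lays down before the credential body follow the standard ONC RPC call header (RFC 1057). A sketch of that fixed prefix as a C struct (field names chosen here for illustration; everything on the wire is XDR, i.e. big-endian):

    #include <stdint.h>

    struct rpc_call_prefix {
    	uint32_t xid;		/* transaction id (nfs_xid above) */
    	uint32_t msg_type;	/* rpc_call */
    	uint32_t rpcvers;	/* rpc_vers, always 2 */
    	uint32_t prog;		/* NFS_PROG or NQNFS_PROG */
    	uint32_t vers;		/* NFS_VER2, NFS_VER3 or NQNFS_VER3 */
    	uint32_t proc;		/* procedure number */
    	uint32_t cred_flavor;	/* auth_type, e.g. RPCAUTH_UNIX */
    	uint32_t cred_len;	/* authsiz, rounded to a multiple of 4 */
    	/* cred_len bytes of credential body follow, then the verifier
    	 * (flavor, length, body), then the NFS arguments in mrest. */
    };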
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ */
+int
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+ struct mbuf **mrep;
+ register struct uio *uiop;
+ int siz;
+ caddr_t *dpos;
+{
+ register char *mbufcp, *uiocp;
+ register int xfer, left, len;
+ register struct mbuf *mp;
+ long uiosiz, rem;
+ int error = 0;
+
+ mp = *mrep;
+ mbufcp = *dpos;
+ len = mtod(mp, caddr_t)+mp->m_len-mbufcp;
+ rem = nfsm_rndup(siz)-siz;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EFBIG);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ while (left > 0) {
+ while (len == 0) {
+ mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ mbufcp = mtod(mp, caddr_t);
+ len = mp->m_len;
+ }
+ xfer = (left > len) ? len : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (mbufcp, uiocp, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(mbufcp, uiocp, xfer);
+ else
+ copyout(mbufcp, uiocp, xfer);
+ left -= xfer;
+ len -= xfer;
+ mbufcp += xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) {
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ *dpos = mbufcp;
+ *mrep = mp;
+ if (rem > 0) {
+ if (len < rem)
+ error = nfs_adv(mrep, dpos, rem, len);
+ else
+ *dpos += rem;
+ }
+ return (error);
+}
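The `rem = nfsm_rndup(siz) - siz` arithmetic above (and in the routines that follow) is the usual XDR rule that opaque and string data be padded out to a 4-byte boundary. A tiny standalone illustration, with RNDUP4 standing in for the kernel's nfsm_rndup macro:

    #include <stdio.h>

    /* Round up to the next multiple of 4, as nfsm_rndup() does, so that
     * XDR opaque/string data always ends on a word boundary. */
    #define RNDUP4(a)	(((a) + 3) & ~0x3)

    int
    main(void)
    {
    	int siz;

    	for (siz = 1; siz <= 8; siz++)
    		printf("len %d -> padded %d (pad bytes %d)\n",
    		    siz, RNDUP4(siz), RNDUP4(siz) - siz);
    	return (0);
    }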
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain.
+ * NOTE: can only handle iovcnt == 1
+ */
+int
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+ register struct uio *uiop;
+ struct mbuf **mq;
+ int siz;
+ caddr_t *bpos;
+{
+ register char *uiocp;
+ register struct mbuf *mp, *mp2;
+ register int xfer, left, mlen;
+ int uiosiz, clflg, rem;
+ char *cp;
+
+#ifdef DIAGNOSTIC
+ if (uiop->uio_iovcnt != 1)
+ panic("nfsm_uiotombuf: iovcnt != 1");
+#endif
+
+ if (siz > MLEN) /* or should it >= MCLBYTES ?? */
+ clflg = 1;
+ else
+ clflg = 0;
+ rem = nfsm_rndup(siz)-siz;
+ mp = mp2 = *mq;
+ while (siz > 0) {
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ while (left > 0) {
+ mlen = M_TRAILINGSPACE(mp);
+ if (mlen == 0) {
+ MGET(mp, M_WAIT, MT_DATA);
+ if (clflg)
+ MCLGET(mp, M_WAIT);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ mp2 = mp;
+ mlen = M_TRAILINGSPACE(mp);
+ }
+ xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+ copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ mp->m_len += xfer;
+ left -= xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ siz -= uiosiz;
+ }
+ if (rem > 0) {
+ if (rem > M_TRAILINGSPACE(mp)) {
+ MGET(mp, M_WAIT, MT_DATA);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ }
+ cp = mtod(mp, caddr_t)+mp->m_len;
+ for (left = 0; left < rem; left++)
+ *cp++ = '\0';
+ mp->m_len += rem;
+ *bpos = cp;
+ } else
+ *bpos = mtod(mp, caddr_t)+mp->m_len;
+ *mq = mp;
+ return (0);
+}
+
+/*
+ * Help break down an mbuf chain by making the first siz bytes contiguous
+ * and returning a pointer to them via cp2.
+ * This is used by the macro nfsm_dissect for the tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ */
+int
+nfsm_disct(mdp, dposp, siz, left, cp2)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int siz;
+ int left;
+ caddr_t *cp2;
+{
+ register struct mbuf *mp, *mp2;
+ register int siz2, xfer;
+ register caddr_t p;
+
+ mp = *mdp;
+ while (left == 0) {
+ *mdp = mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ left = mp->m_len;
+ *dposp = mtod(mp, caddr_t);
+ }
+ if (left >= siz) {
+ *cp2 = *dposp;
+ *dposp += siz;
+ } else if (mp->m_next == NULL) {
+ return (EBADRPC);
+ } else if (siz > MHLEN) {
+ panic("nfs S too big");
+ } else {
+ MGET(mp2, M_WAIT, MT_DATA);
+ mp2->m_next = mp->m_next;
+ mp->m_next = mp2;
+ mp->m_len -= left;
+ mp = mp2;
+ *cp2 = p = mtod(mp, caddr_t);
+ bcopy(*dposp, p, left); /* Copy what was left */
+ siz2 = siz-left;
+ p += left;
+ mp2 = mp->m_next;
+ /* Loop around copying up the siz2 bytes */
+ while (siz2 > 0) {
+ if (mp2 == NULL)
+ return (EBADRPC);
+ xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+ if (xfer > 0) {
+ bcopy(mtod(mp2, caddr_t), p, xfer);
+ NFSMADV(mp2, xfer);
+ mp2->m_len -= xfer;
+ p += xfer;
+ siz2 -= xfer;
+ }
+ if (siz2 > 0)
+ mp2 = mp2->m_next;
+ }
+ mp->m_len = siz;
+ *mdp = mp2;
+ *dposp = mtod(mp2, caddr_t);
+ }
+ return (0);
+}
+
+/*
+ * Advance the position in the mbuf chain.
+ */
+int
+nfs_adv(mdp, dposp, offs, left)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int offs;
+ int left;
+{
+ register struct mbuf *m;
+ register int s;
+
+ m = *mdp;
+ s = left;
+ while (s < offs) {
+ offs -= s;
+ m = m->m_next;
+ if (m == NULL)
+ return (EBADRPC);
+ s = m->m_len;
+ }
+ *mdp = m;
+ *dposp = mtod(m, caddr_t)+offs;
+ return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ */
+int
+nfsm_strtmbuf(mb, bpos, cp, siz)
+ struct mbuf **mb;
+ char **bpos;
+ const char *cp;
+ long siz;
+{
+ register struct mbuf *m1 = NULL, *m2;
+ long left, xfer, len, tlen;
+ u_int32_t *tl;
+ int putsize;
+
+ putsize = 1;
+ m2 = *mb;
+ left = M_TRAILINGSPACE(m2);
+ if (left > 0) {
+ tl = ((u_int32_t *)(*bpos));
+ *tl++ = txdr_unsigned(siz);
+ putsize = 0;
+ left -= NFSX_UNSIGNED;
+ m2->m_len += NFSX_UNSIGNED;
+ if (left > 0) {
+ bcopy(cp, (caddr_t) tl, left);
+ siz -= left;
+ cp += left;
+ m2->m_len += left;
+ left = 0;
+ }
+ }
+ /* Loop around adding mbufs */
+ while (siz > 0) {
+ MGET(m1, M_WAIT, MT_DATA);
+ if (siz > MLEN)
+ MCLGET(m1, M_WAIT);
+ m1->m_len = NFSMSIZ(m1);
+ m2->m_next = m1;
+ m2 = m1;
+ tl = mtod(m1, u_int32_t *);
+ tlen = 0;
+ if (putsize) {
+ *tl++ = txdr_unsigned(siz);
+ m1->m_len -= NFSX_UNSIGNED;
+ tlen = NFSX_UNSIGNED;
+ putsize = 0;
+ }
+ if (siz < m1->m_len) {
+ len = nfsm_rndup(siz);
+ xfer = siz;
+ if (xfer < len)
+ *(tl+(xfer>>2)) = 0;
+ } else {
+ xfer = len = m1->m_len;
+ }
+ bcopy(cp, (caddr_t) tl, xfer);
+ m1->m_len = len+tlen;
+ siz -= xfer;
+ cp += xfer;
+ }
+ *mb = m1;
+ *bpos = mtod(m1, caddr_t)+m1->m_len;
+ return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ */
+int
+nfs_init(vfsp)
+ struct vfsconf *vfsp;
+{
+ register int i;
+
+ nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1);
+
+ /*
+ * Check to see if major data structures haven't bloated.
+ */
+ if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) {
+ printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC);
+ printf("Try reducing NFS_UIDHASHSIZ\n");
+ }
+ if (sizeof (struct nfsuid) > NFS_UIDALLOC) {
+ printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC);
+ printf("Try unionizing the nu_nickname and nu_flag fields\n");
+ }
+ nfs_mount_type = vfsp->vfc_typenum;
+ nfsrtt.pos = 0;
+ rpc_vers = txdr_unsigned(RPC_VER2);
+ rpc_call = txdr_unsigned(RPC_CALL);
+ rpc_reply = txdr_unsigned(RPC_REPLY);
+ rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+ rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+ rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+ rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+ rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+ rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
+ nfs_prog = txdr_unsigned(NFS_PROG);
+ nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+ nfs_true = txdr_unsigned(TRUE);
+ nfs_false = txdr_unsigned(FALSE);
+ nfs_xdrneg1 = txdr_unsigned(-1);
+ nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
+ if (nfs_ticks < 1)
+ nfs_ticks = 1;
+ /* Ensure async daemons disabled */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
+ nfs_iodwant[i] = (struct proc *)0;
+ nfs_iodmount[i] = (struct nfsmount *)0;
+ }
+ nfs_nhinit(); /* Init the nfsnode table */
+#ifndef NFS_NOSERVER
+ nfsrv_init(0); /* Init server data structures */
+ nfsrv_initcache(); /* Init the server request cache */
+#endif
+
+ /*
+ * Initialize the nqnfs server stuff.
+ */
+ if (nqnfsstarttime == 0) {
+ nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+ + nqsrv_clockskew + nqsrv_writeslack;
+ NQLOADNOVRAM(nqnfsstarttime);
+ CIRCLEQ_INIT(&nqtimerhead);
+ nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash);
+ }
+
+ /*
+ * Initialize reply list and start timer
+ */
+ TAILQ_INIT(&nfs_reqq);
+
+ nfs_timer(0);
+
+ /*
+ * Set up lease_check and lease_updatetime so that other parts
+ * of the system can call us, if we are loadable.
+ */
+#ifndef NFS_NOSERVER
+ nfs_prev_vop_lease_check = default_vnodeop_p[VOFFSET(vop_lease)];
+ default_vnodeop_p[VOFFSET(vop_lease)] = (vop_t *)nqnfs_vop_lease_check;
+#endif
+ nfs_prev_lease_updatetime = lease_updatetime;
+ lease_updatetime = nfs_lease_updatetime;
+ nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg;
+ sysent[SYS_nfssvc].sy_narg = 2;
+ nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call;
+ sysent[SYS_nfssvc].sy_call = (sy_call_t *)nfssvc;
+#ifndef NFS_NOSERVER
+ nfs_prev_getfh_sy_narg = sysent[SYS_getfh].sy_narg;
+ sysent[SYS_getfh].sy_narg = 2;
+ nfs_prev_getfh_sy_call = sysent[SYS_getfh].sy_call;
+ sysent[SYS_getfh].sy_call = (sy_call_t *)getfh;
+#endif
+
+ nfs_pbuf_freecnt = nswbuf / 2 + 1;
+
+ return (0);
+}
+
+int
+nfs_uninit(vfsp)
+ struct vfsconf *vfsp;
+{
+
+ untimeout(nfs_timer, (void *)NULL, nfs_timer_handle);
+ nfs_mount_type = -1;
+#ifndef NFS_NOSERVER
+ default_vnodeop_p[VOFFSET(vop_lease)] = nfs_prev_vop_lease_check;
+#endif
+ lease_updatetime = nfs_prev_lease_updatetime;
+ sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg;
+ sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call;
+#ifndef NFS_NOSERVER
+ sysent[SYS_getfh].sy_narg = nfs_prev_getfh_sy_narg;
+ sysent[SYS_getfh].sy_call = nfs_prev_getfh_sy_call;
+#endif
+ return (0);
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ * that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ * error otherwise
+ */
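A sketch of the pattern these two routines support on the client side (the real caller is the getattr path in nfs_vnops.c; everything below is a simplified userland stand-in, not the kernel API): serve attributes from the cache while the stamp is fresh, otherwise refetch and reload.

    #include <errno.h>
    #include <stdio.h>
    #include <time.h>

    /* Toy stand-in for the attribute cache: serve from the cache while
     * it is fresh, otherwise pretend to do a GETATTR RPC and reload. */
    struct attrcache {
    	time_t	stamp;		/* when the attrs were loaded */
    	int	timeo;		/* seconds the entry stays valid */
    	int	attrs;		/* the cached data (stand-in) */
    };

    static int
    getattrcache(struct attrcache *c, int *out)
    {
    	if (time(NULL) - c->stamp >= c->timeo)
    		return (ENOENT);	/* stale: caller must refetch */
    	*out = c->attrs;
    	return (0);
    }

    static void
    loadattrcache(struct attrcache *c, int attrs)
    {
    	c->attrs = attrs;
    	c->stamp = time(NULL);		/* cf. np->n_attrstamp = time_second */
    }

    int
    main(void)
    {
    	struct attrcache c = { 0, 3, 0 };
    	int a;

    	if (getattrcache(&c, &a) == ENOENT) {	/* miss: "do the RPC" */
    		loadattrcache(&c, 42);
    		getattrcache(&c, &a);
    	}
    	printf("attrs %d\n", a);
    	return (0);
    }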
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and, iff vaper is not NULL, copy the
+ * attributes to *vaper.
+ */
+int
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+ struct vnode **vpp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vattr *vaper;
+{
+ register struct vnode *vp = *vpp;
+ register struct vattr *vap;
+ register struct nfs_fattr *fp;
+ register struct nfsnode *np;
+ register int32_t t1;
+ caddr_t cp2;
+ int error = 0, rdev;
+ struct mbuf *md;
+ enum vtype vtyp;
+ u_short vmode;
+ struct timespec mtime;
+ int v3 = NFS_ISV3(vp);
+
+ md = *mdp;
+ t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
+ if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0)
+ return (error);
+ fp = (struct nfs_fattr *)cp2;
+ if (v3) {
+ vtyp = nfsv3tov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ rdev = makeudev(fxdr_unsigned(int, fp->fa3_rdev.specdata1),
+ fxdr_unsigned(int, fp->fa3_rdev.specdata2));
+ fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
+ } else {
+ vtyp = nfsv2tov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ /*
+ * XXX
+ *
+ * The duplicate information returned in fa_type and fa_mode
+ * is an ambiguity in the NFS version 2 protocol.
+ *
+ * VREG should be taken literally as a regular file. If a
+		 * server intends to return some type information differently
+ * in the upper bits of the mode field (e.g. for sockets, or
+ * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we
+ * leave the examination of the mode bits even in the VREG
+ * case to avoid breakage for bogus servers, but we make sure
+ * that there are actually type bits set in the upper part of
+ * fa_mode (and failing that, trust the va_type field).
+ *
+		 * NFSv3 cleared this up, and requires that fa_mode not
+		 * contain any type information (while also introducing sockets
+ * and FIFOs for fa_type).
+ */
+ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0))
+ vtyp = IFTOVT(vmode);
+ rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
+ fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
+
+ /*
+ * Really ugly NFSv2 kludge.
+ */
+ if (vtyp == VCHR && rdev == 0xffffffff)
+ vtyp = VFIFO;
+ }
+
+ /*
+ * If v_type == VNON it is a new node, so fill in the v_type,
+ * n_mtime fields. Check to see if it represents a special
+ * device, and if so, check for a possible alias. Once the
+ * correct vnode has been obtained, fill in the rest of the
+ * information.
+ */
+ np = VTONFS(vp);
+ if (vp->v_type != vtyp) {
+ vp->v_type = vtyp;
+ if (vp->v_type == VFIFO) {
+ vp->v_op = fifo_nfsv2nodeop_p;
+ }
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ vp->v_op = spec_nfsv2nodeop_p;
+ addaliasu(vp, rdev);
+ }
+ np->n_mtime = mtime.tv_sec;
+ }
+ vap = &np->n_vattr;
+ vap->va_type = vtyp;
+ vap->va_mode = (vmode & 07777);
+ vap->va_rdev = rdev;
+ vap->va_mtime = mtime;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ if (v3) {
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_size = fxdr_hyper(&fp->fa3_size);
+ vap->va_blocksize = NFS_FABLKSIZE;
+ vap->va_bytes = fxdr_hyper(&fp->fa3_used);
+ vap->va_fileid = fxdr_unsigned(int32_t,
+ fp->fa3_fileid.nfsuquad[1]);
+ fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
+ fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime);
+ vap->va_flags = 0;
+ vap->va_filerev = 0;
+ } else {
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
+ vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize);
+ vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks)
+ * NFS_FABLKSIZE;
+ vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
+ fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
+ vap->va_flags = 0;
+ vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t,
+ fp->fa2_ctime.nfsv2_sec);
+ vap->va_ctime.tv_nsec = 0;
+ vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
+ vap->va_filerev = 0;
+ }
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else {
+ np->n_size = vap->va_size;
+ }
+ vnode_pager_setsize(vp, np->n_size);
+ } else {
+ np->n_size = vap->va_size;
+ }
+ }
+ np->n_attrstamp = time_second;
+ if (vaper != NULL) {
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+ if (np->n_flag & NCHG) {
+ if (np->n_flag & NACC)
+ vaper->va_atime = np->n_atim;
+ if (np->n_flag & NUPD)
+ vaper->va_mtime = np->n_mtim;
+ }
+ }
+ return (0);
+}
+
+#ifdef NFS_ACDEBUG
+#include <sys/sysctl.h>
+SYSCTL_DECL(_vfs_nfs);
+static int nfs_acdebug;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, "");
+#endif
+
+/*
+ * Check the time stamp
+ * If the cache is valid, copy contents to *vap and return 0
+ * otherwise return an error
+ */
+int
+nfs_getattrcache(vp, vaper)
+ register struct vnode *vp;
+ struct vattr *vaper;
+{
+ register struct nfsnode *np;
+ register struct vattr *vap;
+ struct nfsmount *nmp;
+ int timeo;
+
+ np = VTONFS(vp);
+ vap = &np->n_vattr;
+ nmp = VFSTONFS(vp->v_mount);
+ /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
+ timeo = (time_second - np->n_mtime) / 10;
+
+#ifdef NFS_ACDEBUG
+ if (nfs_acdebug>1)
+ printf("nfs_getattrcache: initial timeo = %d\n", timeo);
+#endif
+
+ if (vap->va_type == VDIR) {
+ if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin)
+ timeo = nmp->nm_acdirmin;
+ else if (timeo > nmp->nm_acdirmax)
+ timeo = nmp->nm_acdirmax;
+ } else {
+ if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin)
+ timeo = nmp->nm_acregmin;
+ else if (timeo > nmp->nm_acregmax)
+ timeo = nmp->nm_acregmax;
+ }
+
+#ifdef NFS_ACDEBUG
+ if (nfs_acdebug > 2)
+ printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
+ nmp->nm_acregmin, nmp->nm_acregmax,
+ nmp->nm_acdirmin, nmp->nm_acdirmax);
+
+ if (nfs_acdebug)
+ printf("nfs_getattrcache: age = %d; final timeo = %d\n",
+ (time_second - np->n_attrstamp), timeo);
+#endif
+
+ if ((time_second - np->n_attrstamp) >= timeo) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ nfsstats.attrcache_hits++;
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else {
+ np->n_size = vap->va_size;
+ }
+ vnode_pager_setsize(vp, np->n_size);
+ } else {
+ np->n_size = vap->va_size;
+ }
+ }
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
+ if (np->n_flag & NCHG) {
+ if (np->n_flag & NACC)
+ vaper->va_atime = np->n_atim;
+ if (np->n_flag & NUPD)
+ vaper->va_mtime = np->n_mtim;
+ }
+ return (0);
+}
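Ignoring the NMODIFIED special case, the timeout computed above is just the age of the file's last modification divided by ten, clamped to the mount's min/max bounds, so attributes of long-unchanged files are trusted for longer. A standalone distillation of that calculation (the acmin/acmax values below are illustrative):

    #include <stdio.h>

    /* Clamp the adaptive attribute timeout to [acmin, acmax]. */
    static int
    attr_timeo(long mtime_age, int acmin, int acmax)
    {
    	long timeo = mtime_age / 10;

    	if (timeo < acmin)
    		timeo = acmin;
    	else if (timeo > acmax)
    		timeo = acmax;
    	return ((int)timeo);
    }

    int
    main(void)
    {
    	/* e.g. acregmin = 3, acregmax = 60: */
    	printf("%d %d %d\n",
    	    attr_timeo(5, 3, 60),	/* -> 3 (clamped up) */
    	    attr_timeo(300, 3, 60),	/* -> 30 */
    	    attr_timeo(3600, 3, 60));	/* -> 60 (clamped down) */
    	return (0);
    }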
+
+#ifndef NFS_NOSERVER
+/*
+ * Set up nameidata for a lookup() call and do it.
+ *
+ * If pubflag is set, this call is done for a lookup operation on the
+ * public filehandle. In that case we allow crossing mountpoints and
+ * absolute pathnames. However, the caller is expected to check that
+ * the lookup result is within the public fs, and deny access if
+ * it is not.
+ *
+ * nfs_namei() clears out fields that namei() might leave holding garbage.
+ * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no
+ * error occurs but the parent was not requested.
+ *
+ * dirp may be set whether an error is returned or not, and must be
+ * released by the caller.
+ */
+int
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag)
+ register struct nameidata *ndp;
+ fhandle_t *fhp;
+ int len;
+ struct nfssvc_sock *slp;
+ struct sockaddr *nam;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vnode **retdirp;
+ struct proc *p;
+ int kerbflag, pubflag;
+{
+ register int i, rem;
+ register struct mbuf *md;
+ register char *fromcp, *tocp, *cp;
+ struct iovec aiov;
+ struct uio auio;
+ struct vnode *dp;
+ int error, rdonly, linklen;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ *retdirp = (struct vnode *)0;
+ cnp->cn_pnbuf = zalloc(namei_zone);
+
+ /*
+ * Copy the name from the mbuf list to ndp->ni_pnbuf
+ * and set the various ndp fields appropriately.
+ */
+ fromcp = *dposp;
+ tocp = cnp->cn_pnbuf;
+ md = *mdp;
+ rem = mtod(md, caddr_t) + md->m_len - fromcp;
+ for (i = 0; i < len; i++) {
+ while (rem == 0) {
+ md = md->m_next;
+ if (md == NULL) {
+ error = EBADRPC;
+ goto out;
+ }
+ fromcp = mtod(md, caddr_t);
+ rem = md->m_len;
+ }
+ if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) {
+ error = EACCES;
+ goto out;
+ }
+ *tocp++ = *fromcp++;
+ rem--;
+ }
+ *tocp = '\0';
+ *mdp = md;
+ *dposp = fromcp;
+ len = nfsm_rndup(len)-len;
+ if (len > 0) {
+ if (rem >= len)
+ *dposp += len;
+ else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0)
+ goto out;
+ }
+
+ /*
+ * Extract and set starting directory.
+ */
+ error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+ nam, &rdonly, kerbflag, pubflag);
+ if (error)
+ goto out;
+ if (dp->v_type != VDIR) {
+ vrele(dp);
+ error = ENOTDIR;
+ goto out;
+ }
+
+ if (rdonly)
+ cnp->cn_flags |= RDONLY;
+
+ /*
+	 * Set return directory. Reference to dp is implicitly transferred
+ * to the returned pointer
+ */
+ *retdirp = dp;
+
+ if (pubflag) {
+ /*
+ * Oh joy. For WebNFS, handle those pesky '%' escapes,
+ * and the 'native path' indicator.
+ */
+ cp = zalloc(namei_zone);
+ fromcp = cnp->cn_pnbuf;
+ tocp = cp;
+ if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) {
+ switch ((unsigned char)*fromcp) {
+ case WEBNFS_NATIVE_CHAR:
+ /*
+ * 'Native' path for us is the same
+ * as a path according to the NFS spec,
+ * just skip the escape char.
+ */
+ fromcp++;
+ break;
+ /*
+ * More may be added in the future, range 0x80-0xff
+ */
+ default:
+ error = EIO;
+ zfree(namei_zone, cp);
+ goto out;
+ }
+ }
+ /*
+ * Translate the '%' escapes, URL-style.
+ */
+ while (*fromcp != '\0') {
+ if (*fromcp == WEBNFS_ESC_CHAR) {
+ if (fromcp[1] != '\0' && fromcp[2] != '\0') {
+ fromcp++;
+ *tocp++ = HEXSTRTOI(fromcp);
+ fromcp += 2;
+ continue;
+ } else {
+ error = ENOENT;
+ zfree(namei_zone, cp);
+ goto out;
+ }
+ } else
+ *tocp++ = *fromcp++;
+ }
+ *tocp = '\0';
+ zfree(namei_zone, cnp->cn_pnbuf);
+ cnp->cn_pnbuf = cp;
+ }
+
+ ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1;
+ ndp->ni_segflg = UIO_SYSSPACE;
+
+ if (pubflag) {
+ ndp->ni_rootdir = rootvnode;
+ ndp->ni_loopcnt = 0;
+ if (cnp->cn_pnbuf[0] == '/')
+ dp = rootvnode;
+ } else {
+ cnp->cn_flags |= NOCROSSMOUNT;
+ }
+
+ /*
+ * Initialize for scan, set ni_startdir and bump ref on dp again
+	 * because lookup() will dereference ni_startdir.
+ */
+
+ cnp->cn_proc = p;
+ VREF(dp);
+ ndp->ni_startdir = dp;
+
+ for (;;) {
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ /*
+ * Call lookup() to do the real work. If an error occurs,
+ * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
+ * we do not have to dereference anything before returning.
+ * In either case ni_startdir will be dereferenced and NULLed
+ * out.
+ */
+ error = lookup(ndp);
+ if (error)
+ break;
+
+ /*
+ * Check for encountering a symbolic link. Trivial
+ * termination occurs if no symlink encountered.
+ * Note: zfree is safe because error is 0, so we will
+ * not zfree it again when we break.
+ */
+ if ((cnp->cn_flags & ISSYMLINK) == 0) {
+ nfsrv_object_create(ndp->ni_vp);
+ if (cnp->cn_flags & (SAVENAME | SAVESTART))
+ cnp->cn_flags |= HASBUF;
+ else
+ zfree(namei_zone, cnp->cn_pnbuf);
+ break;
+ }
+
+ /*
+ * Validate symlink
+ */
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ VOP_UNLOCK(ndp->ni_dvp, 0, p);
+ if (!pubflag) {
+ error = EINVAL;
+ goto badlink2;
+ }
+
+ if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
+ error = ELOOP;
+ goto badlink2;
+ }
+ if (ndp->ni_pathlen > 1)
+ cp = zalloc(namei_zone);
+ else
+ cp = cnp->cn_pnbuf;
+ aiov.iov_base = cp;
+ aiov.iov_len = MAXPATHLEN;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = 0;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = (struct proc *)0;
+ auio.uio_resid = MAXPATHLEN;
+ error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
+ if (error) {
+ badlink1:
+ if (ndp->ni_pathlen > 1)
+ zfree(namei_zone, cp);
+ badlink2:
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ break;
+ }
+ linklen = MAXPATHLEN - auio.uio_resid;
+ if (linklen == 0) {
+ error = ENOENT;
+ goto badlink1;
+ }
+ if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
+ error = ENAMETOOLONG;
+ goto badlink1;
+ }
+
+ /*
+ * Adjust or replace path
+ */
+ if (ndp->ni_pathlen > 1) {
+ bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
+ zfree(namei_zone, cnp->cn_pnbuf);
+ cnp->cn_pnbuf = cp;
+ } else
+ cnp->cn_pnbuf[linklen] = '\0';
+ ndp->ni_pathlen += linklen;
+
+ /*
+ * Cleanup refs for next loop and check if root directory
+ * should replace current directory. Normally ni_dvp
+ * becomes the new base directory and is cleaned up when
+ * we loop. Explicitly null pointers after invalidation
+ * to clarify operation.
+ */
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+
+ if (cnp->cn_pnbuf[0] == '/') {
+ vrele(ndp->ni_dvp);
+ ndp->ni_dvp = ndp->ni_rootdir;
+ VREF(ndp->ni_dvp);
+ }
+ ndp->ni_startdir = ndp->ni_dvp;
+ ndp->ni_dvp = NULL;
+ }
+
+ /*
+	 * nfs_namei() guarantees that fields will not contain garbage
+ * whether an error occurs or not. This allows the caller to track
+ * cleanup state trivially.
+ */
+out:
+ if (error) {
+ zfree(namei_zone, cnp->cn_pnbuf);
+ ndp->ni_vp = NULL;
+ ndp->ni_dvp = NULL;
+ ndp->ni_startdir = NULL;
+ cnp->cn_flags &= ~HASBUF;
+ } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
+ ndp->ni_dvp = NULL;
+ }
+ return (error);
+}
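The '%' translation inside nfs_namei() is ordinary URL-style percent decoding (HEXSTRTOI does the two-hex-digit conversion in the kernel). A self-contained userland equivalent of that inner loop, with the same treatment of a truncated escape:

    #include <stdio.h>

    /* Decode WebNFS-style '%' escapes, e.g. "a%20b" -> "a b".  A '%'
     * followed by fewer than two characters is an error, as above. */
    static int
    hexval(char c)
    {
    	if (c >= '0' && c <= '9')
    		return (c - '0');
    	if (c >= 'a' && c <= 'f')
    		return (c - 'a' + 10);
    	if (c >= 'A' && c <= 'F')
    		return (c - 'A' + 10);
    	return (0);
    }

    static int
    webnfs_decode(const char *from, char *to)
    {
    	while (*from != '\0') {
    		if (*from == '%') {
    			if (from[1] == '\0' || from[2] == '\0')
    				return (-1);
    			*to++ = (char)((hexval(from[1]) << 4) | hexval(from[2]));
    			from += 3;
    		} else
    			*to++ = *from++;
    	}
    	*to = '\0';
    	return (0);
    }

    int
    main(void)
    {
    	char buf[64];

    	if (webnfs_decode("a%20b", buf) == 0)
    		printf("%s\n", buf);		/* prints "a b" */
    	return (0);
    }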
+
+/*
+ * A fiddled version of m_adj() that ensures null fill to a long
+ * boundary and only trims off the back end
+ */
+void
+nfsm_adj(mp, len, nul)
+ struct mbuf *mp;
+ register int len;
+ int nul;
+{
+ register struct mbuf *m;
+ register int count, i;
+ register char *cp;
+
+ /*
+ * Trim from tail. Scan the mbuf chain,
+ * calculating its length and finding the last mbuf.
+ * If the adjustment only affects this mbuf, then just
+ * adjust and return. Otherwise, rescan and truncate
+ * after the remaining size.
+ */
+ count = 0;
+ m = mp;
+ for (;;) {
+ count += m->m_len;
+ if (m->m_next == (struct mbuf *)0)
+ break;
+ m = m->m_next;
+ }
+ if (m->m_len > len) {
+ m->m_len -= len;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ return;
+ }
+ count -= len;
+ if (count < 0)
+ count = 0;
+ /*
+ * Correct length for chain is "count".
+ * Find the mbuf with last data, adjust its length,
+ * and toss data from remaining mbufs on chain.
+ */
+ for (m = mp; m; m = m->m_next) {
+ if (m->m_len >= count) {
+ m->m_len = count;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ break;
+ }
+ count -= m->m_len;
+ }
+ for (m = m->m_next;m;m = m->m_next)
+ m->m_len = 0;
+}
+
+/*
+ * Make these functions instead of macros, so that the kernel text size
+ * doesn't get too big...
+ */
+void
+nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp)
+ struct nfsrv_descript *nfsd;
+ int before_ret;
+ register struct vattr *before_vap;
+ int after_ret;
+ struct vattr *after_vap;
+ struct mbuf **mbp;
+ char **bposp;
+{
+ register struct mbuf *mb = *mbp, *mb2;
+ register char *bpos = *bposp;
+ register u_int32_t *tl;
+
+ if (before_ret) {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = nfs_false;
+ } else {
+ nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
+ *tl++ = nfs_true;
+ txdr_hyper(before_vap->va_size, tl);
+ tl += 2;
+ txdr_nfsv3time(&(before_vap->va_mtime), tl);
+ tl += 2;
+ txdr_nfsv3time(&(before_vap->va_ctime), tl);
+ }
+ *bposp = bpos;
+ *mbp = mb;
+ nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp);
+}
+
+void
+nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp)
+ struct nfsrv_descript *nfsd;
+ int after_ret;
+ struct vattr *after_vap;
+ struct mbuf **mbp;
+ char **bposp;
+{
+ register struct mbuf *mb = *mbp, *mb2;
+ register char *bpos = *bposp;
+ register u_int32_t *tl;
+ register struct nfs_fattr *fp;
+
+ if (after_ret) {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = nfs_false;
+ } else {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR);
+ *tl++ = nfs_true;
+ fp = (struct nfs_fattr *)tl;
+ nfsm_srvfattr(nfsd, after_vap, fp);
+ }
+ *mbp = mb;
+ *bposp = bpos;
+}
+
+void
+nfsm_srvfattr(nfsd, vap, fp)
+ register struct nfsrv_descript *nfsd;
+ register struct vattr *vap;
+ register struct nfs_fattr *fp;
+{
+
+ fp->fa_nlink = txdr_unsigned(vap->va_nlink);
+ fp->fa_uid = txdr_unsigned(vap->va_uid);
+ fp->fa_gid = txdr_unsigned(vap->va_gid);
+ if (nfsd->nd_flag & ND_NFSV3) {
+ fp->fa_type = vtonfsv3_type(vap->va_type);
+ fp->fa_mode = vtonfsv3_mode(vap->va_mode);
+ txdr_hyper(vap->va_size, &fp->fa3_size);
+ txdr_hyper(vap->va_bytes, &fp->fa3_used);
+ fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev));
+ fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev));
+ fp->fa3_fsid.nfsuquad[0] = 0;
+ fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid);
+ fp->fa3_fileid.nfsuquad[0] = 0;
+ fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid);
+ txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime);
+ txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime);
+ txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime);
+ } else {
+ fp->fa_type = vtonfsv2_type(vap->va_type);
+ fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
+ fp->fa2_size = txdr_unsigned(vap->va_size);
+ fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize);
+ if (vap->va_type == VFIFO)
+ fp->fa2_rdev = 0xffffffff;
+ else
+ fp->fa2_rdev = txdr_unsigned(vap->va_rdev);
+ fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE);
+ fp->fa2_fsid = txdr_unsigned(vap->va_fsid);
+ fp->fa2_fileid = txdr_unsigned(vap->va_fileid);
+ txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime);
+ txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime);
+ txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime);
+ }
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ * - look up fsid in mount list (if not found ret error)
+ * - get vp and export rights by calling VFS_FHTOVP()
+ * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ * - if not lockflag unlock it with VOP_UNLOCK()
+ */
+int
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag)
+ fhandle_t *fhp;
+ int lockflag;
+ struct vnode **vpp;
+ struct ucred *cred;
+ struct nfssvc_sock *slp;
+ struct sockaddr *nam;
+ int *rdonlyp;
+ int kerbflag;
+ int pubflag;
+{
+ struct proc *p = curproc; /* XXX */
+ register struct mount *mp;
+ register int i;
+ struct ucred *credanon;
+ int error, exflags;
+#ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */
+	struct sockaddr_in *saddr;
+#endif
+
+ *vpp = (struct vnode *)0;
+
+ if (nfs_ispublicfh(fhp)) {
+ if (!pubflag || !nfs_pub.np_valid)
+ return (ESTALE);
+ fhp = &nfs_pub.np_handle;
+ }
+
+ mp = vfs_getvfs(&fhp->fh_fsid);
+ if (!mp)
+ return (ESTALE);
+ error = VFS_CHECKEXP(mp, nam, &exflags, &credanon);
+ if (error)
+ return (error);
+ error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp);
+ if (error)
+ return (error);
+#ifdef MNT_EXNORESPORT
+ if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) {
+ saddr = (struct sockaddr_in *)nam;
+ if (saddr->sin_family == AF_INET &&
+ ntohs(saddr->sin_port) >= IPPORT_RESERVED) {
+ vput(*vpp);
+ *vpp = NULL;
+ return (NFSERR_AUTHERR | AUTH_TOOWEAK);
+ }
+ }
+#endif
+ /*
+ * Check/setup credentials.
+ */
+ if (exflags & MNT_EXKERB) {
+ if (!kerbflag) {
+ vput(*vpp);
+ *vpp = NULL;
+ return (NFSERR_AUTHERR | AUTH_TOOWEAK);
+ }
+ } else if (kerbflag) {
+ vput(*vpp);
+ *vpp = NULL;
+ return (NFSERR_AUTHERR | AUTH_TOOWEAK);
+ } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+ cred->cr_uid = credanon->cr_uid;
+ for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+ cred->cr_groups[i] = credanon->cr_groups[i];
+ cred->cr_ngroups = i;
+ }
+ if (exflags & MNT_EXRDONLY)
+ *rdonlyp = 1;
+ else
+ *rdonlyp = 0;
+
+ nfsrv_object_create(*vpp);
+
+ if (!lockflag)
+ VOP_UNLOCK(*vpp, 0, p);
+ return (0);
+}
+
+
+/*
+ * WebNFS: check if a filehandle is a public filehandle. For v3, this
+ * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has
+ * transformed this to all zeroes in both cases, so check for it.
+ */
+int
+nfs_ispublicfh(fhp)
+ fhandle_t *fhp;
+{
+ char *cp = (char *)fhp;
+ int i;
+
+ for (i = 0; i < NFSX_V3FH; i++)
+ if (*cp++ != 0)
+ return (FALSE);
+ return (TRUE);
+}
+
+#endif /* NFS_NOSERVER */
+/*
+ * This function compares two net addresses by family and returns TRUE
+ * if they are the same host.
+ * If there is any doubt, return FALSE.
+ * The AF_INET family is handled as a special case so that address mbufs
+ * don't need to be saved to store "struct in_addr", which is only 4 bytes.
+ */
+int
+netaddr_match(family, haddr, nam)
+ int family;
+ union nethostaddr *haddr;
+ struct sockaddr *nam;
+{
+ register struct sockaddr_in *inetaddr;
+
+ switch (family) {
+ case AF_INET:
+ inetaddr = (struct sockaddr_in *)nam;
+ if (inetaddr->sin_family == AF_INET &&
+ inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
+ return (1);
+ break;
+#ifdef ISO
+ case AF_ISO:
+ {
+ register struct sockaddr_iso *isoaddr1, *isoaddr2;
+
+ isoaddr1 = (struct sockaddr_iso *)nam;
+ isoaddr2 = (struct sockaddr_iso *)haddr->had_nam;
+ if (isoaddr1->siso_family == AF_ISO &&
+ isoaddr1->siso_nlen > 0 &&
+ isoaddr1->siso_nlen == isoaddr2->siso_nlen &&
+ SAME_ISOADDR(isoaddr1, isoaddr2))
+ return (1);
+ break;
+ }
+#endif /* ISO */
+ default:
+ break;
+ };
+ return (0);
+}
+
+static nfsuint64 nfs_nullcookie = { { 0, 0 } };
+/*
+ * This function finds the directory cookie that corresponds to the
+ * logical byte offset given.
+ */
+nfsuint64 *
+nfs_getcookie(np, off, add)
+ register struct nfsnode *np;
+ off_t off;
+ int add;
+{
+ register struct nfsdmap *dp, *dp2;
+ register int pos;
+
+ pos = (uoff_t)off / NFS_DIRBLKSIZ;
+ if (pos == 0 || off < 0) {
+#ifdef DIAGNOSTIC
+ if (add)
+ panic("nfs getcookie add at <= 0");
+#endif
+ return (&nfs_nullcookie);
+ }
+ pos--;
+ dp = np->n_cookies.lh_first;
+ if (!dp) {
+ if (add) {
+ MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap),
+ M_NFSDIROFF, M_WAITOK);
+ dp->ndm_eocookie = 0;
+ LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list);
+ } else
+ return ((nfsuint64 *)0);
+ }
+ while (pos >= NFSNUMCOOKIES) {
+ pos -= NFSNUMCOOKIES;
+ if (dp->ndm_list.le_next) {
+ if (!add && dp->ndm_eocookie < NFSNUMCOOKIES &&
+ pos >= dp->ndm_eocookie)
+ return ((nfsuint64 *)0);
+ dp = dp->ndm_list.le_next;
+ } else if (add) {
+ MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap),
+ M_NFSDIROFF, M_WAITOK);
+ dp2->ndm_eocookie = 0;
+ LIST_INSERT_AFTER(dp, dp2, ndm_list);
+ dp = dp2;
+ } else
+ return ((nfsuint64 *)0);
+ }
+ if (pos >= dp->ndm_eocookie) {
+ if (add)
+ dp->ndm_eocookie = pos + 1;
+ else
+ return ((nfsuint64 *)0);
+ }
+ return (&dp->ndm_cookies[pos]);
+}
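The walk above amounts to simple index arithmetic: directory block 0 always maps to the null cookie, and after that each struct nfsdmap holds NFSNUMCOOKIES cookies in order. A small sketch of the same offset-to-slot calculation (512 and 14 below are demo values, not the kernel's NFS_DIRBLKSIZ/NFSNUMCOOKIES):

    #include <stdio.h>

    /* Demo values only; the kernel uses NFS_DIRBLKSIZ / NFSNUMCOOKIES. */
    #define DIRBLKSIZ	512
    #define NUMCOOKIES	14

    /* For a directory byte offset, compute which map entry and which
     * slot inside it holds the cookie that nfs_getcookie() walks to. */
    static void
    cookie_slot(long off)
    {
    	long pos = off / DIRBLKSIZ;

    	if (pos == 0) {
    		printf("off %ld: null cookie (first block)\n", off);
    		return;
    	}
    	pos--;
    	printf("off %ld: map entry %ld, slot %ld\n",
    	    off, pos / NUMCOOKIES, pos % NUMCOOKIES);
    }

    int
    main(void)
    {
    	cookie_slot(0);
    	cookie_slot(512);	/* first real cookie: entry 0, slot 0 */
    	cookie_slot(512 * 20);	/* entry 1, slot 5 */
    	return (0);
    }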
+
+/*
+ * Invalidate cached directory information, except for the actual directory
+ * blocks (which are invalidated separately).
+ * Done mainly to avoid the use of stale offset cookies.
+ */
+void
+nfs_invaldir(vp)
+ register struct vnode *vp;
+{
+ register struct nfsnode *np = VTONFS(vp);
+
+#ifdef DIAGNOSTIC
+ if (vp->v_type != VDIR)
+ panic("nfs: invaldir not dir");
+#endif
+ np->n_direofoffset = 0;
+ np->n_cookieverf.nfsuquad[0] = 0;
+ np->n_cookieverf.nfsuquad[1] = 0;
+ if (np->n_cookies.lh_first)
+ np->n_cookies.lh_first->ndm_eocookie = 0;
+}
+
+/*
+ * The write verifier has changed (probably due to a server reboot), so all
+ * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
+ * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
+ * and B_CLUSTEROK flags. Once done the new write verifier can be set for the
+ * mount point.
+ *
+ * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
+ * writes are not clusterable.
+ */
+void
+nfs_clearcommit(mp)
+ struct mount *mp;
+{
+ register struct vnode *vp, *nvp;
+ register struct buf *bp, *nbp;
+ int s;
+
+ s = splbio();
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+ if (vp->v_mount != mp) /* Paranoia */
+ goto loop;
+ nvp = vp->v_mntvnodes.le_next;
+ for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+ if (BUF_REFCNT(bp) == 0 &&
+ (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
+ == (B_DELWRI | B_NEEDCOMMIT))
+ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
+ }
+ }
+ splx(s);
+}
+
+#ifndef NFS_NOSERVER
+/*
+ * Map errnos to NFS error numbers. For Version 3 also filter out error
+ * numbers not specified for the associated procedure.
+ */
+int
+nfsrv_errmap(nd, err)
+ struct nfsrv_descript *nd;
+ register int err;
+{
+ register short *defaulterrp, *errp;
+
+ if (nd->nd_flag & ND_NFSV3) {
+ if (nd->nd_procnum <= NFSPROC_COMMIT) {
+ errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum];
+ while (*++errp) {
+ if (*errp == err)
+ return (err);
+ else if (*errp > err)
+ break;
+ }
+ return ((int)*defaulterrp);
+ } else
+ return (err & 0xffff);
+ }
+ if (err <= ELAST)
+ return ((int)nfsrv_v2errmap[err - 1]);
+ return (NFSERR_IO);
+}
+
+int
+nfsrv_object_create(vp)
+ struct vnode *vp;
+{
+
+ if (vp == NULL || vp->v_type != VREG)
+ return (1);
+ return (vfs_object_create(vp, curproc,
+ curproc ? curproc->p_ucred : NULL));
+}
+
+/*
+ * Sort the group list in increasing numerical order.
+ * (Insertion sort by Chris Torek, who was grossed out by the bubble sort
+ * that used to be here.)
+ */
+void
+nfsrvw_sort(list, num)
+ register gid_t *list;
+ register int num;
+{
+ register int i, j;
+ gid_t v;
+
+ /* Insertion sort. */
+ for (i = 1; i < num; i++) {
+ v = list[i];
+ /* find correct slot for value v, moving others up */
+ for (j = i; --j >= 0 && v < list[j];)
+ list[j + 1] = list[j];
+ list[j + 1] = v;
+ }
+}
+
+/*
+ * copy credentials making sure that the result can be compared with bcmp().
+ */
+void
+nfsrv_setcred(incred, outcred)
+ register struct ucred *incred, *outcred;
+{
+ register int i;
+
+ bzero((caddr_t)outcred, sizeof (struct ucred));
+ outcred->cr_ref = 1;
+ outcred->cr_uid = incred->cr_uid;
+ outcred->cr_ngroups = incred->cr_ngroups;
+ for (i = 0; i < incred->cr_ngroups; i++)
+ outcred->cr_groups[i] = incred->cr_groups[i];
+ nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups);
+}
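The point of sorting in nfsrv_setcred() is exactly what its comment says: two credentials naming the same groups in a different order end up byte-identical, so a plain bcmp() can compare them. A userland illustration, with memcmp standing in for bcmp and plain ints standing in for gid_t:

    #include <stdio.h>
    #include <string.h>

    /* Insertion sort as in nfsrvw_sort(), over ints for the demo. */
    static void
    sort_groups(int *list, int num)
    {
    	int i, j, v;

    	for (i = 1; i < num; i++) {
    		v = list[i];
    		for (j = i; --j >= 0 && v < list[j];)
    			list[j + 1] = list[j];
    		list[j + 1] = v;
    	}
    }

    int
    main(void)
    {
    	int a[] = { 20, 5, 0 };
    	int b[] = { 0, 20, 5 };

    	sort_groups(a, 3);
    	sort_groups(b, 3);
    	/* same membership, same byte image once sorted */
    	printf("%s\n", memcmp(a, b, sizeof(a)) == 0 ? "equal" : "different");
    	return (0);
    }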
+#endif /* NFS_NOSERVER */
diff --git a/sys/nfs/nfs_common.h b/sys/nfs/nfs_common.h
new file mode 100644
index 0000000..b13c009
--- /dev/null
+++ b/sys/nfs/nfs_common.h
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsm_subs.h 8.2 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_NFSM_SUBS_H_
+#define _NFS_NFSM_SUBS_H_
+
+struct ucred;
+struct vnode;
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+struct mbuf *nfsm_reqh __P((struct vnode *vp, u_long procid, int hsiz,
+ caddr_t *bposp));
+struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid,
+ int auth_type, int auth_len, char *auth_str,
+ int verf_len, char *verf_str,
+ struct mbuf *mrest, int mrest_len,
+ struct mbuf **mbp, u_int32_t *xidp));
+
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
+#define NFSMINOFF(m) \
+ do { \
+ if (M_HASCL(m)) \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ else if ((m)->m_flags & M_PKTHDR) \
+ (m)->m_data = (m)->m_pktdat; \
+ else \
+ (m)->m_data = (m)->m_dat; \
+ } while (0)
+#define NFSMADV(m, s) \
+ do { \
+ (m)->m_data += (s); \
+ } while (0)
+#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \
+ (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several variables declared in nfsm_reqhead, and those
+ * variables must not be used elsewhere unless you are careful not to
+ * corrupt them.  The variables starting with pN and tN (N=1,2,3,..) are
+ * temporaries that may be used so long as the value is not expected to be
+ * retained after a macro.
+ * I know, this is kind of dorky, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
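+ *
+ * The macro bodies below also reference locals such as tl, cp, cp2, bpos,
+ * dpos, t1, t2, error and the mbuf pointers mb, mb2, md, mrep and mreq;
+ * each op function is expected to declare these itself.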
+ */
+
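+/*
+ * nfsm_build(a, c, s) reserves s bytes of contiguous space in the request
+ * being built (mb/bpos), appending a fresh mbuf when the current one lacks
+ * trailing space, and returns a pointer to the space in (a) cast to type
+ * (c).  s must not exceed MLEN when a new mbuf has to be allocated.
+ */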
+#define nfsm_build(a,c,s) \
+ do { \
+ if ((s) > M_TRAILINGSPACE(mb)) { \
+ MGET(mb2, M_WAIT, MT_DATA); \
+ if ((s) > MLEN) \
+ panic("build > MLEN"); \
+ mb->m_next = mb2; \
+ mb = mb2; \
+ mb->m_len = 0; \
+ bpos = mtod(mb, caddr_t); \
+ } \
+ (a) = (c)(bpos); \
+ mb->m_len += (s); \
+ bpos += (s); \
+ } while (0)
+
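+/*
+ * nfsm_dissect(a, c, s) pulls the next s bytes out of the reply (md/dpos)
+ * and returns a pointer to them in (a) cast to type (c).  If the item
+ * straddles an mbuf boundary, nfsm_disct() copies it into contiguous
+ * storage (cp2); on failure the reply is freed and control jumps to
+ * nfsmout.
+ */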
+#define nfsm_dissect(a, c, s) \
+ do { \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ (a) = (c)(dpos); \
+ dpos += (s); \
+ } else if ((t1 = nfsm_disct(&md, &dpos, (s), t1, &cp2)) != 0){ \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } else { \
+ (a) = (c)cp2; \
+ } \
+ } while (0)
+
+#define nfsm_fhtom(v, v3) \
+ do { \
+ if (v3) { \
+ t2 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED; \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl, u_int32_t *, t2); \
+ *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize); \
+ *(tl + ((t2>>2) - 2)) = 0; \
+ bcopy((caddr_t)VTONFS(v)->n_fhp,(caddr_t)tl, \
+ VTONFS(v)->n_fhsize); \
+ } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, \
+ (caddr_t)VTONFS(v)->n_fhp, \
+ VTONFS(v)->n_fhsize)) != 0) { \
+ error = t2; \
+ m_freem(mreq); \
+ goto nfsmout; \
+ } \
+ } else { \
+ nfsm_build(cp, caddr_t, NFSX_V2FH); \
+ bcopy((caddr_t)VTONFS(v)->n_fhp, cp, NFSX_V2FH); \
+ } \
+ } while (0)
+
+#define nfsm_srvfhtom(f, v3) \
+ do { \
+ if (v3) { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FH);\
+ *tl++ = txdr_unsigned(NFSX_V3FH); \
+ bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \
+ } else { \
+ nfsm_build(cp, caddr_t, NFSX_V2FH); \
+ bcopy((caddr_t)(f), cp, NFSX_V2FH); \
+ } \
+ } while (0)
+
+#define nfsm_srvpostop_fh(f) \
+ do { \
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_V3FH); \
+ *tl++ = nfs_true; \
+ *tl++ = txdr_unsigned(NFSX_V3FH); \
+ bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \
+ } while (0)
+
+#define nfsm_mtofh(d, v, v3, f) \
+ do { \
+ struct nfsnode *ttnp; nfsfh_t *ttfhp; int ttfhsize; \
+ if (v3) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ (f) = fxdr_unsigned(int, *tl); \
+ } else \
+ (f) = 1; \
+ if (f) { \
+ nfsm_getfh(ttfhp, ttfhsize, (v3)); \
+ if ((t1 = nfs_nget((d)->v_mount, ttfhp, ttfhsize, \
+ &ttnp)) != 0) { \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = NFSTOV(ttnp); \
+ } \
+ if (v3) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (f) \
+ (f) = fxdr_unsigned(int, *tl); \
+ else if (fxdr_unsigned(int, *tl)) \
+ nfsm_adv(NFSX_V3FATTR); \
+ } \
+ if (f) \
+ nfsm_loadattr((v), (struct vattr *)0); \
+ } while (0)
+
+#define nfsm_getfh(f, s, v3) \
+ do { \
+ if (v3) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(int, *tl)) <= 0 || \
+ (s) > NFSX_V3FHMAX) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } \
+ } else \
+ (s) = NFSX_V2FH; \
+ nfsm_dissect((f), nfsfh_t *, nfsm_rndup(s)); \
+ } while (0)
+
+#define nfsm_loadattr(v, a) \
+ do { \
+ struct vnode *ttvp = (v); \
+ if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, (a))) != 0) { \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = ttvp; \
+ } while (0)
+
+#define nfsm_postop_attr(v, f) \
+ do { \
+ struct vnode *ttvp = (v); \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (((f) = fxdr_unsigned(int, *tl)) != 0) { \
+ if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, \
+ (struct vattr *)0)) != 0) { \
+ error = t1; \
+ (f) = 0; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = ttvp; \
+ } \
+ } while (0)
+
+/* Used as (f) for nfsm_wcc_data() */
+#define NFSV3_WCCRATTR 0
+#define NFSV3_WCCCHK 1
+
+#define nfsm_wcc_data(v, f) \
+ do { \
+ int ttattrf, ttretf = 0; \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED); \
+ if (f) \
+ ttretf = (VTONFS(v)->n_mtime == \
+ fxdr_unsigned(u_int32_t, *(tl + 2))); \
+ } \
+ nfsm_postop_attr((v), ttattrf); \
+ if (f) { \
+ (f) = ttretf; \
+ } else { \
+ (f) = ttattrf; \
+ } \
+ } while (0)
+
+/* If full is true, set all fields, otherwise just set mode and time fields */
+#define nfsm_v3attrbuild(a, full) \
+ do { \
+ if ((a)->va_mode != (mode_t)VNOVAL) { \
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ *tl++ = nfs_true; \
+ *tl = txdr_unsigned((a)->va_mode); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = nfs_false; \
+ } \
+ if ((full) && (a)->va_uid != (uid_t)VNOVAL) { \
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ *tl++ = nfs_true; \
+ *tl = txdr_unsigned((a)->va_uid); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = nfs_false; \
+ } \
+ if ((full) && (a)->va_gid != (gid_t)VNOVAL) { \
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ *tl++ = nfs_true; \
+ *tl = txdr_unsigned((a)->va_gid); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = nfs_false; \
+ } \
+ if ((full) && (a)->va_size != VNOVAL) { \
+ nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED); \
+ *tl++ = nfs_true; \
+ txdr_hyper((a)->va_size, tl); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = nfs_false; \
+ } \
+ if ((a)->va_atime.tv_sec != VNOVAL) { \
+ if ((a)->va_atime.tv_sec != time_second) { \
+ nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);\
+ *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);\
+ txdr_nfsv3time(&(a)->va_atime, tl); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \
+ } \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \
+ } \
+ if ((a)->va_mtime.tv_sec != VNOVAL) { \
+ if ((a)->va_mtime.tv_sec != time_second) { \
+ nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);\
+ *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);\
+ txdr_nfsv3time(&(a)->va_mtime, tl); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \
+ } \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \
+ } \
+ } while (0)
+
+
+#define nfsm_strsiz(s,m) \
+ do { \
+ nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(int32_t,*tl)) > (m)) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_srvstrsiz(s,m) \
+ do { \
+ nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(int32_t,*tl)) > (m) || (s) <= 0) { \
+ error = EBADRPC; \
+ nfsm_reply(0); \
+ } \
+ } while (0)
+
+#define nfsm_srvnamesiz(s) \
+ do { \
+ nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(int32_t,*tl)) > NFS_MAXNAMLEN) \
+ error = NFSERR_NAMETOL; \
+ if ((s) <= 0) \
+ error = EBADRPC; \
+ if (error) \
+ nfsm_reply(0); \
+ } while (0)
+
+#define nfsm_mtouio(p,s) \
+ do {\
+ if ((s) > 0 && \
+ (t1 = nfsm_mbuftouio(&md,(p),(s),&dpos)) != 0) { \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_uiotom(p,s) \
+ do { \
+ if ((t1 = nfsm_uiotombuf((p),&mb,(s),&bpos)) != 0) { \
+ error = t1; \
+ m_freem(mreq); \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_reqhead(v,a,s) \
+ do { \
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos); \
+ } while (0)
+
+#define nfsm_reqdone \
+ do { \
+ m_freem(mrep); \
+ nfsmout: \
+ } while (0)
+
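+/* Round a length up to the next multiple of 4 (XDR alignment), e.g. 5 -> 8. */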
+#define nfsm_rndup(a) (((a)+3)&(~0x3))
+
+#define nfsm_request(v, t, p, c) \
+ do { \
+ if ((error = nfs_request((v), mreq, (t), (p), \
+ (c), &mrep, &md, &dpos)) != 0) { \
+ if (error & NFSERR_RETERR) \
+ error &= ~NFSERR_RETERR; \
+ else \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_strtom(a,s,m) \
+ do {\
+ if ((s) > (m)) { \
+ m_freem(mreq); \
+ error = ENAMETOOLONG; \
+ goto nfsmout; \
+ } \
+ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl,u_int32_t *,t2); \
+ *tl++ = txdr_unsigned(s); \
+ *(tl+((t2>>2)-2)) = 0; \
+ bcopy((const char *)(a), (caddr_t)tl, (s)); \
+ } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, (a), (s))) != 0) { \
+ error = t2; \
+ m_freem(mreq); \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_srvdone \
+ do { \
+ nfsmout: \
+ return (error); \
+ } while (0)
+
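+/*
+ * nfsm_reply(s) starts the RPC reply for a server op: it records the error
+ * in nd_repstat, has nfs_rephead() build the reply header (with s bytes of
+ * room, or none for a failed V2 request), frees the request mbufs, and for
+ * V2 errors or EBADRPC clears the error and jumps to nfsmout.
+ */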
+#define nfsm_reply(s) \
+ do { \
+ nfsd->nd_repstat = error; \
+ if (error && !(nfsd->nd_flag & ND_NFSV3)) \
+ (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ if (mrep != NULL) { \
+ m_freem(mrep); \
+ mrep = NULL; \
+ } \
+ mreq = *mrq; \
+ if (error && (!(nfsd->nd_flag & ND_NFSV3) || \
+ error == EBADRPC)) { \
+ error = 0; \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_writereply(s, v3) \
+ do { \
+ nfsd->nd_repstat = error; \
+ if (error && !(v3)) \
+ (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \
+ &mreq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \
+ &mreq, &mb, &bpos); \
+ } while (0)
+
+#define nfsm_adv(s) \
+ do { \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ dpos += (s); \
+ } else if ((t1 = nfs_adv(&md, &dpos, (s), t1)) != 0) { \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_srvmtofh(f) \
+ do { \
+ int fhlen; \
+ if (nfsd->nd_flag & ND_NFSV3) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ fhlen = fxdr_unsigned(int, *tl); \
+ if (fhlen != 0 && fhlen != NFSX_V3FH) { \
+ error = EBADRPC; \
+ nfsm_reply(0); \
+ } \
+ } else { \
+ fhlen = NFSX_V2FH; \
+ } \
+ if (fhlen != 0) { \
+ nfsm_dissect(tl, u_int32_t *, fhlen); \
+ bcopy((caddr_t)tl, (caddr_t)(f), fhlen); \
+ } else {\
+ bzero((caddr_t)(f), NFSX_V3FH); \
+ } \
+ } while (0)
+
+#define nfsm_clget \
+ do { \
+ if (bp >= be) { \
+ if (mp == mb) \
+ mp->m_len += bp-bpos; \
+ MGET(mp, M_WAIT, MT_DATA); \
+ MCLGET(mp, M_WAIT); \
+ mp->m_len = NFSMSIZ(mp); \
+ mp2->m_next = mp; \
+ mp2 = mp; \
+ bp = mtod(mp, caddr_t); \
+ be = bp+mp->m_len; \
+ } \
+ tl = (u_int32_t *)bp; \
+ } while (0)
+
+#define nfsm_srvfillattr(a, f) \
+ do { \
+ nfsm_srvfattr(nfsd, (a), (f)); \
+ } while (0)
+
+#define nfsm_srvwcc_data(br, b, ar, a) \
+ do { \
+ nfsm_srvwcc(nfsd, (br), (b), (ar), (a), &mb, &bpos); \
+ } while (0)
+
+#define nfsm_srvpostop_attr(r, a) \
+ do { \
+ nfsm_srvpostopattr(nfsd, (r), (a), &mb, &bpos); \
+ } while (0)
+
+#define nfsm_srvsattr(a) \
+ do { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ (a)->va_mode = nfstov_mode(*tl); \
+ } \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ (a)->va_uid = fxdr_unsigned(uid_t, *tl); \
+ } \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ (a)->va_gid = fxdr_unsigned(gid_t, *tl); \
+ } \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ (a)->va_size = fxdr_hyper(tl); \
+ } \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ switch (fxdr_unsigned(int, *tl)) { \
+ case NFSV3SATTRTIME_TOCLIENT: \
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ fxdr_nfsv3time(tl, &(a)->va_atime); \
+ break; \
+ case NFSV3SATTRTIME_TOSERVER: \
+ getnanotime(&(a)->va_atime); \
+ break; \
+ }; \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ switch (fxdr_unsigned(int, *tl)) { \
+ case NFSV3SATTRTIME_TOCLIENT: \
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ fxdr_nfsv3time(tl, &(a)->va_mtime); \
+ break; \
+ case NFSV3SATTRTIME_TOSERVER: \
+ getnanotime(&(a)->va_mtime); \
+ break; \
+ } \
+ } while (0)
+
+#endif
diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c
new file mode 100644
index 0000000..89cbdce
--- /dev/null
+++ b/sys/nfs/nfs_node.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_node.c 8.6 (Berkeley) 5/22/95
+ * $FreeBSD$
+ */
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <vm/vm_zone.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+
+static vm_zone_t nfsnode_zone;
+static LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl;
+static u_long nfsnodehash;
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Initialize hash links for nfsnodes
+ * and build nfsnode free list.
+ */
+void
+nfs_nhinit()
+{
+ nfsnode_zone = zinit("NFSNODE", sizeof(struct nfsnode), 0, 0, 1);
+ nfsnodehashtbl = hashinit(desiredvnodes, M_NFSHASH, &nfsnodehash);
+}
+
+/*
+ * Compute an entry in the NFS hash table structure
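+ * (a byte-wise sum of the file handle; the call sites reduce it to a
+ * table index with the NFSNOHASH() macro)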
+ */
+u_long
+nfs_hash(fhp, fhsize)
+ register nfsfh_t *fhp;
+ int fhsize;
+{
+ register u_char *fhpp;
+ register u_long fhsum;
+ register int i;
+
+ fhpp = &fhp->fh_bytes[0];
+ fhsum = 0;
+ for (i = 0; i < fhsize; i++)
+ fhsum += *fhpp++;
+ return (fhsum);
+}
+
+/*
+ * Look up a vnode/nfsnode by file handle.
+ * Callers must check for mount points!!
+ * In all cases, a pointer to an nfsnode structure is returned.
+ */
+static int nfs_node_hash_lock;
+
+int
+nfs_nget(mntp, fhp, fhsize, npp)
+ struct mount *mntp;
+ register nfsfh_t *fhp;
+ int fhsize;
+ struct nfsnode **npp;
+{
+ struct proc *p = curproc; /* XXX */
+ struct nfsnode *np, *np2;
+ struct nfsnodehashhead *nhpp;
+ register struct vnode *vp;
+ struct vnode *nvp;
+ int error;
+
+retry:
+ nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
+loop:
+ for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) {
+ if (mntp != NFSTOV(np)->v_mount || np->n_fhsize != fhsize ||
+ bcmp((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize))
+ continue;
+ vp = NFSTOV(np);
+ if (vget(vp, LK_EXCLUSIVE, p))
+ goto loop;
+ *npp = np;
+ return(0);
+ }
+ /*
+ * Obtain a lock to prevent a race condition if the getnewvnode()
+ * or MALLOC() below happens to block.
+ */
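+	/*
+	 * The hand-rolled hash lock has three states: 0 means free, 1 means
+	 * held with no waiters, and -1 means held with at least one sleeper
+	 * that must be woken up when the lock is released.
+	 */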
+ if (nfs_node_hash_lock) {
+ while (nfs_node_hash_lock) {
+ nfs_node_hash_lock = -1;
+ tsleep(&nfs_node_hash_lock, PVM, "nfsngt", 0);
+ }
+ goto loop;
+ }
+ nfs_node_hash_lock = 1;
+
+ /*
+ * Allocate before getnewvnode since doing so afterward
+ * might cause a bogus v_data pointer to get dereferenced
+ * elsewhere if zalloc should block.
+ */
+ np = zalloc(nfsnode_zone);
+
+ error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, &nvp);
+ if (error) {
+ if (nfs_node_hash_lock < 0)
+ wakeup(&nfs_node_hash_lock);
+ nfs_node_hash_lock = 0;
+ *npp = 0;
+ zfree(nfsnode_zone, np);
+ return (error);
+ }
+ vp = nvp;
+ bzero((caddr_t)np, sizeof *np);
+ vp->v_data = np;
+ np->n_vnode = vp;
+ /*
+ * Insert the nfsnode in the hash queue for its new file handle
+ */
+ for (np2 = nhpp->lh_first; np2 != 0; np2 = np2->n_hash.le_next) {
+ if (mntp != NFSTOV(np2)->v_mount || np2->n_fhsize != fhsize ||
+ bcmp((caddr_t)fhp, (caddr_t)np2->n_fhp, fhsize))
+ continue;
+ vrele(vp);
+ if (nfs_node_hash_lock < 0)
+ wakeup(&nfs_node_hash_lock);
+ nfs_node_hash_lock = 0;
+ zfree(nfsnode_zone, np);
+ goto retry;
+ }
+ LIST_INSERT_HEAD(nhpp, np, n_hash);
+ if (fhsize > NFS_SMALLFH) {
+ MALLOC(np->n_fhp, nfsfh_t *, fhsize, M_NFSBIGFH, M_WAITOK);
+ } else
+ np->n_fhp = &np->n_fh;
+ bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
+ np->n_fhsize = fhsize;
+ lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE);
+ *npp = np;
+
+ if (nfs_node_hash_lock < 0)
+ wakeup(&nfs_node_hash_lock);
+ nfs_node_hash_lock = 0;
+
+ /*
+ * Lock the new nfsnode.
+ */
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+
+ return (0);
+}
+
+int
+nfs_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct nfsnode *np;
+ register struct sillyrename *sp;
+ struct proc *p = curproc; /* XXX */
+
+ np = VTONFS(ap->a_vp);
+ if (prtactive && ap->a_vp->v_usecount != 0)
+ vprint("nfs_inactive: pushing active", ap->a_vp);
+ if (ap->a_vp->v_type != VDIR) {
+ sp = np->n_sillyrename;
+ np->n_sillyrename = (struct sillyrename *)0;
+ } else
+ sp = (struct sillyrename *)0;
+ if (sp) {
+ /*
+ * We need a reference to keep the vnode from being
+ * recycled by getnewvnode while we do the I/O
+		 * associated with discarding the buffers, unless we
+		 * are being forcibly unmounted, in which case we already
+		 * have our own reference.
+ */
+ if (ap->a_vp->v_usecount > 0)
+ (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+ else if (vget(ap->a_vp, 0, p))
+ panic("nfs_inactive: lost vnode");
+ else {
+ (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+ vrele(ap->a_vp);
+ }
+ /*
+ * Remove the silly file that was rename'd earlier
+ */
+ nfs_removeit(sp);
+ crfree(sp->s_cred);
+ vrele(sp->s_dvp);
+ FREE((caddr_t)sp, M_NFSREQ);
+ }
+ np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED |
+ NQNFSNONCACHE | NQNFSWRITE);
+ VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ return (0);
+}
+
+/*
+ * Reclaim an nfsnode so that it can be used for other purposes.
+ */
+int
+nfs_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ register struct nfsdmap *dp, *dp2;
+
+ if (prtactive && vp->v_usecount != 0)
+ vprint("nfs_reclaim: pushing active", vp);
+
+ if (np->n_hash.le_prev != NULL)
+ LIST_REMOVE(np, n_hash);
+
+ /*
+ * For nqnfs, take it off the timer queue as required.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) && np->n_timer.cqe_next != 0) {
+ CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer);
+ }
+
+ /*
+ * Free up any directory cookie structures and
+ * large file handle structures that might be associated with
+ * this nfs node.
+ */
+ if (vp->v_type == VDIR) {
+ dp = np->n_cookies.lh_first;
+ while (dp) {
+ dp2 = dp;
+ dp = dp->ndm_list.le_next;
+ FREE((caddr_t)dp2, M_NFSDIROFF);
+ }
+ }
+ if (np->n_fhsize > NFS_SMALLFH) {
+ FREE((caddr_t)np->n_fhp, M_NFSBIGFH);
+ }
+
+ cache_purge(vp);
+ zfree(nfsnode_zone, vp->v_data);
+ vp->v_data = (void *)0;
+ return (0);
+}
+
+#if 0
+/*
+ * Lock an nfsnode
+ */
+int
+nfs_lock(ap)
+ struct vop_lock_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+
+ /*
+ * Ugh, another place where interruptible mounts will get hung.
+ * If you make this sleep interruptible, then you have to fix all
+ * the VOP_LOCK() calls to expect interruptibility.
+ */
+ while (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ (void) tsleep((caddr_t)vp, PINOD, "nfslck", 0);
+ }
+ if (vp->v_tag == VT_NON)
+ return (ENOENT);
+
+#if 0
+ /*
+ * Only lock regular files. If a server crashed while we were
+ * holding a directory lock, we could easily end up sleeping
+ * until the server rebooted while holding a lock on the root.
+ * Locks are only needed for protecting critical sections in
+ * VMIO at the moment.
+ * New vnodes will have type VNON but they should be locked
+ * since they may become VREG. This is checked in loadattrcache
+ * and unwanted locks are released there.
+ */
+ if (vp->v_type == VREG || vp->v_type == VNON) {
+ while (np->n_flag & NLOCKED) {
+ np->n_flag |= NWANTED;
+ (void) tsleep((caddr_t) np, PINOD, "nfslck2", 0);
+ /*
+ * If the vnode has transmuted into a VDIR while we
+ * were asleep, then skip the lock.
+ */
+ if (vp->v_type != VREG && vp->v_type != VNON)
+ return (0);
+ }
+ np->n_flag |= NLOCKED;
+ }
+#endif
+
+ return (0);
+}
+
+/*
+ * Unlock an nfsnode
+ */
+int
+nfs_unlock(ap)
+ struct vop_unlock_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+#if 0
+ struct vnode* vp = ap->a_vp;
+ struct nfsnode* np = VTONFS(vp);
+
+ if (vp->v_type == VREG || vp->v_type == VNON) {
+ if (!(np->n_flag & NLOCKED))
+ panic("nfs_unlock: nfsnode not locked");
+ np->n_flag &= ~NLOCKED;
+ if (np->n_flag & NWANTED) {
+ np->n_flag &= ~NWANTED;
+ wakeup((caddr_t) np);
+ }
+ }
+#endif
+
+ return (0);
+}
+
+/*
+ * Check for a locked nfsnode
+ */
+int
+nfs_islocked(ap)
+ struct vop_islocked_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ return VTONFS(ap->a_vp)->n_flag & NLOCKED ? 1 : 0;
+}
+#endif
+
diff --git a/sys/nfs/nfs_nqlease.c b/sys/nfs/nfs_nqlease.c
new file mode 100644
index 0000000..ccaa848
--- /dev/null
+++ b/sys/nfs/nfs_nqlease.c
@@ -0,0 +1,1307 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_nqlease.c 8.9 (Berkeley) 5/20/95
+ * $FreeBSD$
+ */
+
+
+/*
+ * References:
+ * Cary G. Gray and David R. Cheriton, "Leases: An Efficient Fault-Tolerant
+ * Mechanism for Distributed File Cache Consistency",
+ * In Proc. of the Twelfth ACM Symposium on Operating Systems
+ *		Principles, pg. 202-210, Litchfield Park, AZ, Dec. 1989.
+ * Michael N. Nelson, Brent B. Welch and John K. Ousterhout, "Caching
+ * in the Sprite Network File System", ACM TOCS 6(1),
+ * pages 134-154, February 1988.
+ * V. Srinivasan and Jeffrey C. Mogul, "Spritely NFS: Implementation and
+ * Performance of Cache-Consistency Protocols", Digital
+ * Equipment Corporation WRL Research Report 89/5, May 1989.
+ */
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+
+#include <vm/vm_zone.h>
+
+#include <netinet/in.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+
+static MALLOC_DEFINE(M_NQMHOST, "NQNFS Host", "Nqnfs host address table");
+
+time_t nqnfsstarttime = (time_t)0;
+int nqsrv_clockskew = NQ_CLOCKSKEW;
+int nqsrv_writeslack = NQ_WRITESLACK;
+int nqsrv_maxlease = NQ_MAXLEASE;
+#ifndef NFS_NOSERVER
+static int nqsrv_maxnumlease = NQ_MAXNUMLEASE;
+#endif
+
+struct vop_lease_args;
+
+#ifndef NFS_NOSERVER
+static int nqsrv_cmpnam __P((struct nfssvc_sock *, struct sockaddr *,
+ struct nqhost *));
+static int nqnfs_vacated __P((struct vnode *vp, struct ucred *cred));
+static void nqsrv_addhost __P((struct nqhost *lph, struct nfssvc_sock *slp,
+ struct sockaddr *nam));
+static void nqsrv_instimeq __P((struct nqlease *lp, u_int32_t duration));
+static void nqsrv_locklease __P((struct nqlease *lp));
+static void nqsrv_send_eviction __P((struct vnode *vp, struct nqlease *lp,
+ struct nfssvc_sock *slp,
+ struct sockaddr *nam,
+ struct ucred *cred));
+static void nqsrv_unlocklease __P((struct nqlease *lp));
+static void nqsrv_waitfor_expiry __P((struct nqlease *lp));
+#endif
+extern void nqnfs_lease_updatetime __P((int deltat));
+
+/*
+ * Signifies which rpcs can have piggybacked lease requests
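+ * The table appears to be indexed by NFS procedure number: the entries
+ * that request a write lease (ND_WRITE) correspond to SETATTR and WRITE,
+ * and the read-lease entries (ND_READ) to LOOKUP, READLINK, READ, READDIR
+ * and READDIRPLUS, assuming v3 procedure numbering.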
+ */
+int nqnfs_piggy[NFS_NPROCS] = {
+ 0,
+ 0,
+ ND_WRITE,
+ ND_READ,
+ 0,
+ ND_READ,
+ ND_READ,
+ ND_WRITE,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ ND_READ,
+ ND_READ,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+};
+
+extern nfstype nfsv2_type[9];
+extern nfstype nfsv3_type[9];
+extern int nfsd_waiting;
+extern struct nfsstats nfsstats;
+
+#define TRUE 1
+#define FALSE 0
+
+#ifndef NFS_NOSERVER
+/*
+ * Get or check for a lease for "vp", based on ND_CHECK flag.
+ * The rules are as follows:
+ * - if a current non-caching lease, reply non-caching
+ * - if a current lease for same host only, extend lease
+ * - if a read cachable lease and a read lease request
+ *	add host to list and reply cachable
+ * - else { set non-cachable for read-write sharing }
+ *	send eviction notice messages to all other hosts that have the lease
+ *	wait for lease termination { either by receiving vacated messages
+ *							from all the other hosts or expiry
+ *							via timeout }
+ * modify lease to non-cachable
+ * - else if no current lease, issue new one
+ * - reply
+ * - return boolean TRUE iff nam should be m_freem()'d
+ * NB: Since nqnfs_serverd() is called from a timer, any potential tsleep()
+ * in here must be framed by nqsrv_locklease() and nqsrv_unlocklease().
+ * nqsrv_locklease() is coded such that at least one of LC_LOCKED and
+ * LC_WANTED is set whenever a process is tsleeping in it. The exception
+ * is when a new lease is being allocated, since it is not in the timer
+ * queue yet. (Ditto for the splsoftclock() and splx(s) calls)
+ */
+int
+nqsrv_getlease(vp, duration, flags, slp, procp, nam, cachablep, frev, cred)
+ struct vnode *vp;
+ u_int32_t *duration;
+ int flags;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct sockaddr *nam;
+ int *cachablep;
+ u_quad_t *frev;
+ struct ucred *cred;
+{
+ register struct nqlease *lp;
+ register struct nqfhhashhead *lpp = NULL;
+ register struct nqhost *lph = NULL;
+ struct nqlease *tlp;
+ struct nqm **lphp;
+ struct vattr vattr;
+ fhandle_t fh;
+ int i, ok, error, s;
+
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
+ return (0);
+ if (*duration > nqsrv_maxlease)
+ *duration = nqsrv_maxlease;
+ error = VOP_GETATTR(vp, &vattr, cred, procp);
+ if (error)
+ return (error);
+ *frev = vattr.va_filerev;
+ s = splsoftclock();
+ tlp = vp->v_lease;
+ if ((flags & ND_CHECK) == 0)
+ nfsstats.srvnqnfs_getleases++;
+ if (tlp == 0) {
+ /*
+ * Find the lease by searching the hash list.
+ */
+ fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ error = VFS_VPTOFH(vp, &fh.fh_fid);
+ if (error) {
+ splx(s);
+ return (error);
+ }
+ lpp = NQFHHASH(fh.fh_fid.fid_data);
+ for (lp = lpp->lh_first; lp != 0; lp = lp->lc_hash.le_next)
+ if (fh.fh_fsid.val[0] == lp->lc_fsid.val[0] &&
+ fh.fh_fsid.val[1] == lp->lc_fsid.val[1] &&
+ !bcmp(fh.fh_fid.fid_data, lp->lc_fiddata,
+ fh.fh_fid.fid_len - sizeof (int32_t))) {
+ /* Found it */
+ lp->lc_vp = vp;
+ vp->v_lease = lp;
+ tlp = lp;
+ break;
+ }
+ } else
+ lp = tlp;
+ if (lp != 0) {
+ if ((lp->lc_flag & LC_NONCACHABLE) ||
+ (lp->lc_morehosts == (struct nqm *)0 &&
+ nqsrv_cmpnam(slp, nam, &lp->lc_host)))
+ goto doreply;
+ if ((flags & ND_READ) && (lp->lc_flag & LC_WRITE) == 0) {
+ if (flags & ND_CHECK)
+ goto doreply;
+ if (nqsrv_cmpnam(slp, nam, &lp->lc_host))
+ goto doreply;
+ i = 0;
+ if (lp->lc_morehosts) {
+ lph = lp->lc_morehosts->lpm_hosts;
+ lphp = &lp->lc_morehosts->lpm_next;
+ ok = 1;
+ } else {
+ lphp = &lp->lc_morehosts;
+ ok = 0;
+ }
+ while (ok && (lph->lph_flag & LC_VALID)) {
+ if (nqsrv_cmpnam(slp, nam, lph))
+ goto doreply;
+ if (++i == LC_MOREHOSTSIZ) {
+ i = 0;
+ if (*lphp) {
+ lph = (*lphp)->lpm_hosts;
+ lphp = &((*lphp)->lpm_next);
+ } else
+ ok = 0;
+ } else
+ lph++;
+ }
+ nqsrv_locklease(lp);
+ if (!ok) {
+ *lphp = (struct nqm *)
+ malloc(sizeof (struct nqm),
+ M_NQMHOST, M_WAITOK);
+ bzero((caddr_t)*lphp, sizeof (struct nqm));
+ lph = (*lphp)->lpm_hosts;
+ }
+ nqsrv_addhost(lph, slp, nam);
+ nqsrv_unlocklease(lp);
+ } else {
+ lp->lc_flag |= LC_NONCACHABLE;
+ nqsrv_locklease(lp);
+ nqsrv_send_eviction(vp, lp, slp, nam, cred);
+ nqsrv_waitfor_expiry(lp);
+ nqsrv_unlocklease(lp);
+ }
+doreply:
+ /*
+ * Update the lease and return
+ */
+ if ((flags & ND_CHECK) == 0)
+ nqsrv_instimeq(lp, *duration);
+ if (lp->lc_flag & LC_NONCACHABLE)
+ *cachablep = 0;
+ else {
+ *cachablep = 1;
+ if (flags & ND_WRITE)
+ lp->lc_flag |= LC_WRITTEN;
+ }
+ splx(s);
+ return (0);
+ }
+ splx(s);
+ if (flags & ND_CHECK)
+ return (0);
+
+ /*
+ * Allocate new lease
+ * The value of nqsrv_maxnumlease should be set generously, so that
+ * the following "printf" happens infrequently.
+ */
+ if (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease) {
+ printf("Nqnfs server, too many leases\n");
+ do {
+ (void) tsleep((caddr_t)&lbolt, PSOCK,
+ "nqsrvnuml", 0);
+ } while (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease);
+ }
+ MALLOC(lp, struct nqlease *, sizeof (struct nqlease), M_NQLEASE, M_WAITOK);
+ bzero((caddr_t)lp, sizeof (struct nqlease));
+ if (flags & ND_WRITE)
+ lp->lc_flag |= (LC_WRITE | LC_WRITTEN);
+ nqsrv_addhost(&lp->lc_host, slp, nam);
+ lp->lc_vp = vp;
+ lp->lc_fsid = fh.fh_fsid;
+ bcopy(fh.fh_fid.fid_data, lp->lc_fiddata,
+ fh.fh_fid.fid_len - sizeof (int32_t));
+ if(!lpp)
+ panic("nfs_nqlease.c: Phoney lpp");
+ LIST_INSERT_HEAD(lpp, lp, lc_hash);
+ vp->v_lease = lp;
+ s = splsoftclock();
+ nqsrv_instimeq(lp, *duration);
+ splx(s);
+ *cachablep = 1;
+ if (++nfsstats.srvnqnfs_leases > nfsstats.srvnqnfs_maxleases)
+ nfsstats.srvnqnfs_maxleases = nfsstats.srvnqnfs_leases;
+ return (0);
+}
+
+/*
+ * Local lease check for server syscalls.
+ * Just set up args and let nqsrv_getlease() do the rest.
+ * nqnfs_vop_lease_check() is the VOP_LEASE() form of the same routine.
+ * Ifdef'd code in nfsnode.h renames these routines to whatever a particular
+ * OS needs.
+ */
+void
+nqnfs_lease_check(vp, p, cred, flag)
+ struct vnode *vp;
+ struct proc *p;
+ struct ucred *cred;
+ int flag;
+{
+ u_int32_t duration = 0;
+ int cache;
+ u_quad_t frev;
+
+ (void) nqsrv_getlease(vp, &duration, ND_CHECK | flag, NQLOCALSLP,
+ p, (struct sockaddr *)0, &cache, &frev, cred);
+}
+
+int
+nqnfs_vop_lease_check(ap)
+ struct vop_lease_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ struct ucred *a_cred;
+ int a_flag;
+ } */ *ap;
+{
+ u_int32_t duration = 0;
+ int cache;
+ u_quad_t frev;
+
+ (void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
+ NQLOCALSLP, ap->a_p, (struct sockaddr *)0,
+ &cache, &frev, ap->a_cred);
+ return (0);
+}
+
+
+/*
+ * Add a host to an nqhost structure for a lease.
+ */
+static void
+nqsrv_addhost(lph, slp, nam)
+ register struct nqhost *lph;
+ struct nfssvc_sock *slp;
+ struct sockaddr *nam;
+{
+ struct sockaddr_in *saddr;
+ struct socket *nsso;
+
+ if (slp == NQLOCALSLP) {
+ lph->lph_flag |= (LC_VALID | LC_LOCAL);
+ return;
+ }
+ nsso = slp->ns_so;
+ if (nsso && nsso->so_proto->pr_protocol == IPPROTO_UDP) {
+ saddr = (struct sockaddr_in *)nam;
+ lph->lph_flag |= (LC_VALID | LC_UDP);
+ lph->lph_inetaddr = saddr->sin_addr.s_addr;
+ lph->lph_port = saddr->sin_port;
+#ifdef ISO
+ } else if (nsso && nsso->so_proto->pr_protocol == ISOPROTO_CLTP) {
+ lph->lph_nam = dup_sockaddr(nam, 1);
+ lph->lph_flag |= (LC_VALID | LC_CLTP);
+#endif
+ } else {
+ lph->lph_flag |= (LC_VALID | LC_SREF);
+ lph->lph_slp = slp;
+ slp->ns_sref++;
+ }
+}
+
+/*
+ * Update the lease expiry time and position it in the timer queue correctly.
+ */
+static void
+nqsrv_instimeq(lp, duration)
+ register struct nqlease *lp;
+ u_int32_t duration;
+{
+ register struct nqlease *tlp;
+ time_t newexpiry;
+
+ newexpiry = time_second + duration + nqsrv_clockskew;
+ if (lp->lc_expiry == newexpiry)
+ return;
+ if (lp->lc_timer.cqe_next != 0) {
+ CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer);
+ }
+ lp->lc_expiry = newexpiry;
+
+ /*
+ * Find where in the queue it should be.
+ */
+ tlp = nqtimerhead.cqh_last;
+ while (tlp != (void *)&nqtimerhead && tlp->lc_expiry > newexpiry)
+ tlp = tlp->lc_timer.cqe_prev;
+#ifdef HASNVRAM
+ if (tlp == nqtimerhead.cqh_last)
+ NQSTORENOVRAM(newexpiry);
+#endif /* HASNVRAM */
+ if (tlp == (void *)&nqtimerhead) {
+ CIRCLEQ_INSERT_HEAD(&nqtimerhead, lp, lc_timer);
+ } else {
+ CIRCLEQ_INSERT_AFTER(&nqtimerhead, tlp, lp, lc_timer);
+ }
+}
+
+/*
+ * Compare the requesting host address with the lph entry in the lease.
+ * Return true iff it is the same.
+ * This is somewhat messy due to the union in the nqhost structure.
+ * The local host is indicated by the special value of NQLOCALSLP for slp.
+ */
+static int
+nqsrv_cmpnam(slp, nam, lph)
+ register struct nfssvc_sock *slp;
+ struct sockaddr *nam;
+ register struct nqhost *lph;
+{
+ register struct sockaddr_in *saddr;
+ struct sockaddr *addr;
+ union nethostaddr lhaddr;
+ struct socket *nsso;
+ int ret;
+
+ if (slp == NQLOCALSLP) {
+ if (lph->lph_flag & LC_LOCAL)
+ return (1);
+ else
+ return (0);
+ }
+ nsso = slp->ns_so;
+ if (nsso && nsso->so_proto->pr_protocol == IPPROTO_UDP) {
+ addr = nam;
+#ifdef ISO
+ } else if (nsso && nsso->so_proto->pr_protocol == ISOPROTO_CLTP) {
+ addr = nam;
+#endif
+ } else {
+ addr = slp->ns_nam;
+ }
+ if (lph->lph_flag & LC_UDP) {
+ ret = netaddr_match(AF_INET, &lph->lph_haddr, addr);
+ } else if (lph->lph_flag & LC_CLTP) {
+ ret = netaddr_match(AF_ISO, &lph->lph_claddr, addr);
+ } else {
+ if ((lph->lph_slp->ns_flag & SLP_VALID) == 0)
+ return (0);
+ saddr = (struct sockaddr_in *)lph->lph_slp->ns_nam;
+ if (saddr->sin_family == AF_INET)
+ lhaddr.had_inetaddr = saddr->sin_addr.s_addr;
+ else
+ lhaddr.had_nam = lph->lph_slp->ns_nam;
+ ret = netaddr_match(saddr->sin_family, &lhaddr, addr);
+ }
+ return (ret);
+}
+
+/*
+ * Send out eviction notice messages to all other hosts for the lease.
+ */
+static void
+nqsrv_send_eviction(vp, lp, slp, nam, cred)
+ struct vnode *vp;
+ register struct nqlease *lp;
+ struct nfssvc_sock *slp;
+ struct sockaddr *nam;
+ struct ucred *cred;
+{
+ register struct nqhost *lph = &lp->lc_host;
+ register int siz;
+ struct nqm *lphnext = lp->lc_morehosts;
+ struct mbuf *m, *mreq, *mb, *mb2, *mheadend;
+ struct socket *so;
+ struct sockaddr *nam2;
+ struct sockaddr_in *saddr;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ caddr_t bpos, cp;
+ u_int32_t xid, *tl;
+ int len = 1, ok = 1, i = 0;
+ int sotype, *solockp;
+
+ while (ok && (lph->lph_flag & LC_VALID)) {
+ if (nqsrv_cmpnam(slp, nam, lph))
+ lph->lph_flag |= LC_VACATED;
+ else if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) {
+ if (lph->lph_flag & LC_UDP) {
+ MALLOC(nam2, struct sockaddr *,
+ sizeof *nam2, M_SONAME, M_WAITOK);
+ saddr = (struct sockaddr_in *)nam2;
+ saddr->sin_len = sizeof *saddr;
+ saddr->sin_family = AF_INET;
+ saddr->sin_addr.s_addr = lph->lph_inetaddr;
+ saddr->sin_port = lph->lph_port;
+ so = lph->lph_slp->ns_so;
+ } else if (lph->lph_flag & LC_CLTP) {
+ nam2 = lph->lph_nam;
+ so = lph->lph_slp->ns_so;
+ } else if (lph->lph_slp->ns_flag & SLP_VALID) {
+ nam2 = (struct sockaddr *)0;
+ so = lph->lph_slp->ns_so;
+ } else
+ goto nextone;
+ sotype = so->so_type;
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+ solockp = &lph->lph_slp->ns_solock;
+ else
+ solockp = (int *)0;
+ nfsm_reqhead((struct vnode *)0, NQNFSPROC_EVICTED,
+ NFSX_V3FH + NFSX_UNSIGNED);
+ fhp = &nfh.fh_generic;
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ VFS_VPTOFH(vp, &fhp->fh_fid);
+ nfsm_srvfhtom(fhp, 1);
+ m = mreq;
+ siz = 0;
+ while (m) {
+ siz += m->m_len;
+ m = m->m_next;
+ }
+ if (siz <= 0 || siz > NFS_MAXPACKET) {
+ printf("mbuf siz=%d\n",siz);
+ panic("Bad nfs svc reply");
+ }
+ m = nfsm_rpchead(cred, (NFSMNT_NFSV3 | NFSMNT_NQNFS),
+ NQNFSPROC_EVICTED,
+ RPCAUTH_UNIX, 5 * NFSX_UNSIGNED, (char *)0,
+ 0, (char *)NULL, mreq, siz, &mheadend, &xid);
+ /*
+ * For stream protocols, prepend a Sun RPC
+ * Record Mark.
+ */
+ if (sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_int32_t *) = htonl(0x80000000 |
+ (m->m_pkthdr.len - NFSX_UNSIGNED));
+ }
+ /*
+ * nfs_sndlock if PR_CONNREQUIRED XXX
+ */
+
+ if (((lph->lph_flag & (LC_UDP | LC_CLTP)) == 0 &&
+ (lph->lph_slp->ns_flag & SLP_VALID) == 0) ||
+ (nfs_slplock(lph->lph_slp, 0) == 0))
+ m_freem(m);
+ else {
+ (void) nfs_send(so, nam2, m,
+ (struct nfsreq *)0);
+ if (solockp)
+ nfs_slpunlock(lph->lph_slp);
+ }
+ if (lph->lph_flag & LC_UDP)
+ FREE(nam2, M_SONAME);
+ }
+nextone:
+ if (++i == len) {
+ if (lphnext) {
+ i = 0;
+ len = LC_MOREHOSTSIZ;
+ lph = lphnext->lpm_hosts;
+ lphnext = lphnext->lpm_next;
+ } else
+ ok = 0;
+ } else
+ lph++;
+ }
+}
+
+/*
+ * Wait for the lease to expire.
+ * This will occur when all clients have sent "vacated" messages to
+ * this server OR when it expires due to timeout.
+ */
+static void
+nqsrv_waitfor_expiry(lp)
+ register struct nqlease *lp;
+{
+ register struct nqhost *lph;
+ register int i;
+ struct nqm *lphnext;
+ int len, ok;
+
+tryagain:
+ if (time_second > lp->lc_expiry)
+ return;
+ lph = &lp->lc_host;
+ lphnext = lp->lc_morehosts;
+ len = 1;
+ i = 0;
+ ok = 1;
+ while (ok && (lph->lph_flag & LC_VALID)) {
+ if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) {
+ lp->lc_flag |= LC_EXPIREDWANTED;
+ (void) tsleep((caddr_t)&lp->lc_flag, PSOCK,
+ "nqexp", 0);
+ goto tryagain;
+ }
+ if (++i == len) {
+ if (lphnext) {
+ i = 0;
+ len = LC_MOREHOSTSIZ;
+ lph = lphnext->lpm_hosts;
+ lphnext = lphnext->lpm_next;
+ } else
+ ok = 0;
+ } else
+ lph++;
+ }
+}
+
+/*
+ * Nqnfs server timer that maintains the server lease queue.
+ * Scan the lease queue for expired entries:
+ * - when one is found, wake up anyone waiting for it,
+ *	otherwise dequeue and free it
+ */
+void
+nqnfs_serverd()
+{
+ register struct nqlease *lp;
+ register struct nqhost *lph;
+ struct nqlease *nextlp;
+ struct nqm *lphnext, *olphnext;
+ int i, len, ok;
+
+ for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead;
+ lp = nextlp) {
+ if (lp->lc_expiry >= time_second)
+ break;
+ nextlp = lp->lc_timer.cqe_next;
+ if (lp->lc_flag & LC_EXPIREDWANTED) {
+ lp->lc_flag &= ~LC_EXPIREDWANTED;
+ wakeup((caddr_t)&lp->lc_flag);
+ } else if ((lp->lc_flag & (LC_LOCKED | LC_WANTED)) == 0) {
+ /*
+ * Make a best effort at keeping a write caching lease long
+ * enough by not deleting it until it has been explicitly
+ * vacated or there have been no writes in the previous
+ * write_slack seconds since expiry and the nfsds are not
+ * all busy. The assumption is that if the nfsds are not
+ * all busy now (no queue of nfs requests), then the client
+ * would have been able to do at least one write to the
+ * file during the last write_slack seconds if it was still
+ * trying to push writes to the server.
+ */
+ if ((lp->lc_flag & (LC_WRITE | LC_VACATED)) == LC_WRITE &&
+ ((lp->lc_flag & LC_WRITTEN) || nfsd_waiting == 0)) {
+ lp->lc_flag &= ~LC_WRITTEN;
+ nqsrv_instimeq(lp, nqsrv_writeslack);
+ } else {
+ CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer);
+ LIST_REMOVE(lp, lc_hash);
+ /*
+ * This soft reference may no longer be valid, but
+ * no harm done. The worst case is if the vnode was
+ * recycled and has another valid lease reference,
+ * which is dereferenced prematurely.
+ */
+ lp->lc_vp->v_lease = (struct nqlease *)0;
+ lph = &lp->lc_host;
+ lphnext = lp->lc_morehosts;
+ olphnext = (struct nqm *)0;
+ len = 1;
+ i = 0;
+ ok = 1;
+ while (ok && (lph->lph_flag & LC_VALID)) {
+ if (lph->lph_flag & LC_CLTP)
+ FREE(lph->lph_nam, M_SONAME);
+ if (lph->lph_flag & LC_SREF)
+ nfsrv_slpderef(lph->lph_slp);
+ if (++i == len) {
+ if (olphnext) {
+ free((caddr_t)olphnext, M_NQMHOST);
+ olphnext = (struct nqm *)0;
+ }
+ if (lphnext) {
+ olphnext = lphnext;
+ i = 0;
+ len = LC_MOREHOSTSIZ;
+ lph = lphnext->lpm_hosts;
+ lphnext = lphnext->lpm_next;
+ } else
+ ok = 0;
+ } else
+ lph++;
+ }
+ FREE((caddr_t)lp, M_NQLEASE);
+ if (olphnext)
+ free((caddr_t)olphnext, M_NQMHOST);
+ nfsstats.srvnqnfs_leases--;
+ }
+ }
+ }
+}
+
+/*
+ * Called from nfssvc_nfsd() for a getlease rpc request.
+ * Do the from/to xdr translation and call nqsrv_getlease() to
+ * do the real work.
+ */
+int
+nqnfsrv_getlease(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register struct nfs_fattr *fp;
+ struct vattr va;
+ register struct vattr *vap = &va;
+ struct vnode *vp;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ u_quad_t frev;
+ caddr_t bpos;
+ int error = 0;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ int flags, rdonly, cache;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ flags = fxdr_unsigned(int, *tl++);
+ nfsd->nd_duration = fxdr_unsigned(int, *tl);
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly,
+ (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(0);
+ goto nfsmout;
+ }
+ if (rdonly && flags == ND_WRITE) {
+ error = EROFS;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ (void) nqsrv_getlease(vp, &nfsd->nd_duration, flags, slp, procp,
+ nam, &cache, &frev, cred);
+ error = VOP_GETATTR(vp, vap, cred, procp);
+ vput(vp);
+ nfsm_reply(NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
+ nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(cache);
+ *tl++ = txdr_unsigned(nfsd->nd_duration);
+ txdr_hyper(frev, tl);
+ nfsm_build(fp, struct nfs_fattr *, NFSX_V3FATTR);
+ nfsm_srvfillattr(vap, fp);
+ nfsm_srvdone;
+}
+
+/*
+ * Called from nfssvc_nfsd() when a "vacated" message is received from a
+ * client. Find the entry and expire it.
+ */
+int
+nqnfsrv_vacated(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ register struct nqlease *lp;
+ register struct nqhost *lph;
+ struct nqlease *tlp = (struct nqlease *)0;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ struct nqm *lphnext;
+ struct mbuf *mreq, *mb;
+ int error = 0, i, len, ok, gotit = 0, cache = 0;
+ char *cp2, *bpos;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ m_freem(mrep);
+ /*
+ * Find the lease by searching the hash list.
+ */
+ for (lp = NQFHHASH(fhp->fh_fid.fid_data)->lh_first; lp != 0;
+ lp = lp->lc_hash.le_next)
+ if (fhp->fh_fsid.val[0] == lp->lc_fsid.val[0] &&
+ fhp->fh_fsid.val[1] == lp->lc_fsid.val[1] &&
+ !bcmp(fhp->fh_fid.fid_data, lp->lc_fiddata,
+ MAXFIDSZ)) {
+ /* Found it */
+ tlp = lp;
+ break;
+ }
+ if (tlp != 0) {
+ lp = tlp;
+ len = 1;
+ i = 0;
+ lph = &lp->lc_host;
+ lphnext = lp->lc_morehosts;
+ ok = 1;
+ while (ok && (lph->lph_flag & LC_VALID)) {
+ if (nqsrv_cmpnam(slp, nam, lph)) {
+ lph->lph_flag |= LC_VACATED;
+ gotit++;
+ break;
+ }
+ if (++i == len) {
+ if (lphnext) {
+ len = LC_MOREHOSTSIZ;
+ i = 0;
+ lph = lphnext->lpm_hosts;
+ lphnext = lphnext->lpm_next;
+ } else
+ ok = 0;
+ } else
+ lph++;
+ }
+ if ((lp->lc_flag & LC_EXPIREDWANTED) && gotit) {
+ lp->lc_flag &= ~LC_EXPIREDWANTED;
+ wakeup((caddr_t)&lp->lc_flag);
+ }
+nfsmout:
+ return (EPERM);
+ }
+ return (EPERM);
+}
+
+#endif /* NFS_NOSERVER */
+
+/*
+ * Client get lease rpc function.
+ */
+int
+nqnfs_getlease(vp, rwflag, cred, p)
+ register struct vnode *vp;
+ int rwflag;
+ struct ucred *cred;
+ struct proc *p;
+{
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ register struct nfsnode *np;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ caddr_t bpos, dpos, cp2;
+ time_t reqtime;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ int cachable;
+ u_quad_t frev;
+
+ nfsstats.rpccnt[NQNFSPROC_GETLEASE]++;
+ mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_V3FH+2*NFSX_UNSIGNED,
+ &bpos);
+ nfsm_fhtom(vp, 1);
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(rwflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ reqtime = time_second;
+ nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred);
+ np = VTONFS(vp);
+ nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+ cachable = fxdr_unsigned(int, *tl++);
+ reqtime += fxdr_unsigned(int, *tl++);
+ if (reqtime > time_second) {
+ frev = fxdr_hyper(tl);
+ nqnfs_clientlease(nmp, np, rwflag, cachable, reqtime, frev);
+ nfsm_loadattr(vp, (struct vattr *)0);
+ } else
+ error = NQNFS_EXPIRED;
+ nfsm_reqdone;
+ return (error);
+}
+
+#ifndef NFS_NOSERVER
+/*
+ * Client vacated message function.
+ */
+static int
+nqnfs_vacated(vp, cred)
+ register struct vnode *vp;
+ struct ucred *cred;
+{
+ register caddr_t cp;
+ register int i;
+ register u_int32_t *tl;
+ register int32_t t2;
+ caddr_t bpos;
+ u_int32_t xid;
+ int error = 0;
+ struct mbuf *m, *mreq, *mb, *mb2, *mheadend;
+ struct nfsmount *nmp;
+ struct nfsreq myrep;
+
+ nmp = VFSTONFS(vp->v_mount);
+ nfsstats.rpccnt[NQNFSPROC_VACATED]++;
+ nfsm_reqhead(vp, NQNFSPROC_VACATED, NFSX_FH(1));
+ nfsm_fhtom(vp, 1);
+ m = mreq;
+ i = 0;
+ while (m) {
+ i += m->m_len;
+ m = m->m_next;
+ }
+ m = nfsm_rpchead(cred, nmp->nm_flag, NQNFSPROC_VACATED,
+ RPCAUTH_UNIX, 5 * NFSX_UNSIGNED, (char *)0,
+ 0, (char *)NULL, mreq, i, &mheadend, &xid);
+ if (nmp->nm_sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_int32_t *) = htonl(0x80000000 | (m->m_pkthdr.len -
+ NFSX_UNSIGNED));
+ }
+ myrep.r_flags = 0;
+ myrep.r_nmp = nmp;
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ (void) nfs_sndlock(&myrep);
+ (void) nfs_send(nmp->nm_so, nmp->nm_nam, m, &myrep);
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ nfs_sndunlock(&myrep);
+nfsmout:
+ return (error);
+}
+
+/*
+ * Called for client side callbacks
+ */
+int
+nqnfs_callback(nmp, mrep, md, dpos)
+ struct nfsmount *nmp;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+{
+ register struct vnode *vp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ struct nfsnode *np;
+ struct nfsd tnfsd;
+ struct nfssvc_sock *slp;
+ struct nfsrv_descript ndesc;
+ register struct nfsrv_descript *nfsd = &ndesc;
+ struct mbuf **mrq = (struct mbuf **)0, *mb, *mreq;
+ int error = 0, cache = 0;
+ char *cp2, *bpos;
+ u_quad_t frev;
+
+#ifndef nolint
+ slp = NULL;
+#endif
+ nfsd->nd_mrep = mrep;
+ nfsd->nd_md = md;
+ nfsd->nd_dpos = dpos;
+ error = nfs_getreq(nfsd, &tnfsd, FALSE);
+ if (error)
+ return (error);
+ md = nfsd->nd_md;
+ dpos = nfsd->nd_dpos;
+ if (nfsd->nd_procnum != NQNFSPROC_EVICTED) {
+ m_freem(mrep);
+ return (EPERM);
+ }
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ m_freem(mrep);
+ error = nfs_nget(nmp->nm_mountp, (nfsfh_t *)fhp, NFSX_V3FH, &np);
+ if (error)
+ return (error);
+ vp = NFSTOV(np);
+ if (np->n_timer.cqe_next != 0) {
+ np->n_expiry = 0;
+ np->n_flag |= NQNFSEVICTED;
+ if (nmp->nm_timerhead.cqh_first != np) {
+ CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer);
+ CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer);
+ }
+ }
+ vput(vp);
+ nfsm_srvdone;
+}
+
+
+/*
+ * Nqnfs client helper daemon. Runs once a second to expire leases.
+ * It also gets authorization strings for "kerb" mounts.
+ * It must start at the beginning of the list again after any potential
+ * "sleep" since nfs_reclaim() called from vclean() can pull a node off
+ * the list asynchronously.
+ */
+int
+nqnfs_clientd(nmp, cred, ncd, flag, argp, p)
+ register struct nfsmount *nmp;
+ struct ucred *cred;
+ struct nfsd_cargs *ncd;
+ int flag;
+ caddr_t argp;
+ struct proc *p;
+{
+ register struct nfsnode *np;
+ struct vnode *vp;
+ struct nfsreq myrep;
+ struct nfsuid *nuidp, *nnuidp;
+ int error = 0, vpid;
+
+ /*
+ * First initialize some variables
+ */
+
+ /*
+ * If an authorization string is being passed in, get it.
+ */
+ if ((flag & NFSSVC_GOTAUTH) &&
+ (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_DISMNT)) == 0) {
+ if (nmp->nm_state & NFSSTA_HASAUTH)
+ panic("cld kerb");
+ if ((flag & NFSSVC_AUTHINFAIL) == 0) {
+ if (ncd->ncd_authlen <= nmp->nm_authlen &&
+ ncd->ncd_verflen <= nmp->nm_verflen &&
+ !copyin(ncd->ncd_authstr,nmp->nm_authstr,ncd->ncd_authlen)&&
+ !copyin(ncd->ncd_verfstr,nmp->nm_verfstr,ncd->ncd_verflen)){
+ nmp->nm_authtype = ncd->ncd_authtype;
+ nmp->nm_authlen = ncd->ncd_authlen;
+ nmp->nm_verflen = ncd->ncd_verflen;
+#ifdef NFSKERB
+ nmp->nm_key = ncd->ncd_key;
+#endif
+ } else
+ nmp->nm_state |= NFSSTA_AUTHERR;
+ } else
+ nmp->nm_state |= NFSSTA_AUTHERR;
+ nmp->nm_state |= NFSSTA_HASAUTH;
+ wakeup((caddr_t)&nmp->nm_authlen);
+ } else
+ nmp->nm_state |= NFSSTA_WAITAUTH;
+
+ /*
+ * Loop every second updating queue until there is a termination sig.
+ */
+ while ((nmp->nm_state & NFSSTA_DISMNT) == 0) {
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ /*
+ * If there are no outstanding requests (and therefore no
+ * processes in nfs_reply) and there is data in the receive
+ * queue, poke for callbacks.
+ */
+ if (nfs_reqq.tqh_first == 0 && nmp->nm_so &&
+ nmp->nm_so->so_rcv.sb_cc > 0) {
+ myrep.r_flags = R_GETONEREP;
+ myrep.r_nmp = nmp;
+ myrep.r_mrep = (struct mbuf *)0;
+ myrep.r_procp = (struct proc *)0;
+ (void) nfs_reply(&myrep);
+ }
+
+ /*
+ * Loop through the leases, updating as required.
+ */
+ np = nmp->nm_timerhead.cqh_first;
+ while (np != (void *)&nmp->nm_timerhead &&
+ (nmp->nm_state & NFSSTA_DISMINPROG) == 0) {
+ vp = NFSTOV(np);
+ vpid = vp->v_id;
+ if (np->n_expiry < time_second) {
+ if (vget(vp, LK_EXCLUSIVE, p) == 0) {
+ nmp->nm_inprog = vp;
+ if (vpid == vp->v_id) {
+ CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer);
+ np->n_timer.cqe_next = 0;
+ if (np->n_flag & (NMODIFIED | NQNFSEVICTED)) {
+ if (np->n_flag & NQNFSEVICTED) {
+ if (vp->v_type == VDIR)
+ nfs_invaldir(vp);
+ cache_purge(vp);
+ (void) nfs_vinvalbuf(vp,
+ V_SAVE, cred, p, 0);
+ np->n_flag &= ~NQNFSEVICTED;
+ (void) nqnfs_vacated(vp, cred);
+ } else if (vp->v_type == VREG) {
+ (void) VOP_FSYNC(vp, cred,
+ MNT_WAIT, p);
+ np->n_flag &= ~NMODIFIED;
+ }
+ }
+ }
+ vput(vp);
+ nmp->nm_inprog = NULLVP;
+ }
+ } else if ((np->n_expiry - NQ_RENEWAL) < time_second) {
+ if ((np->n_flag & (NQNFSWRITE | NQNFSNONCACHE))
+ == NQNFSWRITE &&
+ !TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
+ vget(vp, LK_EXCLUSIVE, p) == 0) {
+ nmp->nm_inprog = vp;
+ if (vpid == vp->v_id &&
+ nqnfs_getlease(vp, ND_WRITE, cred, p)==0)
+ np->n_brev = np->n_lrev;
+ vput(vp);
+ nmp->nm_inprog = NULLVP;
+ }
+ } else
+ break;
+ if (np == nmp->nm_timerhead.cqh_first)
+ break;
+ np = nmp->nm_timerhead.cqh_first;
+ }
+ }
+
+ /*
+ * Get an authorization string, if required.
+ */
+ if ((nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_DISMNT | NFSSTA_HASAUTH)) == 0) {
+ ncd->ncd_authuid = nmp->nm_authuid;
+ if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs)))
+ nmp->nm_state |= NFSSTA_WAITAUTH;
+ else
+ return (ENEEDAUTH);
+ }
+
+ /*
+ * Wait a bit (no pun) and do it again.
+ */
+ if ((nmp->nm_state & NFSSTA_DISMNT) == 0 &&
+ (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH))) {
+ error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH,
+ "nqnfstimr", hz / 3);
+ if (error == EINTR || error == ERESTART)
+ (void) dounmount(nmp->nm_mountp, 0, p);
+ }
+ }
+
+ /*
+ * Finally, we can free up the mount structure.
+ */
+ for (nuidp = nmp->nm_uidlruhead.tqh_first; nuidp != 0; nuidp = nnuidp) {
+ nnuidp = nuidp->nu_lru.tqe_next;
+ LIST_REMOVE(nuidp, nu_hash);
+ TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
+ free((caddr_t)nuidp, M_NFSUID);
+ }
+ zfree(nfsmount_zone, nmp);
+ if (error == EWOULDBLOCK)
+ error = 0;
+ return (error);
+}
+
+#endif /* NFS_NOSERVER */
+
+/*
+ * Adjust all timer queue expiry times when the time of day clock is changed.
+ * Called from the settimeofday() syscall.
+ */
+void
+nqnfs_lease_updatetime(deltat)
+ register int deltat;
+{
+ struct proc *p = curproc; /* XXX */
+ struct nqlease *lp;
+ struct nfsnode *np;
+ struct mount *mp, *nxtmp;
+ struct nfsmount *nmp;
+ int s;
+
+ if (nqnfsstarttime != 0)
+ nqnfsstarttime += deltat;
+ s = splsoftclock();
+ for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead;
+ lp = lp->lc_timer.cqe_next)
+ lp->lc_expiry += deltat;
+ splx(s);
+
+ /*
+ * Search the mount list for all nqnfs mounts and do their timer
+ * queues.
+ */
+ simple_lock(&mountlist_slock);
+ for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nxtmp) {
+ if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
+ nxtmp = TAILQ_NEXT(mp, mnt_list);
+ continue;
+ }
+ if (mp->mnt_stat.f_type == nfs_mount_type) {
+ nmp = VFSTONFS(mp);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ for (np = nmp->nm_timerhead.cqh_first;
+ np != (void *)&nmp->nm_timerhead;
+ np = np->n_timer.cqe_next) {
+ np->n_expiry += deltat;
+ }
+ }
+ }
+ simple_lock(&mountlist_slock);
+ nxtmp = TAILQ_NEXT(mp, mnt_list);
+ vfs_unbusy(mp, p);
+ }
+ simple_unlock(&mountlist_slock);
+}
+
+#ifndef NFS_NOSERVER
+/*
+ * Lock a server lease.
+ */
+static void
+nqsrv_locklease(lp)
+ struct nqlease *lp;
+{
+
+ while (lp->lc_flag & LC_LOCKED) {
+ lp->lc_flag |= LC_WANTED;
+ (void) tsleep((caddr_t)lp, PSOCK, "nqlc", 0);
+ }
+ lp->lc_flag |= LC_LOCKED;
+ lp->lc_flag &= ~LC_WANTED;
+}
+
+/*
+ * Unlock a server lease.
+ */
+static void
+nqsrv_unlocklease(lp)
+ struct nqlease *lp;
+{
+
+ lp->lc_flag &= ~LC_LOCKED;
+ if (lp->lc_flag & LC_WANTED)
+ wakeup((caddr_t)lp);
+}
+#endif /* NFS_NOSERVER */
+
+/*
+ * Update a client lease.
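+ * Record the new expiry time and file revision on the nfsnode, adjust the
+ * write/non-cache flags, and keep the per-mount timer queue sorted by
+ * expiry time.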
+ */
+void
+nqnfs_clientlease(nmp, np, rwflag, cachable, expiry, frev)
+ register struct nfsmount *nmp;
+ register struct nfsnode *np;
+ int rwflag, cachable;
+ time_t expiry;
+ u_quad_t frev;
+{
+ register struct nfsnode *tp;
+
+ if (np->n_timer.cqe_next != 0) {
+ CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer);
+ if (rwflag == ND_WRITE)
+ np->n_flag |= NQNFSWRITE;
+ } else if (rwflag == ND_READ)
+ np->n_flag &= ~NQNFSWRITE;
+ else
+ np->n_flag |= NQNFSWRITE;
+ if (cachable)
+ np->n_flag &= ~NQNFSNONCACHE;
+ else
+ np->n_flag |= NQNFSNONCACHE;
+ np->n_expiry = expiry;
+ np->n_lrev = frev;
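+	/* reinsert np so the per-mount lease timer queue stays ordered by expiry time */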
+ tp = nmp->nm_timerhead.cqh_last;
+ while (tp != (void *)&nmp->nm_timerhead && tp->n_expiry > np->n_expiry)
+ tp = tp->n_timer.cqe_prev;
+ if (tp == (void *)&nmp->nm_timerhead) {
+ CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer);
+ } else {
+ CIRCLEQ_INSERT_AFTER(&nmp->nm_timerhead, tp, np, n_timer);
+ }
+}
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
new file mode 100644
index 0000000..0d28670
--- /dev/null
+++ b/sys/nfs/nfs_serv.c
@@ -0,0 +1,4068 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_serv.c 8.8 (Berkeley) 7/31/95
+ * $FreeBSD$
+ */
+
+/*
+ * nfs version 2 and 3 server calls to vnode ops
+ * - these routines generally have 3 phases
+ * 1 - break down and validate rpc request in mbuf list
+ * 2 - do the vnode ops for the request
+ *	    (surprisingly, many are very similar to the syscalls in vfs_syscalls.c)
+ * 3 - build the rpc reply in an mbuf list
+ * nb:
+ * - do not mix the phases, since the nfsm_?? macros can return failures
+ * on a bad rpc or similar and do not do any vrele() or vput()'s
+ *
+ * - the nfsm_reply() macro generates an nfs rpc reply with the nfs
+ * error number iff error != 0 whereas
+ * returning an error from the server function implies a fatal error
+ * such as a badly constructed rpc request that should be dropped without
+ * a reply.
+ * For Version 3, nfsm_reply() does not return for the error case, since
+ * most version 3 rpcs return more than the status for error cases.
+ *
+ * Other notes:
+ * Warning: always pay careful attention to resource cleanup on return
+ * and note that nfsm_*() macros can terminate a procedure on certain
+ * errors.
+ *
+ * lookup() and namei()
+ * may return garbage in various structural fields/return elements
+ * if an error is returned, and may garbage up nd.ni_dvp even if no
+ * error is returned and you did not request LOCKPARENT or WANTPARENT.
+ *
+ * We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
+ * buffer has been freed or not.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/dirent.h>
+#include <sys/stat.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/buf.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+#include <vm/vm_object.h>
+
+#include <nfs/nfsproto.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+#ifdef NFSRV_DEBUG
+#define nfsdbprintf(info) printf info
+#else
+#define nfsdbprintf(info)
+#endif
+
+#define MAX_COMMIT_COUNT (1024 * 1024)
+
+#define NUM_HEURISTIC 64
+#define NHUSE_INIT 64
+#define NHUSE_INC 16
+#define NHUSE_MAX 2048
+
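+/*
+ * Per-vnode read heuristic table: nfsrv_read() hashes the vnode pointer
+ * into this array and passes nh_seqcount to VOP_READ() as a
+ * sequential-access hint.  The NHUSE_* constants above tune slot reuse.
+ */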
+static struct nfsheur {
+ struct vnode *nh_vp; /* vp to match (unreferenced pointer) */
+ off_t nh_nextr; /* next offset for sequential detection */
+ int nh_use; /* use count for selection */
+ int nh_seqcount; /* heuristic */
+} nfsheur[NUM_HEURISTIC];
+
+nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
+ NFFIFO, NFNON };
+#ifndef NFS_NOSERVER
+nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
+ NFCHR, NFNON };
+/* Global vars */
+extern u_int32_t nfs_xdrneg1;
+extern u_int32_t nfs_false, nfs_true;
+extern enum vtype nv3tov_type[8];
+extern struct nfsstats nfsstats;
+
+int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
+int nfsrvw_procrastinate_v3 = 0;
+
+static struct timeval nfsver = { 0 };
+
+SYSCTL_DECL(_vfs_nfs);
+
+static int nfs_async;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
+static int nfs_commit_blks;
+static int nfs_commit_miss;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
+SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
+
+static int nfsrv_access __P((struct vnode *,int,struct ucred *,int,
+ struct proc *, int));
+static void nfsrvw_coalesce __P((struct nfsrv_descript *,
+ struct nfsrv_descript *));
+
+/*
+ * Clear the nameidata fields that are tested in the nfsmout cleanup code
+ * prior to using the first nfsm macro (which might jump to the cleanup code).
+ */
+
+static __inline
+void
+ndclear(struct nameidata *nd)
+{
+ nd->ni_cnd.cn_flags = 0;
+ nd->ni_vp = NULL;
+ nd->ni_dvp = NULL;
+ nd->ni_startdir = NULL;
+}
+
+/*
+ * nfs v3 access service
+ */
+int
+nfsrv3_access(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ struct vnode *vp = NULL;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, getret;
+ char *cp2;
+ struct mbuf *mb, *mreq, *mb2;
+ struct vattr vattr, *vap = &vattr;
+ u_long testmode, nfsmode;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ cache = 0;
+#endif
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly,
+ (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(1, (struct vattr *)0);
+ error = 0;
+ goto nfsmout;
+ }
+ nfsmode = fxdr_unsigned(u_int32_t, *tl);
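+	/* test each requested ACCESS bit with the matching VREAD/VWRITE/VEXEC check and clear the bits that fail */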
+ if ((nfsmode & NFSV3ACCESS_READ) &&
+ nfsrv_access(vp, VREAD, cred, rdonly, procp, 0))
+ nfsmode &= ~NFSV3ACCESS_READ;
+ if (vp->v_type == VDIR)
+ testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
+ NFSV3ACCESS_DELETE);
+ else
+ testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
+ if ((nfsmode & testmode) &&
+ nfsrv_access(vp, VWRITE, cred, rdonly, procp, 0))
+ nfsmode &= ~testmode;
+ if (vp->v_type == VDIR)
+ testmode = NFSV3ACCESS_LOOKUP;
+ else
+ testmode = NFSV3ACCESS_EXECUTE;
+ if ((nfsmode & testmode) &&
+ nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0))
+ nfsmode &= ~testmode;
+ getret = VOP_GETATTR(vp, vap, cred, procp);
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(getret, vap);
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(nfsmode);
+nfsmout:
+ if (vp)
+ vput(vp);
+ return(error);
+}
+
+/*
+ * nfs getattr service
+ */
+int
+nfsrv_getattr(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register struct nfs_fattr *fp;
+ struct vattr va;
+ register struct vattr *vap = &va;
+ struct vnode *vp = NULL;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(0);
+ error = 0;
+ goto nfsmout;
+ }
+ nqsrv_getl(vp, ND_READ);
+ error = VOP_GETATTR(vp, vap, cred, procp);
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
+ if (error) {
+ error = 0;
+ goto nfsmout;
+ }
+ nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
+ nfsm_srvfillattr(vap, fp);
+ /* fall through */
+
+nfsmout:
+ if (vp)
+ vput(vp);
+ return(error);
+}
+
+/*
+ * nfs setattr service
+ */
+int
+nfsrv_setattr(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ struct vattr va, preat;
+ register struct vattr *vap = &va;
+ register struct nfsv2_sattr *sp;
+ register struct nfs_fattr *fp;
+ struct vnode *vp = NULL;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, preat_ret = 1, postat_ret = 1;
+ int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ u_quad_t frev;
+ struct timespec guard;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ VATTR_NULL(vap);
+ if (v3) {
+ nfsm_srvsattr(vap);
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ gcheck = fxdr_unsigned(int, *tl);
+ if (gcheck) {
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ fxdr_nfsv3time(tl, &guard);
+ }
+ } else {
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
+ /*
+ * Nah nah nah nah na nah
+ * There is a bug in the Sun client that puts 0xffff in the mode
+ * field of sattr when it should put in 0xffffffff. The u_short
+ * doesn't sign extend.
+ * --> check the low order 2 bytes for 0xffff
+ */
+ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
+ vap->va_mode = nfstov_mode(sp->sa_mode);
+ if (sp->sa_uid != nfs_xdrneg1)
+ vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
+ if (sp->sa_gid != nfs_xdrneg1)
+ vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
+ if (sp->sa_size != nfs_xdrneg1)
+ vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
+ if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
+#ifdef notyet
+ fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
+#else
+ vap->va_atime.tv_sec =
+ fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
+ vap->va_atime.tv_nsec = 0;
+#endif
+ }
+ if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
+ fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
+
+ }
+
+ /*
+	 * Now that we have all the fields, let's do it.
+ */
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly,
+ (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(2 * NFSX_UNSIGNED);
+ nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
+ error = 0;
+ goto nfsmout;
+ }
+
+ /*
+	 * vp is now an active resource; pay careful attention to cleanup.
+ */
+
+ nqsrv_getl(vp, ND_WRITE);
+ if (v3) {
+ error = preat_ret = VOP_GETATTR(vp, &preat, cred, procp);
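+		/* v3 guarded setattr: the guard time sent by the client must match the file's current ctime */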
+ if (!error && gcheck &&
+ (preat.va_ctime.tv_sec != guard.tv_sec ||
+ preat.va_ctime.tv_nsec != guard.tv_nsec))
+ error = NFSERR_NOT_SYNC;
+ if (error) {
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_WCCDATA(v3));
+ nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
+ error = 0;
+ goto nfsmout;
+ }
+ }
+
+ /*
+	 * If the size is being changed, write access is required; otherwise
+	 * just check for a read-only file system.
+ */
+ if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
+ if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+ error = EROFS;
+ goto out;
+ }
+ } else {
+ if (vp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
+ procp, 0)) != 0)
+ goto out;
+ }
+ error = VOP_SETATTR(vp, vap, cred, procp);
+ postat_ret = VOP_GETATTR(vp, vap, cred, procp);
+ if (!error)
+ error = postat_ret;
+out:
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_WCCORFATTR(v3));
+ if (v3) {
+ nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
+ error = 0;
+ goto nfsmout;
+ } else {
+ nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
+ nfsm_srvfillattr(vap, fp);
+ }
+ /* fall through */
+
+nfsmout:
+ if (vp)
+ vput(vp);
+ return(error);
+}
+
+/*
+ * nfs lookup rpc
+ */
+int
+nfsrv_lookup(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register struct nfs_fattr *fp;
+ struct nameidata nd, ind, *ndp = &nd;
+ struct vnode *vp, *dirp = NULL;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ register caddr_t cp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, cache, len, dirattr_ret = 1;
+ int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vattr va, dirattr, *vap = &va;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ ndclear(&nd);
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvnamesiz(len);
+
+ pubflag = nfs_ispublicfh(fhp);
+
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+ nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
+ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
+ &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
+
+ /*
+	 * namei failure: only dirp to clean up.  Clear garbage out of the
+	 * structure in case the macros jump to nfsmout.
+ */
+
+ if (error) {
+ if (dirp) {
+ if (v3)
+ dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred,
+ procp);
+ vrele(dirp);
+ dirp = NULL;
+ }
+ nfsm_reply(NFSX_POSTOPATTR(v3));
+ nfsm_srvpostop_attr(dirattr_ret, &dirattr);
+ error = 0;
+ goto nfsmout;
+ }
+
+ /*
+ * Locate index file for public filehandle
+ *
+ * error is 0 on entry and 0 on exit from this block.
+ */
+
+ if (pubflag) {
+ if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
+ /*
+			 * Set up a call to lookup() to see if we can find
+			 * the index file.  Arguably, this doesn't belong
+			 * in a kernel... Ugh.  If an error occurs we do not
+			 * install an index file; we simply clear the
+			 * error and continue with the directory itself.
+			 *
+			 * When we replace nd with ind and redirect ndp,
+			 * maintenance of ni_startdir and ni_vp shifts to
+			 * ind and we have to clean them up in the old nd.
+ * However, the cnd resource continues to be maintained
+ * via the original nd. Confused? You aren't alone!
+ */
+ ind = nd;
+ VOP_UNLOCK(nd.ni_vp, 0, procp);
+ ind.ni_pathlen = strlen(nfs_pub.np_index);
+ ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
+ nfs_pub.np_index;
+ ind.ni_startdir = nd.ni_vp;
+ VREF(ind.ni_startdir);
+
+ error = lookup(&ind);
+ ind.ni_dvp = NULL;
+
+ if (error == 0) {
+ /*
+				 * Found an index file.  Get rid of
+				 * the old references and transfer nd.ni_vp to dirp.
+ */
+ if (dirp)
+ vrele(dirp);
+ dirp = nd.ni_vp;
+ nd.ni_vp = NULL;
+ vrele(nd.ni_startdir);
+ nd.ni_startdir = NULL;
+ ndp = &ind;
+ }
+ error = 0;
+ }
+ /*
+ * If the public filehandle was used, check that this lookup
+ * didn't result in a filehandle outside the publicly exported
+ * filesystem. We clear the poor vp here to avoid lockups due
+ * to NFS I/O.
+ */
+
+ if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
+ vput(nd.ni_vp);
+ nd.ni_vp = NULL;
+ error = EPERM;
+ }
+ }
+
+ if (dirp) {
+ if (v3)
+ dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred,
+ procp);
+ vrele(dirp);
+ dirp = NULL;
+ }
+
+ /*
+ * Resources at this point:
+ * ndp->ni_vp may not be NULL
+ *
+ */
+
+ if (error) {
+ nfsm_reply(NFSX_POSTOPATTR(v3));
+ nfsm_srvpostop_attr(dirattr_ret, &dirattr);
+ error = 0;
+ goto nfsmout;
+ }
+
+ nqsrv_getl(ndp->ni_startdir, ND_READ);
+
+ /*
+ * Clear out some resources prior to potentially blocking. This
+ * is not as critical as ni_dvp resources in other routines, but
+ * it helps.
+ */
+ vrele(ndp->ni_startdir);
+ ndp->ni_startdir = NULL;
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+
+ /*
+ * Get underlying attribute, then release remaining resources ( for
+ * the same potential blocking reason ) and reply.
+ */
+ vp = ndp->ni_vp;
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ error = VFS_VPTOFH(vp, &fhp->fh_fid);
+ if (!error)
+ error = VOP_GETATTR(vp, vap, cred, procp);
+
+ vput(vp);
+ ndp->ni_vp = NULL;
+ nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
+ if (error) {
+ nfsm_srvpostop_attr(dirattr_ret, &dirattr);
+ error = 0;
+ goto nfsmout;
+ }
+ nfsm_srvfhtom(fhp, v3);
+ if (v3) {
+ nfsm_srvpostop_attr(0, vap);
+ nfsm_srvpostop_attr(dirattr_ret, &dirattr);
+ } else {
+ nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
+ nfsm_srvfillattr(vap, fp);
+ }
+
+nfsmout:
+ if (dirp)
+ vrele(dirp);
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (ndp->ni_startdir)
+ vrele(ndp->ni_startdir);
+ if (ndp->ni_vp)
+ vput(ndp->ni_vp);
+ return (error);
+}
+
+/*
+ * nfs readlink service
+ */
+int
+nfsrv_readlink(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
+ register struct iovec *ivp = iv;
+ register struct mbuf *mp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, i, tlen, len, getret;
+ int v3 = (nfsd->nd_flag & ND_NFSV3);
+ char *cp2;
+ struct mbuf *mb, *mb2, *mp2, *mp3, *mreq;
+ struct vnode *vp = NULL;
+ struct vattr attr;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ mp2 = (struct mbuf *)0;
+#endif
+ mp3 = NULL;
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ len = 0;
+ i = 0;
+ while (len < NFS_MAXPATHLEN) {
+ MGET(mp, M_WAIT, MT_DATA);
+ MCLGET(mp, M_WAIT);
+ mp->m_len = NFSMSIZ(mp);
+ if (len == 0)
+ mp3 = mp2 = mp;
+ else {
+ mp2->m_next = mp;
+ mp2 = mp;
+ }
+ if ((len+mp->m_len) > NFS_MAXPATHLEN) {
+ mp->m_len = NFS_MAXPATHLEN-len;
+ len = NFS_MAXPATHLEN;
+ } else
+ len += mp->m_len;
+ ivp->iov_base = mtod(mp, caddr_t);
+ ivp->iov_len = mp->m_len;
+ i++;
+ ivp++;
+ }
+ uiop->uio_iov = iv;
+ uiop->uio_iovcnt = i;
+ uiop->uio_offset = 0;
+ uiop->uio_resid = len;
+ uiop->uio_rw = UIO_READ;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = (struct proc *)0;
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(2 * NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(1, (struct vattr *)0);
+ error = 0;
+ goto nfsmout;
+ }
+ if (vp->v_type != VLNK) {
+ if (v3)
+ error = EINVAL;
+ else
+ error = ENXIO;
+ goto out;
+ }
+ nqsrv_getl(vp, ND_READ);
+ error = VOP_READLINK(vp, uiop, cred);
+out:
+ getret = VOP_GETATTR(vp, &attr, cred, procp);
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
+ if (v3) {
+ nfsm_srvpostop_attr(getret, &attr);
+ if (error) {
+ error = 0;
+ goto nfsmout;
+ }
+ }
+ if (uiop->uio_resid > 0) {
+ len -= uiop->uio_resid;
+ tlen = nfsm_rndup(len);
+ nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
+ }
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(len);
+ mb->m_next = mp3;
+ mp3 = NULL;
+nfsmout:
+ if (mp3)
+ m_freem(mp3);
+ if (vp)
+ vput(vp);
+ return(error);
+}
+
+/*
+ * nfs read service
+ */
+int
+nfsrv_read(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register struct iovec *iv;
+ struct iovec *iv2;
+ register struct mbuf *m;
+ register struct nfs_fattr *fp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ register int i;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, cnt, len, left, siz, tlen, getret;
+ int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct mbuf *m2;
+ struct vnode *vp = NULL;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ struct vattr va, *vap = &va;
+ struct nfsheur *nh;
+ off_t off;
+ int ioflag = 0;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ if (v3) {
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ off = fxdr_hyper(tl);
+ } else {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ off = (off_t)fxdr_unsigned(u_int32_t, *tl);
+ }
+ nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
+
+ /*
+ * Reference vp. If an error occurs, vp will be invalid, but we
+ * have to NULL it just in case. The macros might goto nfsmout
+ * as well.
+ */
+
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ vp = NULL;
+ nfsm_reply(2 * NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(1, (struct vattr *)0);
+ error = 0;
+ goto nfsmout;
+ }
+
+ if (vp->v_type != VREG) {
+ if (v3)
+ error = EINVAL;
+ else
+ error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+ }
+ if (!error) {
+ nqsrv_getl(vp, ND_READ);
+ if ((error = nfsrv_access(vp, VREAD, cred, rdonly, procp, 1)) != 0)
+ error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 1);
+ }
+ getret = VOP_GETATTR(vp, vap, cred, procp);
+ if (!error)
+ error = getret;
+ if (error) {
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_POSTOPATTR(v3));
+ nfsm_srvpostop_attr(getret, vap);
+ error = 0;
+ goto nfsmout;
+ }
+
+ /*
+ * Calculate byte count to read
+ */
+
+ if (off >= vap->va_size)
+ cnt = 0;
+ else if ((off + reqlen) > vap->va_size)
+ cnt = vap->va_size - off;
+ else
+ cnt = reqlen;
+
+ /*
+ * Calculate seqcount for heuristic
+ */
+
+ {
+ int hi;
+ int try = 4;
+
+ /*
+ * Locate best candidate
+ */
+
+ hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) & (NUM_HEURISTIC - 1);
+ nh = &nfsheur[hi];
+
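+		/* probe up to four slots for an entry already bound to this vnode, decaying the use counts of the others */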
+ while (try--) {
+ if (nfsheur[hi].nh_vp == vp) {
+ nh = &nfsheur[hi];
+ break;
+ }
+ if (nfsheur[hi].nh_use > 0)
+ --nfsheur[hi].nh_use;
+ hi = (hi + 1) & (NUM_HEURISTIC - 1);
+ if (nfsheur[hi].nh_use < nh->nh_use)
+ nh = &nfsheur[hi];
+ }
+
+ if (nh->nh_vp != vp) {
+ nh->nh_vp = vp;
+ nh->nh_nextr = off;
+ nh->nh_use = NHUSE_INIT;
+ if (off == 0)
+ nh->nh_seqcount = 4;
+ else
+ nh->nh_seqcount = 1;
+ }
+
+ /*
+ * Calculate heuristic
+ */
+
+ if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
+ if (++nh->nh_seqcount > 127)
+ nh->nh_seqcount = 127;
+ } else if (nh->nh_seqcount > 1) {
+ nh->nh_seqcount = 1;
+ } else {
+ nh->nh_seqcount = 0;
+ }
+ nh->nh_use += NHUSE_INC;
+ if (nh->nh_use > NHUSE_MAX)
+ nh->nh_use = NHUSE_MAX;
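+		/* encode the sequential-read estimate in the upper bits of ioflag, which is passed to VOP_READ() below */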
+ ioflag |= nh->nh_seqcount << 16;
+ }
+
+ nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
+ if (v3) {
+ nfsm_build(tl, u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
+ *tl++ = nfs_true;
+ fp = (struct nfs_fattr *)tl;
+ tl += (NFSX_V3FATTR / sizeof (u_int32_t));
+ } else {
+ nfsm_build(tl, u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
+ fp = (struct nfs_fattr *)tl;
+ tl += (NFSX_V2FATTR / sizeof (u_int32_t));
+ }
+ len = left = nfsm_rndup(cnt);
+ if (cnt > 0) {
+ /*
+ * Generate the mbuf list with the uio_iov ref. to it.
+ */
+ i = 0;
+ m = m2 = mb;
+ while (left > 0) {
+ siz = min(M_TRAILINGSPACE(m), left);
+ if (siz > 0) {
+ left -= siz;
+ i++;
+ }
+ if (left > 0) {
+ MGET(m, M_WAIT, MT_DATA);
+ MCLGET(m, M_WAIT);
+ m->m_len = 0;
+ m2->m_next = m;
+ m2 = m;
+ }
+ }
+ MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
+ M_TEMP, M_WAITOK);
+ uiop->uio_iov = iv2 = iv;
+ m = mb;
+ left = len;
+ i = 0;
+ while (left > 0) {
+ if (m == NULL)
+ panic("nfsrv_read iov");
+ siz = min(M_TRAILINGSPACE(m), left);
+ if (siz > 0) {
+ iv->iov_base = mtod(m, caddr_t) + m->m_len;
+ iv->iov_len = siz;
+ m->m_len += siz;
+ left -= siz;
+ iv++;
+ i++;
+ }
+ m = m->m_next;
+ }
+ uiop->uio_iovcnt = i;
+ uiop->uio_offset = off;
+ uiop->uio_resid = len;
+ uiop->uio_rw = UIO_READ;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
+ off = uiop->uio_offset;
+ nh->nh_nextr = off;
+ FREE((caddr_t)iv2, M_TEMP);
+ if (error || (getret = VOP_GETATTR(vp, vap, cred, procp))) {
+ if (!error)
+ error = getret;
+ m_freem(mreq);
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_POSTOPATTR(v3));
+ nfsm_srvpostop_attr(getret, vap);
+ error = 0;
+ goto nfsmout;
+ }
+ } else {
+ uiop->uio_resid = 0;
+ }
+ vput(vp);
+ vp = NULL;
+ nfsm_srvfillattr(vap, fp);
+ tlen = len - uiop->uio_resid;
+ cnt = cnt < tlen ? cnt : tlen;
+ tlen = nfsm_rndup(cnt);
+ if (len != tlen || tlen != cnt)
+ nfsm_adj(mb, len - tlen, tlen - cnt);
+ if (v3) {
+ *tl++ = txdr_unsigned(cnt);
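+		/* the boolean that follows the count is the v3 eof flag: true when less data than requested was returned */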
+ if (len < reqlen)
+ *tl++ = nfs_true;
+ else
+ *tl++ = nfs_false;
+ }
+ *tl = txdr_unsigned(cnt);
+nfsmout:
+ if (vp)
+ vput(vp);
+ return(error);
+}
+
+/*
+ * nfs write service
+ */
+int
+nfsrv_write(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register struct iovec *ivp;
+ register int i, cnt;
+ register struct mbuf *mp;
+ register struct nfs_fattr *fp;
+ struct iovec *iv;
+ struct vattr va, forat;
+ register struct vattr *vap = &va;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, len, forat_ret = 1;
+ int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
+ int stable = NFSV3WRITE_FILESYNC;
+ int v3 = (nfsd->nd_flag & ND_NFSV3);
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp = NULL;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ off_t off;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ if (mrep == NULL) {
+ *mrq = NULL;
+ error = 0;
+ goto nfsmout;
+ }
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ if (v3) {
+ nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
+ off = fxdr_hyper(tl);
+ tl += 3;
+ stable = fxdr_unsigned(int, *tl++);
+ } else {
+ nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+ off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
+ tl += 2;
+ if (nfs_async)
+ stable = NFSV3WRITE_UNSTABLE;
+ }
+ retlen = len = fxdr_unsigned(int32_t, *tl);
+ cnt = i = 0;
+
+ /*
+ * For NFS Version 2, it is not obvious what a write of zero length
+ * should do, but I might as well be consistent with Version 3,
+ * which is to return ok so long as there are no permission problems.
+ */
+ if (len > 0) {
+ zeroing = 1;
+ mp = mrep;
+ while (mp) {
+ if (mp == md) {
+ zeroing = 0;
+ adjust = dpos - mtod(mp, caddr_t);
+ mp->m_len -= adjust;
+ if (mp->m_len > 0 && adjust > 0)
+ NFSMADV(mp, adjust);
+ }
+ if (zeroing)
+ mp->m_len = 0;
+ else if (mp->m_len > 0) {
+ i += mp->m_len;
+ if (i > len) {
+ mp->m_len -= (i - len);
+ zeroing = 1;
+ }
+ if (mp->m_len > 0)
+ cnt++;
+ }
+ mp = mp->m_next;
+ }
+ }
+ if (len > NFS_MAXDATA || len < 0 || i < len) {
+ error = EIO;
+ nfsm_reply(2 * NFSX_UNSIGNED);
+ nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
+ error = 0;
+ goto nfsmout;
+ }
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ vp = NULL;
+ nfsm_reply(2 * NFSX_UNSIGNED);
+ nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
+ error = 0;
+ goto nfsmout;
+ }
+ if (v3)
+ forat_ret = VOP_GETATTR(vp, &forat, cred, procp);
+ if (vp->v_type != VREG) {
+ if (v3)
+ error = EINVAL;
+ else
+ error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+ }
+ if (!error) {
+ nqsrv_getl(vp, ND_WRITE);
+ error = nfsrv_access(vp, VWRITE, cred, rdonly, procp, 1);
+ }
+ if (error) {
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_WCCDATA(v3));
+ nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
+ error = 0;
+ goto nfsmout;
+ }
+
+ if (len > 0) {
+ MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
+ M_WAITOK);
+ uiop->uio_iov = iv = ivp;
+ uiop->uio_iovcnt = cnt;
+ mp = mrep;
+ while (mp) {
+ if (mp->m_len > 0) {
+ ivp->iov_base = mtod(mp, caddr_t);
+ ivp->iov_len = mp->m_len;
+ ivp++;
+ }
+ mp = mp->m_next;
+ }
+
+ /*
+ * XXX
+ * The IO_METASYNC flag indicates that all metadata (and not just
+	 * enough to ensure data integrity) must be written to stable storage
+ * synchronously.
+ * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
+ */
+ if (stable == NFSV3WRITE_UNSTABLE)
+ ioflags = IO_NODELOCKED;
+ else if (stable == NFSV3WRITE_DATASYNC)
+ ioflags = (IO_SYNC | IO_NODELOCKED);
+ else
+ ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
+ uiop->uio_resid = len;
+ uiop->uio_rw = UIO_WRITE;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = (struct proc *)0;
+ uiop->uio_offset = off;
+ error = VOP_WRITE(vp, uiop, ioflags, cred);
+ nfsstats.srvvop_writes++;
+ FREE((caddr_t)iv, M_TEMP);
+ }
+ aftat_ret = VOP_GETATTR(vp, vap, cred, procp);
+ vput(vp);
+ vp = NULL;
+ if (!error)
+ error = aftat_ret;
+ nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
+ 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
+ if (v3) {
+ nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
+ if (error) {
+ error = 0;
+ goto nfsmout;
+ }
+ nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(retlen);
+ /*
+ * If nfs_async is set, then pretend the write was FILESYNC.
+ */
+ if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
+ *tl++ = txdr_unsigned(stable);
+ else
+ *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
+ /*
+ * Actually, there is no need to txdr these fields,
+ * but it may make the values more human readable,
+ * for debugging purposes.
+ */
+ if (nfsver.tv_sec == 0)
+ nfsver = boottime;
+ *tl++ = txdr_unsigned(nfsver.tv_sec);
+ *tl = txdr_unsigned(nfsver.tv_usec);
+ } else {
+ nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
+ nfsm_srvfillattr(vap, fp);
+ }
+nfsmout:
+ if (vp)
+ vput(vp);
+ return(error);
+}
+
+/*
+ * NFS write service with write gathering support. Called when
+ * nfsrvw_procrastinate > 0.
+ * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
+ * in Proc. of the Winter 1994 Usenix Conference, pp. 247-259, San Francisco,
+ * Jan. 1994.
+ */
+int
+nfsrv_writegather(ndp, slp, procp, mrq)
+ struct nfsrv_descript **ndp;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ register struct iovec *ivp;
+ register struct mbuf *mp;
+ register struct nfsrv_descript *wp, *nfsd, *owp, *swp;
+ register struct nfs_fattr *fp;
+ register int i;
+ struct iovec *iov;
+ struct nfsrvw_delayhash *wpp;
+ struct ucred *cred;
+ struct vattr va, forat;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos, dpos;
+ int error = 0, rdonly, cache, len, forat_ret = 1;
+ int ioflags, aftat_ret = 1, s, adjust, v3, zeroing;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq, *mrep, *md;
+ struct vnode *vp = NULL;
+ struct uio io, *uiop = &io;
+ u_quad_t frev, cur_usec;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ i = 0;
+ len = 0;
+#endif
+ *mrq = NULL;
+ if (*ndp) {
+ nfsd = *ndp;
+ *ndp = NULL;
+ mrep = nfsd->nd_mrep;
+ md = nfsd->nd_md;
+ dpos = nfsd->nd_dpos;
+ cred = &nfsd->nd_cr;
+ v3 = (nfsd->nd_flag & ND_NFSV3);
+ LIST_INIT(&nfsd->nd_coalesce);
+ nfsd->nd_mreq = NULL;
+ nfsd->nd_stable = NFSV3WRITE_FILESYNC;
+ cur_usec = nfs_curusec();
+ nfsd->nd_time = cur_usec +
+ (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
+
+ /*
+		 * Now, get the write header.
+ */
+ nfsm_srvmtofh(&nfsd->nd_fh);
+ if (v3) {
+ nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
+ nfsd->nd_off = fxdr_hyper(tl);
+ tl += 3;
+ nfsd->nd_stable = fxdr_unsigned(int, *tl++);
+ } else {
+ nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+ nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
+ tl += 2;
+ if (nfs_async)
+ nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
+ }
+ len = fxdr_unsigned(int32_t, *tl);
+ nfsd->nd_len = len;
+ nfsd->nd_eoff = nfsd->nd_off + len;
+
+ /*
+ * Trim the header out of the mbuf list and trim off any trailing
+ * junk so that the mbuf list has only the write data.
+ */
+ zeroing = 1;
+ i = 0;
+ mp = mrep;
+ while (mp) {
+ if (mp == md) {
+ zeroing = 0;
+ adjust = dpos - mtod(mp, caddr_t);
+ mp->m_len -= adjust;
+ if (mp->m_len > 0 && adjust > 0)
+ NFSMADV(mp, adjust);
+ }
+ if (zeroing)
+ mp->m_len = 0;
+ else {
+ i += mp->m_len;
+ if (i > len) {
+ mp->m_len -= (i - len);
+ zeroing = 1;
+ }
+ }
+ mp = mp->m_next;
+ }
+ if (len > NFS_MAXDATA || len < 0 || i < len) {
+nfsmout:
+ m_freem(mrep);
+ error = EIO;
+ nfsm_writereply(2 * NFSX_UNSIGNED, v3);
+ if (v3)
+ nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
+ nfsd->nd_mreq = mreq;
+ nfsd->nd_mrep = NULL;
+ nfsd->nd_time = 0;
+ }
+
+ /*
+ * Add this entry to the hash and time queues.
+ */
+ s = splsoftclock();
+ owp = NULL;
+ wp = slp->ns_tq.lh_first;
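+		/* walk the time queue to find the slot that keeps it sorted by nd_time */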
+ while (wp && wp->nd_time < nfsd->nd_time) {
+ owp = wp;
+ wp = wp->nd_tq.le_next;
+ }
+ NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
+ if (owp) {
+ LIST_INSERT_AFTER(owp, nfsd, nd_tq);
+ } else {
+ LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
+ }
+ if (nfsd->nd_mrep) {
+ wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
+ owp = NULL;
+ wp = wpp->lh_first;
+ while (wp &&
+ bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
+ owp = wp;
+ wp = wp->nd_hash.le_next;
+ }
+ while (wp && wp->nd_off < nfsd->nd_off &&
+ !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
+ owp = wp;
+ wp = wp->nd_hash.le_next;
+ }
+ if (owp) {
+ LIST_INSERT_AFTER(owp, nfsd, nd_hash);
+
+ /*
+ * Search the hash list for overlapping entries and
+ * coalesce.
+ */
+ for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
+ wp = nfsd->nd_hash.le_next;
+ if (NFSW_SAMECRED(owp, nfsd))
+ nfsrvw_coalesce(owp, nfsd);
+ }
+ } else {
+ LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
+ }
+ }
+ splx(s);
+ }
+
+ /*
+ * Now, do VOP_WRITE()s for any one(s) that need to be done now
+ * and generate the associated reply mbuf list(s).
+ */
+loop1:
+ cur_usec = nfs_curusec();
+ s = splsoftclock();
+ for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
+ owp = nfsd->nd_tq.le_next;
+ if (nfsd->nd_time > cur_usec)
+ break;
+ if (nfsd->nd_mreq)
+ continue;
+ NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
+ LIST_REMOVE(nfsd, nd_tq);
+ LIST_REMOVE(nfsd, nd_hash);
+ splx(s);
+ mrep = nfsd->nd_mrep;
+ nfsd->nd_mrep = NULL;
+ cred = &nfsd->nd_cr;
+ v3 = (nfsd->nd_flag & ND_NFSV3);
+ forat_ret = aftat_ret = 1;
+ error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp,
+ nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (!error) {
+ if (v3)
+ forat_ret = VOP_GETATTR(vp, &forat, cred, procp);
+ if (vp->v_type != VREG) {
+ if (v3)
+ error = EINVAL;
+ else
+ error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+ }
+ } else {
+ vp = NULL;
+ }
+ if (!error) {
+ nqsrv_getl(vp, ND_WRITE);
+ error = nfsrv_access(vp, VWRITE, cred, rdonly, procp, 1);
+ }
+
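+		/* map nd_stable onto VOP_WRITE() ioflags, as in nfsrv_write() above */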
+ if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
+ ioflags = IO_NODELOCKED;
+ else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
+ ioflags = (IO_SYNC | IO_NODELOCKED);
+ else
+ ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
+ uiop->uio_rw = UIO_WRITE;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = (struct proc *)0;
+ uiop->uio_offset = nfsd->nd_off;
+ uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
+ if (uiop->uio_resid > 0) {
+ mp = mrep;
+ i = 0;
+ while (mp) {
+ if (mp->m_len > 0)
+ i++;
+ mp = mp->m_next;
+ }
+ uiop->uio_iovcnt = i;
+ MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
+ M_TEMP, M_WAITOK);
+ uiop->uio_iov = ivp = iov;
+ mp = mrep;
+ while (mp) {
+ if (mp->m_len > 0) {
+ ivp->iov_base = mtod(mp, caddr_t);
+ ivp->iov_len = mp->m_len;
+ ivp++;
+ }
+ mp = mp->m_next;
+ }
+ if (!error) {
+ error = VOP_WRITE(vp, uiop, ioflags, cred);
+ nfsstats.srvvop_writes++;
+ }
+ FREE((caddr_t)iov, M_TEMP);
+ }
+ m_freem(mrep);
+ if (vp) {
+ aftat_ret = VOP_GETATTR(vp, &va, cred, procp);
+ vput(vp);
+ vp = NULL;
+ }
+
+ /*
+ * Loop around generating replies for all write rpcs that have
+ * now been completed.
+ */
+ swp = nfsd;
+ do {
+ NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
+ if (error) {
+ nfsm_writereply(NFSX_WCCDATA(v3), v3);
+ if (v3) {
+ nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
+ }
+ } else {
+ nfsm_writereply(NFSX_PREOPATTR(v3) +
+ NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
+ NFSX_WRITEVERF(v3), v3);
+ if (v3) {
+ nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
+ nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nfsd->nd_len);
+ *tl++ = txdr_unsigned(swp->nd_stable);
+ /*
+ * Actually, there is no need to txdr these fields,
+ * but it may make the values more human readable,
+ * for debugging purposes.
+ */
+ if (nfsver.tv_sec == 0)
+ nfsver = boottime;
+ *tl++ = txdr_unsigned(nfsver.tv_sec);
+ *tl = txdr_unsigned(nfsver.tv_usec);
+ } else {
+ nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
+ nfsm_srvfillattr(&va, fp);
+ }
+ }
+ nfsd->nd_mreq = mreq;
+ if (nfsd->nd_mrep)
+ panic("nfsrv_write: nd_mrep not free");
+
+ /*
+ * Done. Put it at the head of the timer queue so that
+ * the final phase can return the reply.
+ */
+ s = splsoftclock();
+ if (nfsd != swp) {
+ nfsd->nd_time = 0;
+ LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
+ }
+ nfsd = swp->nd_coalesce.lh_first;
+ if (nfsd) {
+ LIST_REMOVE(nfsd, nd_tq);
+ }
+ splx(s);
+ } while (nfsd);
+ s = splsoftclock();
+ swp->nd_time = 0;
+ LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
+ splx(s);
+ goto loop1;
+ }
+ splx(s);
+
+ /*
+ * Search for a reply to return.
+ */
+ s = splsoftclock();
+ for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next)
+ if (nfsd->nd_mreq) {
+ NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
+ LIST_REMOVE(nfsd, nd_tq);
+ *mrq = nfsd->nd_mreq;
+ *ndp = nfsd;
+ break;
+ }
+ splx(s);
+ return (0);
+}
+
+/*
+ * Coalesce the write request nfsd into owp. To do this we must:
+ * - remove nfsd from the queues
+ * - merge nfsd->nd_mrep into owp->nd_mrep
+ * - update the nd_eoff and nd_stable for owp
+ * - put nfsd on owp's nd_coalesce list
+ * NB: Must be called at splsoftclock().
+ */
+static void
+nfsrvw_coalesce(owp, nfsd)
+ register struct nfsrv_descript *owp;
+ register struct nfsrv_descript *nfsd;
+{
+ register int overlap;
+ register struct mbuf *mp;
+ struct nfsrv_descript *p;
+
+ NFS_DPF(WG, ("C%03x-%03x",
+ nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
+ LIST_REMOVE(nfsd, nd_hash);
+ LIST_REMOVE(nfsd, nd_tq);
+ if (owp->nd_eoff < nfsd->nd_eoff) {
+ overlap = owp->nd_eoff - nfsd->nd_off;
+ if (overlap < 0)
+ panic("nfsrv_coalesce: bad off");
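+		/* trim the bytes already covered by owp off the front of nfsd's data */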
+ if (overlap > 0)
+ m_adj(nfsd->nd_mrep, overlap);
+ mp = owp->nd_mrep;
+ while (mp->m_next)
+ mp = mp->m_next;
+ mp->m_next = nfsd->nd_mrep;
+ owp->nd_eoff = nfsd->nd_eoff;
+ } else
+ m_freem(nfsd->nd_mrep);
+ nfsd->nd_mrep = NULL;
+ if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
+ owp->nd_stable = NFSV3WRITE_FILESYNC;
+ else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
+ owp->nd_stable == NFSV3WRITE_UNSTABLE)
+ owp->nd_stable = NFSV3WRITE_DATASYNC;
+ LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
+
+ /*
+	 * If nfsd had anything else coalesced into it, transfer those
+	 * entries to owp; otherwise their replies will never get sent.
+ */
+ for (p = nfsd->nd_coalesce.lh_first; p;
+ p = nfsd->nd_coalesce.lh_first) {
+ LIST_REMOVE(p, nd_tq);
+ LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
+ }
+}
+
+/*
+ * nfs create service
+ * now does a truncate to 0 length via setattr if it already exists
+ */
+int
+nfsrv_create(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register struct nfs_fattr *fp;
+ struct vattr va, dirfor, diraft;
+ register struct vattr *vap = &va;
+ register struct nfsv2_sattr *sp;
+ register u_int32_t *tl;
+ struct nameidata nd;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdev, cache, len, tsize, dirfor_ret = 1, diraft_ret = 1;
+ int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
+ caddr_t cp;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *dirp = (struct vnode *)0;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev, tempsize;
+ u_char cverf[NFSX_V3CREATEVERF];
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ rdev = 0;
+#endif
+ ndclear(&nd);
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvnamesiz(len);
+
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
+
+ /*
+ * Call namei and do initial cleanup to get a few things
+	 * out of the way.  If we get an initial error we clean up and
+ * and return here to avoid special-casing the invalid nd
+ * structure through the rest of the case. dirp may be
+ * set even if an error occurs, but the nd structure will not
+ * be valid at all if an error occurs so we have to invalidate it
+ * prior to calling nfsm_reply ( which might goto nfsmout ).
+ */
+ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
+ &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
+ if (dirp) {
+ if (v3) {
+ dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred,
+ procp);
+ } else {
+ vrele(dirp);
+ dirp = NULL;
+ }
+ }
+ if (error) {
+ nfsm_reply(NFSX_WCCDATA(v3));
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ error = 0;
+ goto nfsmout;
+ }
+
+ /*
+ * No error. Continue. State:
+ *
+ * startdir is valid ( we release this immediately )
+ * dirp may be valid
+ * nd.ni_vp may be valid
+ * nd.ni_dvp is valid
+ *
+ * The error state is set through the code and we may also do some
+ * opportunistic releasing of vnodes to avoid holding locks through
+ * NFS I/O. The cleanup at the end is a catch-all
+ */
+
+ VATTR_NULL(vap);
+ if (v3) {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ how = fxdr_unsigned(int, *tl);
+ switch (how) {
+ case NFSV3CREATE_GUARDED:
+ if (nd.ni_vp) {
+ error = EEXIST;
+ break;
+ }
+ /* fall through */
+ case NFSV3CREATE_UNCHECKED:
+ nfsm_srvsattr(vap);
+ break;
+ case NFSV3CREATE_EXCLUSIVE:
+ nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF);
+ bcopy(cp, cverf, NFSX_V3CREATEVERF);
+ exclusive_flag = 1;
+ if (nd.ni_vp == NULL)
+ vap->va_mode = 0;
+ break;
+ };
+ vap->va_type = VREG;
+ } else {
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
+ vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
+ if (vap->va_type == VNON)
+ vap->va_type = VREG;
+ vap->va_mode = nfstov_mode(sp->sa_mode);
+ switch (vap->va_type) {
+ case VREG:
+ tsize = fxdr_unsigned(int32_t, sp->sa_size);
+ if (tsize != -1)
+ vap->va_size = (u_quad_t)tsize;
+ break;
+ case VCHR:
+ case VBLK:
+ case VFIFO:
+ rdev = fxdr_unsigned(long, sp->sa_size);
+ break;
+ default:
+ break;
+ };
+ }
+
+ /*
+	 * If it doesn't exist, create it;
+	 * otherwise just truncate it to 0 length.
+	 *   (should the mode be set too?)
+ *
+ * The only possible error we can have at this point is EEXIST.
+ * nd.ni_vp will also be non-NULL in that case.
+ */
+ if (nd.ni_vp == NULL) {
+ if (vap->va_type == VREG || vap->va_type == VSOCK) {
+ nqsrv_getl(nd.ni_dvp, ND_WRITE);
+ error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
+ if (error)
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ else {
+ nfsrv_object_create(nd.ni_vp);
+ if (exclusive_flag) {
+ exclusive_flag = 0;
+ VATTR_NULL(vap);
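+					/* stash the exclusive-create verifier in the atime field; it is compared again after the create */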
+ bcopy(cverf, (caddr_t)&vap->va_atime,
+ NFSX_V3CREATEVERF);
+ error = VOP_SETATTR(nd.ni_vp, vap, cred,
+ procp);
+ }
+ }
+ } else if (
+ vap->va_type == VCHR ||
+ vap->va_type == VBLK ||
+ vap->va_type == VFIFO
+ ) {
+ /*
+ * Handle SysV FIFO node special cases. All other
+			 * device nodes may only be created by the superuser.
+ */
+ if (vap->va_type == VCHR && rdev == 0xffffffff)
+ vap->va_type = VFIFO;
+ if (vap->va_type != VFIFO &&
+ (error = suser_xxx(cred, 0, 0))) {
+ goto nfsmreply0;
+ }
+ vap->va_rdev = rdev;
+ nqsrv_getl(nd.ni_dvp, ND_WRITE);
+
+ error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
+ if (error) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ goto nfsmreply0;
+ }
+ vput(nd.ni_vp);
+ nd.ni_vp = NULL;
+
+ /*
+ * release dvp prior to lookup
+ */
+ vput(nd.ni_dvp);
+ nd.ni_dvp = NULL;
+
+ /*
+ * Setup for lookup.
+ *
+ * Even though LOCKPARENT was cleared, ni_dvp may
+ * be garbage.
+ */
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+ nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
+ nd.ni_cnd.cn_proc = procp;
+ nd.ni_cnd.cn_cred = cred;
+
+ error = lookup(&nd);
+ nd.ni_dvp = NULL;
+
+ if (error != 0) {
+ nfsm_reply(0);
+ /* fall through on certain errors */
+ }
+ nfsrv_object_create(nd.ni_vp);
+ if (nd.ni_cnd.cn_flags & ISSYMLINK) {
+ error = EINVAL;
+ goto nfsmreply0;
+ }
+ } else {
+ error = ENXIO;
+ }
+ } else {
+ if (vap->va_size != -1) {
+ error = nfsrv_access(nd.ni_vp, VWRITE, cred,
+ (nd.ni_cnd.cn_flags & RDONLY), procp, 0);
+ if (!error) {
+ nqsrv_getl(nd.ni_vp, ND_WRITE);
+ tempsize = vap->va_size;
+ VATTR_NULL(vap);
+ vap->va_size = tempsize;
+ error = VOP_SETATTR(nd.ni_vp, vap, cred,
+ procp);
+ }
+ }
+ }
+
+ if (!error) {
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
+ error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
+ if (!error)
+ error = VOP_GETATTR(nd.ni_vp, vap, cred, procp);
+ }
+ if (v3) {
+ if (exclusive_flag && !error &&
+ bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
+ error = EEXIST;
+ diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp);
+ vrele(dirp);
+ dirp = NULL;
+ }
+ nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
+ if (v3) {
+ if (!error) {
+ nfsm_srvpostop_fh(fhp);
+ nfsm_srvpostop_attr(0, vap);
+ }
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ error = 0;
+ } else {
+ nfsm_srvfhtom(fhp, v3);
+ nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
+ nfsm_srvfillattr(vap, fp);
+ }
+ goto nfsmout;
+
+nfsmreply0:
+ nfsm_reply(0);
+ error = 0;
+ /* fall through */
+
+nfsmout:
+ if (nd.ni_startdir) {
+ vrele(nd.ni_startdir);
+ nd.ni_startdir = NULL;
+ }
+ if (dirp)
+ vrele(dirp);
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (nd.ni_dvp) {
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ }
+ if (nd.ni_vp)
+ vput(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * nfs v3 mknod service
+ */
+int
+nfsrv_mknod(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ struct vattr va, dirfor, diraft;
+ register struct vattr *vap = &va;
+ register u_int32_t *tl;
+ struct nameidata nd;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1;
+ u_int32_t major, minor;
+ enum vtype vtyp;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp, *dirp = (struct vnode *)0;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ ndclear(&nd);
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvnamesiz(len);
+
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
+
+ /*
+ * Handle nfs_namei() call. If an error occurs, the nd structure
+ * is not valid. However, nfsm_*() routines may still jump to
+ * nfsmout.
+ */
+
+ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
+ &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
+ if (dirp)
+ dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp);
+ if (error) {
+ nfsm_reply(NFSX_WCCDATA(1));
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ error = 0;
+ goto nfsmout;
+ }
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ vtyp = nfsv3tov_type(*tl);
+ if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
+ error = NFSERR_BADTYPE;
+ goto out;
+ }
+ VATTR_NULL(vap);
+ nfsm_srvsattr(vap);
+ if (vtyp == VCHR || vtyp == VBLK) {
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ major = fxdr_unsigned(u_int32_t, *tl++);
+ minor = fxdr_unsigned(u_int32_t, *tl);
+ vap->va_rdev = makeudev(major, minor);
+ }
+
+ /*
+	 * If it doesn't exist, create it.
+ */
+ if (nd.ni_vp) {
+ error = EEXIST;
+ goto out;
+ }
+ vap->va_type = vtyp;
+ if (vtyp == VSOCK) {
+ vrele(nd.ni_startdir);
+ nd.ni_startdir = NULL;
+ nqsrv_getl(nd.ni_dvp, ND_WRITE);
+ error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
+ if (error)
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ } else {
+ if (vtyp != VFIFO && (error = suser_xxx(cred, 0, 0)))
+ goto out;
+ nqsrv_getl(nd.ni_dvp, ND_WRITE);
+
+ error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
+ if (error) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ goto out;
+ }
+ vput(nd.ni_vp);
+ nd.ni_vp = NULL;
+
+ /*
+ * Release dvp prior to lookup
+ */
+ vput(nd.ni_dvp);
+ nd.ni_dvp = NULL;
+
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+ nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
+ nd.ni_cnd.cn_proc = procp;
+ nd.ni_cnd.cn_cred = procp->p_ucred;
+
+ error = lookup(&nd);
+ nd.ni_dvp = NULL;
+
+ if (error)
+ goto out;
+ if (nd.ni_cnd.cn_flags & ISSYMLINK)
+ error = EINVAL;
+ }
+
+ /*
+	 * Send the response, clean up, and return.
+ */
+out:
+ if (nd.ni_startdir) {
+ vrele(nd.ni_startdir);
+ nd.ni_startdir = NULL;
+ }
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (nd.ni_dvp) {
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ nd.ni_dvp = NULL;
+ }
+ vp = nd.ni_vp;
+ if (!error) {
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ error = VFS_VPTOFH(vp, &fhp->fh_fid);
+ if (!error)
+ error = VOP_GETATTR(vp, vap, cred, procp);
+ vput(vp);
+ vp = NULL;
+ nd.ni_vp = NULL;
+ }
+ diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp);
+ if (dirp) {
+ vrele(dirp);
+ dirp = NULL;
+ }
+ nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
+ if (!error) {
+ nfsm_srvpostop_fh(fhp);
+ nfsm_srvpostop_attr(0, vap);
+ }
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ return (0);
+nfsmout:
+ if (dirp)
+ vrele(dirp);
+ if (nd.ni_startdir)
+ vrele(nd.ni_startdir);
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (nd.ni_dvp) {
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ }
+ if (nd.ni_vp)
+ vput(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * nfs remove service
+ */
+int
+nfsrv_remove(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ struct nameidata nd;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1;
+ int v3 = (nfsd->nd_flag & ND_NFSV3);
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *dirp;
+ struct vattr dirfor, diraft;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ ndclear(&nd);
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvnamesiz(len);
+
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = DELETE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
+ &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
+ if (dirp) {
+ if (v3) {
+ dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred,
+ procp);
+ } else {
+ vrele(dirp);
+ dirp = NULL;
+ }
+ }
+ if (error == 0) {
+ if (nd.ni_vp->v_type == VDIR) {
+ error = EPERM; /* POSIX */
+ goto out;
+ }
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (nd.ni_vp->v_flag & VROOT) {
+ error = EBUSY;
+ goto out;
+ }
+out:
+ if (!error) {
+ nqsrv_getl(nd.ni_dvp, ND_WRITE);
+ nqsrv_getl(nd.ni_vp, ND_WRITE);
+ error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ }
+ }
+ if (dirp && v3) {
+ diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp);
+ vrele(dirp);
+ dirp = NULL;
+ }
+ nfsm_reply(NFSX_WCCDATA(v3));
+ if (v3) {
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ error = 0;
+ }
+nfsmout:
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (nd.ni_dvp) {
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ }
+ if (nd.ni_vp)
+ vput(nd.ni_vp);
+ return(error);
+}
+
+/*
+ * nfs rename service
+ */
+int
+nfsrv_rename(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, cache, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
+ int tdirfor_ret = 1, tdiraft_ret = 1;
+ int v3 = (nfsd->nd_flag & ND_NFSV3);
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct nameidata fromnd, tond;
+ struct vnode *fvp, *tvp, *tdvp, *fdirp = (struct vnode *)0;
+ struct vnode *tdirp = (struct vnode *)0;
+ struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
+ nfsfh_t fnfh, tnfh;
+ fhandle_t *ffhp, *tfhp;
+ u_quad_t frev;
+ uid_t saved_uid;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ fvp = (struct vnode *)0;
+#endif
+ ffhp = &fnfh.fh_generic;
+ tfhp = &tnfh.fh_generic;
+
+ /*
+	 * Clear the fields in case a goto nfsmout occurs from a macro.
+ */
+
+ ndclear(&fromnd);
+ ndclear(&tond);
+
+ nfsm_srvmtofh(ffhp);
+ nfsm_srvnamesiz(len);
+ /*
+ * Remember our original uid so that we can reset cr_uid before
+ * the second nfs_namei() call, in case it is remapped.
+ */
+ saved_uid = cred->cr_uid;
+ fromnd.ni_cnd.cn_cred = cred;
+ fromnd.ni_cnd.cn_nameiop = DELETE;
+ fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
+ error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md,
+ &dpos, &fdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
+ if (fdirp) {
+ if (v3) {
+ fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred,
+ procp);
+ } else {
+ vrele(fdirp);
+ fdirp = NULL;
+ }
+ }
+ if (error) {
+ nfsm_reply(2 * NFSX_WCCDATA(v3));
+ nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
+ nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
+ error = 0;
+ goto nfsmout;
+ }
+ fvp = fromnd.ni_vp;
+ nfsm_srvmtofh(tfhp);
+ nfsm_strsiz(len2, NFS_MAXNAMLEN);
+ cred->cr_uid = saved_uid;
+ tond.ni_cnd.cn_cred = cred;
+ tond.ni_cnd.cn_nameiop = RENAME;
+ tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
+ error = nfs_namei(&tond, tfhp, len2, slp, nam, &md,
+ &dpos, &tdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
+ if (tdirp) {
+ if (v3) {
+ tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred,
+ procp);
+ } else {
+ vrele(tdirp);
+ tdirp = NULL;
+ }
+ }
+ if (error)
+ goto out1;
+
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+ if (tvp != NULL) {
+ if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+ if (v3)
+ error = EEXIST;
+ else
+ error = EISDIR;
+ goto out;
+ } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+ if (v3)
+ error = EEXIST;
+ else
+ error = ENOTDIR;
+ goto out;
+ }
+ if (tvp->v_type == VDIR && tvp->v_mountedhere) {
+ if (v3)
+ error = EXDEV;
+ else
+ error = ENOTEMPTY;
+ goto out;
+ }
+ }
+ if (fvp->v_type == VDIR && fvp->v_mountedhere) {
+ if (v3)
+ error = EXDEV;
+ else
+ error = ENOTEMPTY;
+ goto out;
+ }
+ if (fvp->v_mount != tdvp->v_mount) {
+ if (v3)
+ error = EXDEV;
+ else
+ error = ENOTEMPTY;
+ goto out;
+ }
+ if (fvp == tdvp) {
+ if (v3)
+ error = EINVAL;
+ else
+ error = ENOTEMPTY;
+ }
+ /*
+ * If source is the same as the destination (that is the
+ * same vnode with the same name in the same directory),
+ * then there is nothing to do.
+ */
+ if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+ fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+ !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+ fromnd.ni_cnd.cn_namelen))
+ error = -1;
+out:
+ if (!error) {
+ /*
+ * The VOP_RENAME function releases all vnode references &
+ * locks prior to returning so we need to clear the pointers
+ * to bypass cleanup code later on.
+ */
+ nqsrv_getl(fromnd.ni_dvp, ND_WRITE);
+ nqsrv_getl(tdvp, ND_WRITE);
+ if (tvp) {
+ nqsrv_getl(tvp, ND_WRITE);
+ }
+ error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+ tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+ fromnd.ni_dvp = NULL;
+ fromnd.ni_vp = NULL;
+ tond.ni_dvp = NULL;
+ tond.ni_vp = NULL;
+ if (error) {
+ fromnd.ni_cnd.cn_flags &= ~HASBUF;
+ tond.ni_cnd.cn_flags &= ~HASBUF;
+ }
+ } else {
+ if (error == -1)
+ error = 0;
+ }
+ /* fall through */
+
+out1:
+ if (fdirp)
+ fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, procp);
+ if (tdirp)
+ tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, procp);
+ nfsm_reply(2 * NFSX_WCCDATA(v3));
+ if (v3) {
+ nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
+ nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
+ }
+ error = 0;
+ /* fall through */
+
+nfsmout:
+ /*
+ * Clear out tond related fields
+ */
+ if (tdirp)
+ vrele(tdirp);
+ if (tond.ni_startdir)
+ vrele(tond.ni_startdir);
+ NDFREE(&tond, NDF_ONLY_PNBUF);
+ if (tond.ni_dvp) {
+ if (tond.ni_dvp == tond.ni_vp)
+ vrele(tond.ni_dvp);
+ else
+ vput(tond.ni_dvp);
+ }
+ if (tond.ni_vp)
+ vput(tond.ni_vp);
+
+ /*
+ * Clear out fromnd related fields
+ */
+ if (fdirp)
+ vrele(fdirp);
+ if (fromnd.ni_startdir)
+ vrele(fromnd.ni_startdir);
+ NDFREE(&fromnd, NDF_ONLY_PNBUF);
+ if (fromnd.ni_dvp)
+ vrele(fromnd.ni_dvp);
+ if (fromnd.ni_vp)
+ vrele(fromnd.ni_vp);
+
+ return (error);
+}
+
+/*
+ * nfs link service
+ */
+int
+nfsrv_link(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ struct nameidata nd;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, len, dirfor_ret = 1, diraft_ret = 1;
+ int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp = NULL, *xp, *dirp = (struct vnode *)0;
+ struct vattr dirfor, diraft, at;
+ nfsfh_t nfh, dnfh;
+ fhandle_t *fhp, *dfhp;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ ndclear(&nd);
+
+ fhp = &nfh.fh_generic;
+ dfhp = &dnfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvmtofh(dfhp);
+ nfsm_srvnamesiz(len);
+
+ error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
+ nfsm_srvpostop_attr(getret, &at);
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ vp = NULL;
+ error = 0;
+ goto nfsmout;
+ }
+ if (vp->v_type == VDIR) {
+ error = EPERM; /* POSIX */
+ goto out1;
+ }
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos,
+ &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
+ if (dirp) {
+ if (v3) {
+ dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred,
+ procp);
+ } else {
+ vrele(dirp);
+ dirp = NULL;
+ }
+ }
+ if (error)
+ goto out1;
+
+ xp = nd.ni_vp;
+ if (xp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+ xp = nd.ni_dvp;
+ if (vp->v_mount != xp->v_mount)
+ error = EXDEV;
+out:
+ if (!error) {
+ nqsrv_getl(vp, ND_WRITE);
+ nqsrv_getl(xp, ND_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ }
+ /* fall through */
+
+out1:
+ if (v3)
+ getret = VOP_GETATTR(vp, &at, cred, procp);
+ if (dirp)
+ diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp);
+ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
+ if (v3) {
+ nfsm_srvpostop_attr(getret, &at);
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ error = 0;
+ }
+ /* fall through */
+
+nfsmout:
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (dirp)
+ vrele(dirp);
+ if (vp)
+ vrele(vp);
+ if (nd.ni_dvp) {
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ }
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ return(error);
+}
+
+/*
+ * nfs symbolic link service
+ */
+int
+nfsrv_symlink(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ struct vattr va, dirfor, diraft;
+ struct nameidata nd;
+ register struct vattr *vap = &va;
+ register u_int32_t *tl;
+ register int32_t t1;
+ struct nfsv2_sattr *sp;
+ char *bpos, *pathcp = (char *)0, *cp2;
+ struct uio io;
+ struct iovec iv;
+ int error = 0, cache, len, len2, dirfor_ret = 1, diraft_ret = 1;
+ int v3 = (nfsd->nd_flag & ND_NFSV3);
+ struct mbuf *mb, *mreq, *mb2;
+ struct vnode *dirp = (struct vnode *)0;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ ndclear(&nd);
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvnamesiz(len);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART;
+ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
+ &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
+ if (dirp) {
+ if (v3) {
+ dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred,
+ procp);
+ } else {
+ vrele(dirp);
+ dirp = NULL;
+ }
+ }
+ if (error)
+ goto out;
+
+ VATTR_NULL(vap);
+ if (v3)
+ nfsm_srvsattr(vap);
+ nfsm_strsiz(len2, NFS_MAXPATHLEN);
+ MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
+ iv.iov_base = pathcp;
+ iv.iov_len = len2;
+ io.uio_resid = len2;
+ io.uio_offset = 0;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ nfsm_mtouio(&io, len2);
+ if (!v3) {
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
+ vap->va_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
+ }
+ *(pathcp + len2) = '\0';
+ if (nd.ni_vp) {
+ error = EEXIST;
+ goto out;
+ }
+
+ /*
+ * issue symlink op. SAVESTART is set so the underlying path component
+ * is only freed by the VOP if an error occurs.
+ */
+ nqsrv_getl(nd.ni_dvp, ND_WRITE);
+ error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
+ if (error)
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ else {
+ vput(nd.ni_vp);
+ nd.ni_vp = NULL;
+ }
+ /*
+ * releases directory prior to potential lookup op.
+ */
+ vput(nd.ni_dvp);
+ nd.ni_dvp = NULL;
+
+ if (error == 0) {
+ if (v3) {
+ /*
+ * Issue lookup. Leave SAVESTART set so we can easily free
+ * the name buffer later on.
+ *
+ * since LOCKPARENT is not set, ni_dvp will be garbage on
+ * return whether an error occurs or not.
+ */
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+ nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
+ nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
+ nd.ni_cnd.cn_proc = procp;
+ nd.ni_cnd.cn_cred = cred;
+
+ error = lookup(&nd);
+ nd.ni_dvp = NULL;
+
+ if (error == 0) {
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
+ error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
+ if (!error)
+ error = VOP_GETATTR(nd.ni_vp, vap, cred,
+ procp);
+ vput(nd.ni_vp);
+ nd.ni_vp = NULL;
+ }
+ }
+ }
+out:
+ /*
+ * These releases aren't strictly required; does doing them even
+ * make any sense? XXX can nfsm_reply() block?
+ */
+ if (pathcp) {
+ FREE(pathcp, M_TEMP);
+ pathcp = NULL;
+ }
+ if (dirp) {
+ diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp);
+ vrele(dirp);
+ dirp = NULL;
+ }
+ if (nd.ni_startdir) {
+ vrele(nd.ni_startdir);
+ nd.ni_startdir = NULL;
+ }
+ nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
+ if (v3) {
+ if (!error) {
+ nfsm_srvpostop_fh(fhp);
+ nfsm_srvpostop_attr(0, vap);
+ }
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ }
+ error = 0;
+ /* fall through */
+
+nfsmout:
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (nd.ni_dvp) {
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ }
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ if (nd.ni_startdir)
+ vrele(nd.ni_startdir);
+ if (dirp)
+ vrele(dirp);
+ if (pathcp)
+ FREE(pathcp, M_TEMP);
+
+ return (error);
+}
+
+/*
+ * nfs mkdir service
+ */
+int
+nfsrv_mkdir(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ struct vattr va, dirfor, diraft;
+ register struct vattr *vap = &va;
+ register struct nfs_fattr *fp;
+ struct nameidata nd;
+ register caddr_t cp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1;
+ int v3 = (nfsd->nd_flag & ND_NFSV3);
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *dirp = NULL;
+ int vpexcl = 0;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ ndclear(&nd);
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvnamesiz(len);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+
+ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
+ &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
+ if (dirp) {
+ if (v3) {
+ dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred,
+ procp);
+ } else {
+ vrele(dirp);
+ dirp = NULL;
+ }
+ }
+ if (error) {
+ nfsm_reply(NFSX_WCCDATA(v3));
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ error = 0;
+ goto nfsmout;
+ }
+ VATTR_NULL(vap);
+ if (v3) {
+ nfsm_srvsattr(vap);
+ } else {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ vap->va_mode = nfstov_mode(*tl++);
+ }
+
+ /*
+ * At this point nd.ni_dvp is referenced and exclusively locked and
+ * nd.ni_vp, if it exists, is referenced but not locked.
+ */
+
+ vap->va_type = VDIR;
+ if (nd.ni_vp != NULL) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ error = EEXIST;
+ goto out;
+ }
+
+ /*
+ * Issue mkdir op. Since SAVESTART is not set, the pathname
+ * component is freed by the VOP call. This will fill in
+ * nd.ni_vp and leave it referenced and exclusively locked.
+ */
+ nqsrv_getl(nd.ni_dvp, ND_WRITE);
+ error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vpexcl = 1;
+
+ vput(nd.ni_dvp);
+ nd.ni_dvp = NULL;
+
+ if (!error) {
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
+ error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
+ if (!error)
+ error = VOP_GETATTR(nd.ni_vp, vap, cred, procp);
+ }
+out:
+ if (dirp)
+ diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp);
+ nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
+ if (v3) {
+ if (!error) {
+ nfsm_srvpostop_fh(fhp);
+ nfsm_srvpostop_attr(0, vap);
+ }
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ } else {
+ nfsm_srvfhtom(fhp, v3);
+ nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
+ nfsm_srvfillattr(vap, fp);
+ }
+ error = 0;
+ /* fall through */
+
+nfsmout:
+ if (dirp)
+ vrele(dirp);
+ if (nd.ni_dvp) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (nd.ni_dvp == nd.ni_vp && vpexcl)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ }
+ if (nd.ni_vp) {
+ if (vpexcl)
+ vput(nd.ni_vp);
+ else
+ vrele(nd.ni_vp);
+ }
+ return (error);
+}
+
+/*
+ * nfs rmdir service
+ */
+int
+nfsrv_rmdir(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1;
+ int v3 = (nfsd->nd_flag & ND_NFSV3);
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp, *dirp = (struct vnode *)0;
+ struct vattr dirfor, diraft;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ struct nameidata nd;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ ndclear(&nd);
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvnamesiz(len);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = DELETE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
+ &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
+ if (dirp) {
+ if (v3) {
+ dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred,
+ procp);
+ } else {
+ vrele(dirp);
+ dirp = NULL;
+ }
+ }
+ if (error) {
+ nfsm_reply(NFSX_WCCDATA(v3));
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ error = 0;
+ goto nfsmout;
+ }
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ /*
+ * No rmdir "." please.
+ */
+ if (nd.ni_dvp == vp) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+out:
+ /*
+ * Issue or abort the op. Since SAVESTART is not set, the path name
+ * component is freed by the VOP in either case.
+ */
+ if (!error) {
+ nqsrv_getl(nd.ni_dvp, ND_WRITE);
+ nqsrv_getl(vp, ND_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ }
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+
+ if (dirp)
+ diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp);
+ nfsm_reply(NFSX_WCCDATA(v3));
+ if (v3) {
+ nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ error = 0;
+ }
+ /* fall through */
+
+nfsmout:
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (dirp)
+ vrele(dirp);
+ if (nd.ni_dvp) {
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ }
+ if (nd.ni_vp)
+ vput(nd.ni_vp);
+
+ return(error);
+}
+
+/*
+ * nfs readdir service
+ * - mallocs what it thinks is enough to read
+ * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
+ * - calls VOP_READDIR()
+ * - loops around building the reply
+ * if the output generated exceeds count break out of loop
+ * The nfsm_clget macro is used here so that the reply will be packed
+ * tightly in mbuf clusters.
+ * - it only knows that it has encountered eof when the VOP_READDIR()
+ * reads nothing
+ * - as such, one readdir rpc will return eof == false even though you are
+ * at the end, and the next one will then return eof
+ * - it trims out records with d_fileno == 0
+ * this doesn't matter for Unix clients, but such records might
+ * confuse clients for other OSes.
+ * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
+ * than requested, but this may not apply to all filesystems. For
+ * example, client NFS does not { although it is never remote mounted
+ * anyhow }
+ * The alternate call nfsrv_readdirplus() does lookups as well.
+ * PS: The NFS protocol spec does not clarify what the "count" byte
+ * argument is a count of: just name strings and file ids, the
+ * entire reply rpc, or something else.
+ * I tried just file name and id sizes and it confused the Sun client,
+ * so I am using the full rpc size now. The "paranoia.." comment refers
+ * to including the status longwords that are not a part of the dir.
+ * "entry" structures, but are in the rpc.
+ */
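The "count rounded up to a multiple of NFS_DIRBLKSIZ" step described above is the usual power-of-two round-up idiom, which appears a few lines below as ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)). A minimal stand-alone sketch of that arithmetic; the DIRBLKSIZ value of 512 is assumed here purely for illustration:

#include <stdio.h>

#define DIRBLKSIZ 512   /* assumed value, for illustration only */

/* Round cnt up to the next multiple of DIRBLKSIZ (a power of 2). */
static unsigned int
roundup_dirblk(unsigned int cnt)
{
        return ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
}

int
main(void)
{
        printf("%u -> %u\n", 1u, roundup_dirblk(1));      /* 1 -> 512 */
        printf("%u -> %u\n", 512u, roundup_dirblk(512));  /* 512 -> 512 */
        printf("%u -> %u\n", 513u, roundup_dirblk(513));  /* 513 -> 1024 */
        return (0);
}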
+struct flrep {
+ nfsuint64 fl_off;
+ u_int32_t fl_postopok;
+ u_int32_t fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
+ u_int32_t fl_fhok;
+ u_int32_t fl_fhsize;
+ u_int32_t fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
+};
+
+int
+nfsrv_readdir(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register char *bp, *be;
+ register struct mbuf *mp;
+ register struct dirent *dp;
+ register caddr_t cp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2, *mreq, *mp2;
+ char *cpos, *cend, *cp2, *rbuf;
+ struct vnode *vp = NULL;
+ struct vattr at;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ struct uio io;
+ struct iovec iv;
+ int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
+ int siz, cnt, fullsiz, eofflag, rdonly, cache, ncookies;
+ int v3 = (nfsd->nd_flag & ND_NFSV3);
+ u_quad_t frev, off, toff, verf;
+ u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ if (v3) {
+ nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
+ toff = fxdr_hyper(tl);
+ tl += 2;
+ verf = fxdr_hyper(tl);
+ tl += 2;
+ } else {
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ toff = fxdr_unsigned(u_quad_t, *tl++);
+ verf = 0; /* shut up gcc */
+ }
+ off = toff;
+ cnt = fxdr_unsigned(int, *tl);
+ siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
+ xfer = NFS_SRVMAXDATA(nfsd);
+ if (siz > xfer)
+ siz = xfer;
+ fullsiz = siz;
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (!error && vp->v_type != VDIR) {
+ error = ENOTDIR;
+ vput(vp);
+ vp = NULL;
+ }
+ if (error) {
+ nfsm_reply(NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+
+ /*
+ * Obtain lock on vnode for this section of the code
+ */
+
+ nqsrv_getl(vp, ND_READ);
+ if (v3) {
+ error = getret = VOP_GETATTR(vp, &at, cred, procp);
+#if 0
+ /*
+ * XXX This check may be too strict for Solaris 2.5 clients.
+ */
+ if (!error && toff && verf && verf != at.va_filerev)
+ error = NFSERR_BAD_COOKIE;
+#endif
+ }
+ if (!error)
+ error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0);
+ if (error) {
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_POSTOPATTR(v3));
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+ VOP_UNLOCK(vp, 0, procp);
+
+ /*
+ * End of the locked section. Allocate rbuf and continue.
+ */
+ MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+ iv.iov_base = rbuf;
+ iv.iov_len = fullsiz;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_offset = (off_t)off;
+ io.uio_resid = fullsiz;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ eofflag = 0;
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
+ if (cookies) {
+ free((caddr_t)cookies, M_TEMP);
+ cookies = NULL;
+ }
+ error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
+ off = (off_t)io.uio_offset;
+ if (!cookies && !error)
+ error = NFSERR_PERM;
+ if (v3) {
+ getret = VOP_GETATTR(vp, &at, cred, procp);
+ if (!error)
+ error = getret;
+ }
+ VOP_UNLOCK(vp, 0, procp);
+ if (error) {
+ vrele(vp);
+ vp = NULL;
+ free((caddr_t)rbuf, M_TEMP);
+ if (cookies)
+ free((caddr_t)cookies, M_TEMP);
+ nfsm_reply(NFSX_POSTOPATTR(v3));
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+ if (io.uio_resid) {
+ siz -= io.uio_resid;
+
+ /*
+ * If nothing read, return eof
+ * rpc reply
+ */
+ if (siz == 0) {
+ vrele(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
+ 2 * NFSX_UNSIGNED);
+ if (v3) {
+ nfsm_srvpostop_attr(getret, &at);
+ nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+ txdr_hyper(at.va_filerev, tl);
+ tl += 2;
+ } else
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ *tl++ = nfs_false;
+ *tl = nfs_true;
+ FREE((caddr_t)rbuf, M_TEMP);
+ FREE((caddr_t)cookies, M_TEMP);
+ error = 0;
+ goto nfsmout;
+ }
+ }
+
+ /*
+ * Check for degenerate cases of nothing useful read.
+ * If so go try again
+ */
+ cpos = rbuf;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ cookiep = cookies;
+ /*
+ * For some reason FreeBSD's ufs_readdir() chooses to back the
+ * directory offset up to a block boundary, so it is necessary to
+ * skip over the records that precede the requested offset. This
+ * requires the assumption that file offset cookies monotonically
+ * increase.
+ */
+ while (cpos < cend && ncookies > 0 &&
+ (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
+ ((u_quad_t)(*cookiep)) <= toff)) {
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ cookiep++;
+ ncookies--;
+ }
+ if (cpos >= cend || ncookies == 0) {
+ toff = off;
+ siz = fullsiz;
+ goto again;
+ }
+
+ len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */
+ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
+ if (v3) {
+ nfsm_srvpostop_attr(getret, &at);
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ txdr_hyper(at.va_filerev, tl);
+ }
+ mp = mp2 = mb;
+ bp = bpos;
+ be = bp + M_TRAILINGSPACE(mp);
+
+ /* Loop through the records and build reply */
+ while (cpos < cend && ncookies > 0) {
+ if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
+ nlen = dp->d_namlen;
+ rem = nfsm_rndup(nlen) - nlen;
+ len += (4 * NFSX_UNSIGNED + nlen + rem);
+ if (v3)
+ len += 2 * NFSX_UNSIGNED;
+ if (len > cnt) {
+ eofflag = 0;
+ break;
+ }
+ /*
+ * Build the directory record xdr from
+ * the dirent entry.
+ */
+ nfsm_clget;
+ *tl = nfs_true;
+ bp += NFSX_UNSIGNED;
+ if (v3) {
+ nfsm_clget;
+ *tl = 0;
+ bp += NFSX_UNSIGNED;
+ }
+ nfsm_clget;
+ *tl = txdr_unsigned(dp->d_fileno);
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(nlen);
+ bp += NFSX_UNSIGNED;
+
+ /* And loop around copying the name */
+ xfer = nlen;
+ cp = dp->d_name;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be)
+ tsiz = be-bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ /* And null pad to an int32_t boundary */
+ for (i = 0; i < rem; i++)
+ *bp++ = '\0';
+ nfsm_clget;
+
+ /* Finish off the record */
+ if (v3) {
+ *tl = 0;
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ }
+ *tl = txdr_unsigned(*cookiep);
+ bp += NFSX_UNSIGNED;
+ }
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ cookiep++;
+ ncookies--;
+ }
+ vrele(vp);
+ vp = NULL;
+ nfsm_clget;
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ if (eofflag)
+ *tl = nfs_true;
+ else
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ if (mp != mb) {
+ if (bp < be)
+ mp->m_len = bp - mtod(mp, caddr_t);
+ } else
+ mp->m_len += bp - bpos;
+ FREE((caddr_t)rbuf, M_TEMP);
+ FREE((caddr_t)cookies, M_TEMP);
+
+nfsmout:
+ if (vp)
+ vrele(vp);
+ return(error);
+}
+
+int
+nfsrv_readdirplus(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register char *bp, *be;
+ register struct mbuf *mp;
+ register struct dirent *dp;
+ register caddr_t cp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2, *mreq, *mp2;
+ char *cpos, *cend, *cp2, *rbuf;
+ struct vnode *vp = NULL, *nvp;
+ struct flrep fl;
+ nfsfh_t nfh;
+ fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
+ struct uio io;
+ struct iovec iv;
+ struct vattr va, at, *vap = &va;
+ struct nfs_fattr *fp;
+ int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
+ int siz, cnt, fullsiz, eofflag, rdonly, cache, dirlen, ncookies;
+ u_quad_t frev, off, toff, verf;
+ u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
+ toff = fxdr_hyper(tl);
+ tl += 2;
+ verf = fxdr_hyper(tl);
+ tl += 2;
+ siz = fxdr_unsigned(int, *tl++);
+ cnt = fxdr_unsigned(int, *tl);
+ off = toff;
+ siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
+ xfer = NFS_SRVMAXDATA(nfsd);
+ if (siz > xfer)
+ siz = xfer;
+ fullsiz = siz;
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (!error && vp->v_type != VDIR) {
+ error = ENOTDIR;
+ vput(vp);
+ vp = NULL;
+ }
+ if (error) {
+ nfsm_reply(NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+ error = getret = VOP_GETATTR(vp, &at, cred, procp);
+#if 0
+ /*
+ * XXX This check may be too strict for Solaris 2.5 clients.
+ */
+ if (!error && toff && verf && verf != at.va_filerev)
+ error = NFSERR_BAD_COOKIE;
+#endif
+ if (!error) {
+ nqsrv_getl(vp, ND_READ);
+ error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0);
+ }
+ if (error) {
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_V3POSTOPATTR);
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+ VOP_UNLOCK(vp, 0, procp);
+ MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+ iv.iov_base = rbuf;
+ iv.iov_len = fullsiz;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_offset = (off_t)off;
+ io.uio_resid = fullsiz;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ eofflag = 0;
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
+ if (cookies) {
+ free((caddr_t)cookies, M_TEMP);
+ cookies = NULL;
+ }
+ error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
+ off = (u_quad_t)io.uio_offset;
+ getret = VOP_GETATTR(vp, &at, cred, procp);
+ VOP_UNLOCK(vp, 0, procp);
+ if (!cookies && !error)
+ error = NFSERR_PERM;
+ if (!error)
+ error = getret;
+ if (error) {
+ vrele(vp);
+ vp = NULL;
+ if (cookies)
+ free((caddr_t)cookies, M_TEMP);
+ free((caddr_t)rbuf, M_TEMP);
+ nfsm_reply(NFSX_V3POSTOPATTR);
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+ if (io.uio_resid) {
+ siz -= io.uio_resid;
+
+ /*
+ * If nothing read, return eof
+ * rpc reply
+ */
+ if (siz == 0) {
+ vrele(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
+ 2 * NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(getret, &at);
+ nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+ txdr_hyper(at.va_filerev, tl);
+ tl += 2;
+ *tl++ = nfs_false;
+ *tl = nfs_true;
+ FREE((caddr_t)cookies, M_TEMP);
+ FREE((caddr_t)rbuf, M_TEMP);
+ error = 0;
+ goto nfsmout;
+ }
+ }
+
+ /*
+ * Check for degenerate cases of nothing useful read.
+ * If so go try again
+ */
+ cpos = rbuf;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ cookiep = cookies;
+ /*
+ * For some reason FreeBSD's ufs_readdir() chooses to back the
+ * directory offset up to a block boundary, so it is necessary to
+ * skip over the records that precede the requested offset. This
+ * requires the assumption that file offset cookies monotonically
+ * increase.
+ */
+ while (cpos < cend && ncookies > 0 &&
+ (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
+ ((u_quad_t)(*cookiep)) <= toff)) {
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ cookiep++;
+ ncookies--;
+ }
+ if (cpos >= cend || ncookies == 0) {
+ toff = off;
+ siz = fullsiz;
+ goto again;
+ }
+
+ /*
+ * Probe one of the directory entries to see if the filesystem
+ * supports VGET.
+ */
+ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp) == EOPNOTSUPP) {
+ error = NFSERR_NOTSUPP;
+ vrele(vp);
+ vp = NULL;
+ free((caddr_t)cookies, M_TEMP);
+ free((caddr_t)rbuf, M_TEMP);
+ nfsm_reply(NFSX_V3POSTOPATTR);
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+ vput(nvp);
+ nvp = NULL;
+
+ dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED;
+ nfsm_reply(cnt);
+ nfsm_srvpostop_attr(getret, &at);
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ txdr_hyper(at.va_filerev, tl);
+ mp = mp2 = mb;
+ bp = bpos;
+ be = bp + M_TRAILINGSPACE(mp);
+
+ /* Loop through the records and build reply */
+ while (cpos < cend && ncookies > 0) {
+ if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
+ nlen = dp->d_namlen;
+ rem = nfsm_rndup(nlen)-nlen;
+
+ /*
+ * For readdir_and_lookup get the vnode using
+ * the file number.
+ */
+ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp))
+ goto invalid;
+ bzero((caddr_t)nfhp, NFSX_V3FH);
+ nfhp->fh_fsid =
+ nvp->v_mount->mnt_stat.f_fsid;
+ if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
+ vput(nvp);
+ nvp = NULL;
+ goto invalid;
+ }
+ if (VOP_GETATTR(nvp, vap, cred, procp)) {
+ vput(nvp);
+ nvp = NULL;
+ goto invalid;
+ }
+ vput(nvp);
+ nvp = NULL;
+
+ /*
+ * If either the dircount or maxcount will be
+ * exceeded, get out now. Both of these lengths
+ * are calculated conservatively, including all
+ * XDR overheads.
+ */
+ len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
+ NFSX_V3POSTOPATTR);
+ dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
+ if (len > cnt || dirlen > fullsiz) {
+ eofflag = 0;
+ break;
+ }
+
+ /*
+ * Build the directory record xdr from
+ * the dirent entry.
+ */
+ fp = (struct nfs_fattr *)&fl.fl_fattr;
+ nfsm_srvfillattr(vap, fp);
+ fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
+ fl.fl_fhok = nfs_true;
+ fl.fl_postopok = nfs_true;
+ fl.fl_off.nfsuquad[0] = 0;
+ fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
+
+ nfsm_clget;
+ *tl = nfs_true;
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = 0;
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(dp->d_fileno);
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(nlen);
+ bp += NFSX_UNSIGNED;
+
+ /* And loop around copying the name */
+ xfer = nlen;
+ cp = dp->d_name;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp + xfer) > be)
+ tsiz = be - bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ /* And null pad to an int32_t boundary */
+ for (i = 0; i < rem; i++)
+ *bp++ = '\0';
+
+ /*
+ * Now copy the flrep structure out.
+ */
+ xfer = sizeof (struct flrep);
+ cp = (caddr_t)&fl;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp + xfer) > be)
+ tsiz = be - bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ }
+invalid:
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ cookiep++;
+ ncookies--;
+ }
+ vrele(vp);
+ vp = NULL;
+ nfsm_clget;
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ if (eofflag)
+ *tl = nfs_true;
+ else
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ if (mp != mb) {
+ if (bp < be)
+ mp->m_len = bp - mtod(mp, caddr_t);
+ } else
+ mp->m_len += bp - bpos;
+ FREE((caddr_t)cookies, M_TEMP);
+ FREE((caddr_t)rbuf, M_TEMP);
+nfsmout:
+ if (vp)
+ vrele(vp);
+ return(error);
+}
+
+/*
+ * nfs commit service
+ */
+int
+nfsrv_commit(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ struct vattr bfor, aft;
+ struct vnode *vp = NULL;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt, cache;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ u_quad_t frev, off;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ cache = 0;
+#endif
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+
+ /*
+ * XXX At this time VOP_FSYNC() does not accept offset and byte
+ * count parameters, so these arguments are useless (someday maybe).
+ */
+ off = fxdr_hyper(tl);
+ tl += 2;
+ cnt = fxdr_unsigned(int, *tl);
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(2 * NFSX_UNSIGNED);
+ nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
+ error = 0;
+ goto nfsmout;
+ }
+ for_ret = VOP_GETATTR(vp, &bfor, cred, procp);
+
+ if (cnt > MAX_COMMIT_COUNT) {
+ /*
+ * Give up and do the whole thing
+ */
+ if (vp->v_object &&
+ (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
+ vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
+ }
+ error = VOP_FSYNC(vp, cred, MNT_WAIT, procp);
+ } else {
+ /*
+ * Locate and synchronously write any buffers that fall
+ * into the requested range. Note: we are assuming that
+ * f_iosize is a power of 2.
+ */
+ int iosize = vp->v_mount->mnt_stat.f_iosize;
+ int iomask = iosize - 1;
+ int s;
+ daddr_t lblkno;
+
+ /*
+ * Align to an iosize boundary, then super-align to a page boundary.
+ */
+ if (off & iomask) {
+ cnt += off & iomask;
+ off &= ~(u_quad_t)iomask;
+ }
+ if (off & PAGE_MASK) {
+ cnt += off & PAGE_MASK;
+ off &= ~(u_quad_t)PAGE_MASK;
+ }
+ lblkno = off / iosize;
+
+ if (vp->v_object &&
+ (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
+ vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
+ }
+
+ s = splbio();
+ while (cnt > 0) {
+ struct buf *bp;
+
+ /*
+ * If we have a buffer and it is marked B_DELWRI we
+ * have to lock and write it. Otherwise the prior
+ * write is assumed to have already been committed.
+ */
+ if ((bp = gbincore(vp, lblkno)) != NULL && (bp->b_flags & B_DELWRI)) {
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
+ BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL);
+ continue; /* retry */
+ }
+ bremfree(bp);
+ bp->b_flags &= ~B_ASYNC;
+ VOP_BWRITE(bp->b_vp, bp);
+ ++nfs_commit_miss;
+ }
+ ++nfs_commit_blks;
+ if (cnt < iosize)
+ break;
+ cnt -= iosize;
+ ++lblkno;
+ }
+ splx(s);
+ }
+
+ aft_ret = VOP_GETATTR(vp, &aft, cred, procp);
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
+ nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
+ if (!error) {
+ nfsm_build(tl, u_int32_t *, NFSX_V3WRITEVERF);
+ if (nfsver.tv_sec == 0)
+ nfsver = boottime;
+ *tl++ = txdr_unsigned(nfsver.tv_sec);
+ *tl = txdr_unsigned(nfsver.tv_usec);
+ } else {
+ error = 0;
+ }
+nfsmout:
+ if (vp)
+ vput(vp);
+ return(error);
+}
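The boundary alignment in nfsrv_commit() above widens the requested byte range downward to an f_iosize boundary and then to a page boundary before the buffer walk. A small user-space sketch of just that arithmetic, with 8192 and 4096 standing in for f_iosize and PAGE_SIZE (both values are assumptions for illustration):

#include <stdio.h>

typedef unsigned long long u_quad_t;

/* Align [off, off + cnt) down to a power-of-two block size, widening cnt. */
static void
align_range(u_quad_t *off, int *cnt, u_quad_t blksize)
{
        u_quad_t mask = blksize - 1;

        if (*off & mask) {
                *cnt += *off & mask;
                *off &= ~mask;
        }
}

int
main(void)
{
        u_quad_t off = 10000;
        int cnt = 3000;

        align_range(&off, &cnt, 8192);  /* assumed f_iosize */
        align_range(&off, &cnt, 4096);  /* assumed PAGE_SIZE */
        printf("off %llu cnt %d\n", off, cnt);  /* off 8192 cnt 4808 */
        return (0);
}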
+
+/*
+ * nfs statfs service
+ */
+int
+nfsrv_statfs(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register struct statfs *sf;
+ register struct nfs_statfs *sfp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, getret = 1;
+ int v3 = (nfsd->nd_flag & ND_NFSV3);
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp = NULL;
+ struct vattr at;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ struct statfs statfs;
+ u_quad_t frev, tval;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ cache = 0;
+#endif
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+ sf = &statfs;
+ error = VFS_STATFS(vp->v_mount, sf, procp);
+ getret = VOP_GETATTR(vp, &at, cred, procp);
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
+ if (v3)
+ nfsm_srvpostop_attr(getret, &at);
+ if (error) {
+ error = 0;
+ goto nfsmout;
+ }
+ nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3));
+ if (v3) {
+ tval = (u_quad_t)sf->f_blocks;
+ tval *= (u_quad_t)sf->f_bsize;
+ txdr_hyper(tval, &sfp->sf_tbytes);
+ tval = (u_quad_t)sf->f_bfree;
+ tval *= (u_quad_t)sf->f_bsize;
+ txdr_hyper(tval, &sfp->sf_fbytes);
+ tval = (u_quad_t)sf->f_bavail;
+ tval *= (u_quad_t)sf->f_bsize;
+ txdr_hyper(tval, &sfp->sf_abytes);
+ sfp->sf_tfiles.nfsuquad[0] = 0;
+ sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
+ sfp->sf_ffiles.nfsuquad[0] = 0;
+ sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
+ sfp->sf_afiles.nfsuquad[0] = 0;
+ sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
+ sfp->sf_invarsec = 0;
+ } else {
+ sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
+ sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
+ sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
+ sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
+ sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
+ }
+nfsmout:
+ if (vp)
+ vput(vp);
+ return(error);
+}
+
+/*
+ * nfs fsinfo service
+ */
+int
+nfsrv_fsinfo(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register u_int32_t *tl;
+ register struct nfsv3_fsinfo *sip;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, getret = 1, pref;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp = NULL;
+ struct vattr at;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev, maxfsize;
+ struct statfs sb;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ cache = 0;
+#endif
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+
+ /* XXX Try to make a guess on the max file size. */
+ VFS_STATFS(vp->v_mount, &sb, procp);
+ maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
+
+ getret = VOP_GETATTR(vp, &at, cred, procp);
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
+ nfsm_srvpostop_attr(getret, &at);
+ nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO);
+
+ /*
+ * XXX
+ * There should be file system VFS OP(s) to get this information.
+ * For now, assume ufs.
+ */
+ if (slp->ns_so->so_type == SOCK_DGRAM)
+ pref = NFS_MAXDGRAMDATA;
+ else
+ pref = NFS_MAXDATA;
+ sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
+ sip->fs_rtpref = txdr_unsigned(pref);
+ sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
+ sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
+ sip->fs_wtpref = txdr_unsigned(pref);
+ sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
+ sip->fs_dtpref = txdr_unsigned(pref);
+ txdr_hyper(maxfsize, &sip->fs_maxfilesize);
+ sip->fs_timedelta.nfsv3_sec = 0;
+ sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
+ sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
+ NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
+ NFSV3FSINFO_CANSETTIME);
+nfsmout:
+ if (vp)
+ vput(vp);
+ return(error);
+}
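The max-file-size guess in nfsrv_fsinfo() above is just the largest byte offset reachable with a signed 32-bit block number on the underlying filesystem. A one-liner to make the arithmetic concrete, assuming a hypothetical f_bsize of 8192:

#include <stdio.h>

int
main(void)
{
        unsigned long long bsize = 8192;        /* assumed f_bsize */
        unsigned long long maxfsize = 0x80000000ULL * bsize - 1;

        /* 2^31 blocks of 8k each, minus one byte: 17592186044415 (about 16 TB) */
        printf("max file size guess: %llu\n", maxfsize);
        return (0);
}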
+
+/*
+ * nfs pathconf service
+ */
+int
+nfsrv_pathconf(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
+ struct sockaddr *nam = nfsd->nd_nam;
+ caddr_t dpos = nfsd->nd_dpos;
+ struct ucred *cred = &nfsd->nd_cr;
+ register u_int32_t *tl;
+ register struct nfsv3_pathconf *pc;
+ register int32_t t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, getret = 1;
+ register_t linkmax, namemax, chownres, notrunc;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp = NULL;
+ struct vattr at;
+ nfsfh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ cache = 0;
+#endif
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
+ &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
+ if (error) {
+ nfsm_reply(NFSX_UNSIGNED);
+ nfsm_srvpostop_attr(getret, &at);
+ error = 0;
+ goto nfsmout;
+ }
+ error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
+ if (!error)
+ error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
+ if (!error)
+ error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
+ if (!error)
+ error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
+ getret = VOP_GETATTR(vp, &at, cred, procp);
+ vput(vp);
+ vp = NULL;
+ nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
+ nfsm_srvpostop_attr(getret, &at);
+ if (error) {
+ error = 0;
+ goto nfsmout;
+ }
+ nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF);
+
+ pc->pc_linkmax = txdr_unsigned(linkmax);
+ pc->pc_namemax = txdr_unsigned(namemax);
+ pc->pc_notrunc = txdr_unsigned(notrunc);
+ pc->pc_chownrestricted = txdr_unsigned(chownres);
+
+ /*
+ * These should probably be supported by VOP_PATHCONF(), but
+ * until msdosfs is exportable (why would you want to?), the
+ * Unix defaults should be ok.
+ */
+ pc->pc_caseinsensitive = nfs_false;
+ pc->pc_casepreserving = nfs_true;
+nfsmout:
+ if (vp)
+ vput(vp);
+ return(error);
+}
+
+/*
+ * Null operation, used by clients to ping server
+ */
+/* ARGSUSED */
+int
+nfsrv_null(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep;
+ caddr_t bpos;
+ int error = NFSERR_RETVOID, cache;
+ struct mbuf *mb, *mreq;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ cache = 0;
+#endif
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * No operation, used for obsolete procedures
+ */
+/* ARGSUSED */
+int
+nfsrv_noop(nfsd, slp, procp, mrq)
+ struct nfsrv_descript *nfsd;
+ struct nfssvc_sock *slp;
+ struct proc *procp;
+ struct mbuf **mrq;
+{
+ struct mbuf *mrep = nfsd->nd_mrep;
+ caddr_t bpos;
+ int error, cache;
+ struct mbuf *mb, *mreq;
+ u_quad_t frev;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+#ifndef nolint
+ cache = 0;
+#endif
+ if (nfsd->nd_repstat)
+ error = nfsd->nd_repstat;
+ else
+ error = EPROCUNAVAIL;
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * Perform access checking for vnodes obtained from file handles that would
+ * refer to files already opened by a Unix client. You cannot just use
+ * vn_writechk() and VOP_ACCESS() for two reasons.
+ * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
+ * 2 - The owner is to be given access irrespective of mode bits for some
+ * operations, so that processes that chmod after opening a file don't
+ * break. I don't like this because it opens a security hole, but since
+ * the nfs server opens a security hole the size of a barn door anyhow,
+ * what the heck.
+ *
+ * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
+ * will return EPERM instead of EACCES. EPERM is always an error.
+ */
+static int
+nfsrv_access(vp, flags, cred, rdonly, p, override)
+ register struct vnode *vp;
+ int flags;
+ register struct ucred *cred;
+ int rdonly;
+ struct proc *p;
+ int override;
+{
+ struct vattr vattr;
+ int error;
+
+ nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
+ if (flags & VWRITE) {
+ /* Just vn_writechk() changed to check rdonly */
+ /*
+ * Disallow write attempts on read-only file systems;
+ * unless the file is a socket or a block or character
+ * device resident on the file system.
+ */
+ if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+ switch (vp->v_type) {
+ case VREG:
+ case VDIR:
+ case VLNK:
+ return (EROFS);
+ default:
+ break;
+ }
+ }
+ /*
+ * If there's shared text associated with
+ * the inode, we can't allow writing.
+ */
+ if (vp->v_flag & VTEXT)
+ return (ETXTBSY);
+ }
+ error = VOP_GETATTR(vp, &vattr, cred, p);
+ if (error)
+ return (error);
+ error = VOP_ACCESS(vp, flags, cred, p);
+ /*
+ * Allow certain operations for the owner (reads and writes
+ * on files that are already open).
+ */
+ if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
+ error = 0;
+ return error;
+}
+#endif /* NFS_NOSERVER */
+
diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c
new file mode 100644
index 0000000..41fd303
--- /dev/null
+++ b/sys/nfs/nfs_socket.c
@@ -0,0 +1,2284 @@
+/*
+ * Copyright (c) 1989, 1991, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+/*
+ * Socket operations for use by nfs
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+#include <sys/sysctl.h>
+#include <sys/signalvar.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Estimate rto for an nfs rpc sent via an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer estimates would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup - A+2D
+ * read, write - A+4D
+ * other - nm_timeo
+ */
+#define NFS_RTO(n, t) \
+ ((t) == 0 ? (n)->nm_timeo : \
+ ((t) < 3 ? \
+ (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
+ ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
+#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
+#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
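To make the shift arithmetic in NFS_RTO above easier to follow, here is a stand-alone mirror of the same computation with shortened field names; the values in main() are arbitrary samples chosen only to exercise the three cases (default, A+2D, A+4D), not measurements:

#include <stdio.h>

/* Stand-in for the timing fields of struct nfsmount. */
struct mnt {
        int timeo;      /* nm_timeo: default ticks for "other" rpcs */
        int srtt[4];    /* nm_srtt:  scaled smoothed rtt per timer class */
        int sdrtt[4];   /* nm_sdrtt: scaled smoothed rtt deviation */
};

static int
rto(const struct mnt *n, int t)
{
        if (t == 0)     /* other: fixed default */
                return (n->timeo);
        if (t < 3)      /* getattr, lookup: A + 2D */
                return ((((n->srtt[t - 1] + 3) >> 2) + n->sdrtt[t - 1] + 1) >> 1);
        /* read, write: A + 4D */
        return (((n->srtt[t - 1] + 7) >> 3) + n->sdrtt[t - 1] + 1);
}

int
main(void)
{
        struct mnt m = { 10, { 24, 24, 24, 24 }, { 4, 4, 4, 4 } };

        printf("other  %d\n", rto(&m, 0));
        printf("lookup %d\n", rto(&m, 2));
        printf("read   %d\n", rto(&m, 3));
        return (0);
}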
+/*
+ * External data, mostly RPC constants in XDR form
+ */
+extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
+ rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
+ rpc_auth_kerb;
+extern u_int32_t nfs_prog, nqnfs_prog;
+extern time_t nqnfsstarttime;
+extern struct nfsstats nfsstats;
+extern int nfsv3_procid[NFS_NPROCS];
+extern int nfs_ticks;
+
+/*
+ * Defines which timer to use for the procnum.
+ * 0 - default
+ * 1 - getattr
+ * 2 - lookup
+ * 3 - read
+ * 4 - write
+ */
+static int proct[NFS_NPROCS] = {
+ 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
+ 0, 0, 0,
+};
+
+static int nfs_realign_test;
+static int nfs_realign_count;
+
+SYSCTL_DECL(_vfs_nfs);
+
+SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RD, &nfs_realign_test, 0, "");
+SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RD, &nfs_realign_count, 0, "");
+
+
+/*
+ * There is a congestion window for outstanding rpcs maintained per mount
+ * point. The cwnd size is adjusted in roughly the way that:
+ * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
+ * SIGCOMM '88". ACM, August 1988.
+ * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
+ * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
+ * of rpcs is in progress.
+ * (The sent count and cwnd are scaled for integer arith.)
+ * Variants of "slow start" were tried and were found to be too much of a
+ * performance hit (average rtt 3 times larger); I suspect this is due
+ * to the large rtt that nfs rpcs have.
+ */
+#define NFS_CWNDSCALE 256
+#define NFS_MAXCWND (NFS_CWNDSCALE * 32)
+static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
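As a hedged illustration of the window policy described above, the sketch below applies one common scaled-integer form of that rule: halve on a retransmit timeout, add roughly 1/cwnd of an rpc per reply while a full window is outstanding. It models the described behaviour in isolation and is not a copy of the update code later in this file:

#include <stdio.h>

#define NFS_CWNDSCALE   256
#define NFS_MAXCWND     (NFS_CWNDSCALE * 32)

/* Halve the window on a retransmit timeout, but never drop below one rpc. */
static int
cwnd_on_timeout(int cwnd)
{
        cwnd >>= 1;
        if (cwnd < NFS_CWNDSCALE)
                cwnd = NFS_CWNDSCALE;
        return (cwnd);
}

/*
 * Grow the window by about 1/cwnd of an rpc per reply, but only while a
 * full window of rpcs is outstanding, and never above the cap.
 */
static int
cwnd_on_reply(int cwnd, int sent)
{
        if (cwnd <= sent) {
                cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE + (cwnd >> 1)) / cwnd;
                if (cwnd > NFS_MAXCWND)
                        cwnd = NFS_MAXCWND;
        }
        return (cwnd);
}

int
main(void)
{
        int cwnd = NFS_MAXCWND / 2, sent = cwnd;

        cwnd = cwnd_on_reply(cwnd, sent);       /* slow additive growth */
        printf("after reply:   %d\n", cwnd);
        cwnd = cwnd_on_timeout(cwnd);           /* multiplicative backoff */
        printf("after timeout: %d\n", cwnd);
        return (0);
}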
+int nfsrtton = 0;
+struct nfsrtt nfsrtt;
+struct callout_handle nfs_timer_handle;
+
+static int nfs_msg __P((struct proc *,char *,char *));
+static int nfs_rcvlock __P((struct nfsreq *));
+static void nfs_rcvunlock __P((struct nfsreq *));
+static void nfs_realign __P((struct mbuf **pm, int hsiz));
+static int nfs_receive __P((struct nfsreq *rep, struct sockaddr **aname,
+ struct mbuf **mp));
+static void nfs_softterm __P((struct nfsreq *rep));
+static int nfs_reconnect __P((struct nfsreq *rep));
+#ifndef NFS_NOSERVER
+static int nfsrv_getstream __P((struct nfssvc_sock *,int));
+
+int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
+ struct nfssvc_sock *slp,
+ struct proc *procp,
+ struct mbuf **mreqp)) = {
+ nfsrv_null,
+ nfsrv_getattr,
+ nfsrv_setattr,
+ nfsrv_lookup,
+ nfsrv3_access,
+ nfsrv_readlink,
+ nfsrv_read,
+ nfsrv_write,
+ nfsrv_create,
+ nfsrv_mkdir,
+ nfsrv_symlink,
+ nfsrv_mknod,
+ nfsrv_remove,
+ nfsrv_rmdir,
+ nfsrv_rename,
+ nfsrv_link,
+ nfsrv_readdir,
+ nfsrv_readdirplus,
+ nfsrv_statfs,
+ nfsrv_fsinfo,
+ nfsrv_pathconf,
+ nfsrv_commit,
+ nqnfsrv_getlease,
+ nqnfsrv_vacated,
+ nfsrv_noop,
+ nfsrv_noop
+};
+#endif /* NFS_NOSERVER */
+
+/*
+ * Initialize sockets and congestion for a new NFS connection.
+ * We do not free the sockaddr if error.
+ */
+int
+nfs_connect(nmp, rep)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+{
+ register struct socket *so;
+ int s, error, rcvreserve, sndreserve;
+ struct sockaddr *saddr;
+ struct sockaddr_in *sin;
+ struct proc *p = &proc0; /* only used for socreate and sobind */
+
+ nmp->nm_so = (struct socket *)0;
+ saddr = nmp->nm_nam;
+ error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
+ nmp->nm_soproto, p);
+ if (error)
+ goto bad;
+ so = nmp->nm_so;
+ nmp->nm_soflags = so->so_proto->pr_flags;
+
+ /*
+ * Some servers require that the client port be a reserved port number.
+ */
+ if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+ struct sockopt sopt;
+ int ip;
+ struct sockaddr_in ssin;
+
+ bzero(&sopt, sizeof sopt);
+ ip = IP_PORTRANGE_LOW;
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IP;
+ sopt.sopt_name = IP_PORTRANGE;
+ sopt.sopt_val = (void *)&ip;
+ sopt.sopt_valsize = sizeof(ip);
+ sopt.sopt_p = NULL;
+ error = sosetopt(so, &sopt);
+ if (error)
+ goto bad;
+ bzero(&ssin, sizeof ssin);
+ sin = &ssin;
+ sin->sin_len = sizeof (struct sockaddr_in);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ sin->sin_port = htons(0);
+ error = sobind(so, (struct sockaddr *)sin, p);
+ if (error)
+ goto bad;
+ bzero(&sopt, sizeof sopt);
+ ip = IP_PORTRANGE_DEFAULT;
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IP;
+ sopt.sopt_name = IP_PORTRANGE;
+ sopt.sopt_val = (void *)&ip;
+ sopt.sopt_valsize = sizeof(ip);
+ sopt.sopt_p = NULL;
+ error = sosetopt(so, &sopt);
+ if (error)
+ goto bad;
+ }
+
+ /*
+ * Protocols that do not require connections may be optionally left
+ * unconnected for servers that reply from a port other than NFS_PORT.
+ */
+ if (nmp->nm_flag & NFSMNT_NOCONN) {
+ if (nmp->nm_soflags & PR_CONNREQUIRED) {
+ error = ENOTCONN;
+ goto bad;
+ }
+ } else {
+ error = soconnect(so, nmp->nm_nam, p);
+ if (error)
+ goto bad;
+
+ /*
+ * Wait for the connection to complete. Cribbed from the
+ * connect system call but with the wait timing out so
+ * that interruptible mounts don't hang here for a long time.
+ */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
+ "nfscon", 2 * hz);
+ if ((so->so_state & SS_ISCONNECTING) &&
+ so->so_error == 0 && rep &&
+ (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
+ so->so_state &= ~SS_ISCONNECTING;
+ splx(s);
+ goto bad;
+ }
+ }
+ if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
+ splx(s);
+ goto bad;
+ }
+ splx(s);
+ }
+ if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
+ so->so_rcv.sb_timeo = (5 * hz);
+ so->so_snd.sb_timeo = (5 * hz);
+ } else {
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_timeo = 0;
+ }
+ if (nmp->nm_sotype == SOCK_DGRAM) {
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+ rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
+ NFS_MAXPKTHDR) * 2;
+ } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+ rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
+ NFS_MAXPKTHDR) * 2;
+ } else {
+ if (nmp->nm_sotype != SOCK_STREAM)
+ panic("nfscon sotype");
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ struct sockopt sopt;
+ int val;
+
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_KEEPALIVE;
+ sopt.sopt_val = &val;
+ sopt.sopt_valsize = sizeof val;
+ val = 1;
+ sosetopt(so, &sopt);
+ }
+ if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+ struct sockopt sopt;
+ int val;
+
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_level = IPPROTO_TCP;
+ sopt.sopt_name = TCP_NODELAY;
+ sopt.sopt_val = &val;
+ sopt.sopt_valsize = sizeof val;
+ val = 1;
+ sosetopt(so, &sopt);
+ }
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
+ sizeof (u_int32_t)) * 2;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
+ sizeof (u_int32_t)) * 2;
+ }
+ error = soreserve(so, sndreserve, rcvreserve);
+ if (error)
+ goto bad;
+ so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_snd.sb_flags |= SB_NOINTR;
+
+ /* Initialize other non-zero congestion variables */
+ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
+ nmp->nm_srtt[3] = (NFS_TIMEO << 3);
+ nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
+ nmp->nm_sdrtt[3] = 0;
+ nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
+ nmp->nm_sent = 0;
+ nmp->nm_timeouts = 0;
+ return (0);
+
+bad:
+ nfs_disconnect(nmp);
+ return (error);
+}
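The reserved-port sequence near the top of nfs_connect() above (switch the socket's port range to the low range, bind an anonymous port, switch the range back) has a direct user-space analogue on FreeBSD; the kernel path does the same thing through sosetopt() and sobind(). A hedged sketch, with error handling collapsed and the function name invented for illustration:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>

/*
 * Bind a socket to a kernel-chosen reserved (low) port, then restore the
 * default port range on the socket.  Requires sufficient privilege.
 */
int
bind_resvport(int s)
{
        struct sockaddr_in sin;
        int range;

        range = IP_PORTRANGE_LOW;
        if (setsockopt(s, IPPROTO_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
                return (-1);

        memset(&sin, 0, sizeof(sin));
        sin.sin_len = sizeof(sin);
        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = INADDR_ANY;
        sin.sin_port = htons(0);                /* let the kernel pick */
        if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
                return (-1);

        range = IP_PORTRANGE_DEFAULT;
        return (setsockopt(s, IPPROTO_IP, IP_PORTRANGE, &range, sizeof(range)));
}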
+
+/*
+ * Reconnect routine:
+ * Called when a connection is broken on a reliable protocol.
+ * - clean up the old socket
+ * - nfs_connect() again
+ * - set R_MUSTRESEND for all outstanding requests on mount point
+ * If this fails the mount point is DEAD!
+ * nb: Must be called with the nfs_sndlock() set on the mount point.
+ */
+static int
+nfs_reconnect(rep)
+ register struct nfsreq *rep;
+{
+ register struct nfsreq *rp;
+ register struct nfsmount *nmp = rep->r_nmp;
+ int error;
+
+ nfs_disconnect(nmp);
+ while ((error = nfs_connect(nmp, rep)) != 0) {
+ if (error == EINTR || error == ERESTART)
+ return (EINTR);
+ (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
+ }
+
+ /*
+ * Loop through outstanding request list and fix up all requests
+ * on old socket.
+ */
+ for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
+ if (rp->r_nmp == nmp)
+ rp->r_flags |= R_MUSTRESEND;
+ }
+ return (0);
+}
+
+/*
+ * NFS disconnect. Clean up and unlink.
+ */
+void
+nfs_disconnect(nmp)
+ register struct nfsmount *nmp;
+{
+ register struct socket *so;
+
+ if (nmp->nm_so) {
+ so = nmp->nm_so;
+ nmp->nm_so = (struct socket *)0;
+ soshutdown(so, 2);
+ soclose(so);
+ }
+}
+
+void
+nfs_safedisconnect(nmp)
+ struct nfsmount *nmp;
+{
+ struct nfsreq dummyreq;
+
+ bzero(&dummyreq, sizeof(dummyreq));
+ dummyreq.r_nmp = nmp;
+ nfs_rcvlock(&dummyreq);
+ nfs_disconnect(nmp);
+ nfs_rcvunlock(&dummyreq);
+}
+
+/*
+ * This is the nfs send routine. For connection based socket types, it
+ * must be called with an nfs_sndlock() on the socket.
+ * "rep == NULL" indicates that it has been called from a server.
+ * For the client side:
+ * - return EINTR if the RPC is terminated, 0 otherwise
+ * - set R_MUSTRESEND if the send fails for any reason
+ * - do any cleanup required by recoverable socket errors (?)
+ * For the server side:
+ * - return EINTR or ERESTART if interrupted by a signal
+ * - return EPIPE if a connection is lost for connection based sockets (TCP...)
+ * - do any cleanup required by recoverable socket errors (?)
+ */
+int
+nfs_send(so, nam, top, rep)
+ register struct socket *so;
+ struct sockaddr *nam;
+ register struct mbuf *top;
+ struct nfsreq *rep;
+{
+ struct sockaddr *sendnam;
+ int error, soflags, flags;
+
+ if (rep) {
+ if (rep->r_flags & R_SOFTTERM) {
+ m_freem(top);
+ return (EINTR);
+ }
+ if ((so = rep->r_nmp->nm_so) == NULL) {
+ rep->r_flags |= R_MUSTRESEND;
+ m_freem(top);
+ return (0);
+ }
+ rep->r_flags &= ~R_MUSTRESEND;
+ soflags = rep->r_nmp->nm_soflags;
+ } else
+ soflags = so->so_proto->pr_flags;
+ if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
+ sendnam = (struct sockaddr *)0;
+ else
+ sendnam = nam;
+ if (so->so_type == SOCK_SEQPACKET)
+ flags = MSG_EOR;
+ else
+ flags = 0;
+
+ error = so->so_proto->pr_usrreqs->pru_sosend(so, sendnam, 0, top, 0,
+ flags, curproc /*XXX*/);
+ /*
+ * ENOBUFS for dgram sockets is transient and non fatal.
+ * No need to log, and no need to break a soft mount.
+ */
+ if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
+ error = 0;
+ if (rep) /* do backoff retransmit on client */
+ rep->r_flags |= R_MUSTRESEND;
+ }
+
+ if (error) {
+ if (rep) {
+ log(LOG_INFO, "nfs send error %d for server %s\n",error,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ /*
+ * Deal with errors for the client side.
+ */
+ if (rep->r_flags & R_SOFTTERM)
+ error = EINTR;
+ else
+ rep->r_flags |= R_MUSTRESEND;
+ } else
+ log(LOG_INFO, "nfsd send error %d\n", error);
+
+ /*
+ * Handle any recoverable (soft) socket errors here. (?)
+ */
+ if (error != EINTR && error != ERESTART &&
+ error != EWOULDBLOCK && error != EPIPE)
+ error = 0;
+ }
+ return (error);
+}
+
+/*
+ * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
+ * done by soreceive(), but for SOCK_STREAM we must deal with the Record
+ * Mark and consolidate the data into a new mbuf list.
+ * nb: Sometimes TCP passes the data up to soreceive() in long lists of
+ * small mbufs.
+ * For SOCK_STREAM we must be very careful to read an entire record once
+ * we have read any of it, even if the system call has been interrupted.
+ */
+static int
+nfs_receive(rep, aname, mp)
+ register struct nfsreq *rep;
+ struct sockaddr **aname;
+ struct mbuf **mp;
+{
+ register struct socket *so;
+ struct uio auio;
+ struct iovec aio;
+ register struct mbuf *m;
+ struct mbuf *control;
+ u_int32_t len;
+ struct sockaddr **getnam;
+ int error, sotype, rcvflg;
+ struct proc *p = curproc; /* XXX */
+
+ /*
+ * Set up arguments for soreceive()
+ */
+ *mp = (struct mbuf *)0;
+ *aname = (struct sockaddr *)0;
+ sotype = rep->r_nmp->nm_sotype;
+
+ /*
+ * For reliable protocols, lock against other senders/receivers
+ * in case a reconnect is necessary.
+ * For SOCK_STREAM, first get the Record Mark to find out how much
+ * more there is to get.
+ * We must lock the socket against other receivers
+ * until we have an entire rpc request/reply.
+ */
+ if (sotype != SOCK_DGRAM) {
+ error = nfs_sndlock(rep);
+ if (error)
+ return (error);
+tryagain:
+ /*
+ * Check for fatal errors and resending request.
+ */
+ /*
+ * Ugh: If a reconnect attempt just happened, nm_so
+ * would have changed. NULL indicates a failed
+ * attempt that has essentially shut down this
+ * mount point.
+ */
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
+ nfs_sndunlock(rep);
+ return (EINTR);
+ }
+ so = rep->r_nmp->nm_so;
+ if (!so) {
+ error = nfs_reconnect(rep);
+ if (error) {
+ nfs_sndunlock(rep);
+ return (error);
+ }
+ goto tryagain;
+ }
+ while (rep->r_flags & R_MUSTRESEND) {
+ m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+ nfsstats.rpcretries++;
+ error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
+ if (error) {
+ if (error == EINTR || error == ERESTART ||
+ (error = nfs_reconnect(rep)) != 0) {
+ nfs_sndunlock(rep);
+ return (error);
+ }
+ goto tryagain;
+ }
+ }
+ nfs_sndunlock(rep);
+ if (sotype == SOCK_STREAM) {
+ aio.iov_base = (caddr_t) &len;
+ aio.iov_len = sizeof(u_int32_t);
+ auio.uio_iov = &aio;
+ auio.uio_iovcnt = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_offset = 0;
+ auio.uio_resid = sizeof(u_int32_t);
+ auio.uio_procp = p;
+ do {
+ rcvflg = MSG_WAITALL;
+ error = so->so_proto->pr_usrreqs->pru_soreceive
+ (so, (struct sockaddr **)0, &auio,
+ (struct mbuf **)0, (struct mbuf **)0,
+ &rcvflg);
+ if (error == EWOULDBLOCK && rep) {
+ if (rep->r_flags & R_SOFTTERM)
+ return (EINTR);
+ }
+ } while (error == EWOULDBLOCK);
+ if (!error && auio.uio_resid > 0) {
+ /*
+ * Don't log a 0 byte receive; it means
+ * that the socket has been closed, and
+ * can happen during normal operation
+ * (forcible unmount or Solaris server).
+ */
+ if (auio.uio_resid != sizeof (u_int32_t))
+ log(LOG_INFO,
+ "short receive (%d/%d) from nfs server %s\n",
+ (int)(sizeof(u_int32_t) - auio.uio_resid),
+ (int)sizeof(u_int32_t),
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EPIPE;
+ }
+ if (error)
+ goto errout;
+ len = ntohl(len) & ~0x80000000;
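+ /*
+ * The record mark just read is 4 bytes: the high bit
+ * flags the last fragment of a record and the low 31
+ * bits give the fragment length, so e.g. 0x80000070
+ * announces a final fragment of 0x70 (112) bytes.
+ */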
+ /*
+ * This is SERIOUS! We are out of sync with the sender
+ * and forcing a disconnect/reconnect is all I can do.
+ */
+ if (len > NFS_MAXPACKET) {
+ log(LOG_ERR, "%s (%d) from nfs server %s\n",
+ "impossible packet length",
+ len,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EFBIG;
+ goto errout;
+ }
+ auio.uio_resid = len;
+ do {
+ rcvflg = MSG_WAITALL;
+ error = so->so_proto->pr_usrreqs->pru_soreceive
+ (so, (struct sockaddr **)0,
+ &auio, mp, (struct mbuf **)0, &rcvflg);
+ } while (error == EWOULDBLOCK || error == EINTR ||
+ error == ERESTART);
+ if (!error && auio.uio_resid > 0) {
+ if (len != auio.uio_resid)
+ log(LOG_INFO,
+ "short receive (%d/%d) from nfs server %s\n",
+ len - auio.uio_resid, len,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EPIPE;
+ }
+ } else {
+ /*
+ * NB: Since uio_resid is big, MSG_WAITALL is ignored
+ * and soreceive() will return when it has either a
+ * control msg or a data msg.
+ * We have no use for control msgs, but must grab them
+ * and then throw them away so we know what is going
+ * on.
+ */
+ auio.uio_resid = len = 100000000; /* Anything Big */
+ auio.uio_procp = p;
+ do {
+ rcvflg = 0;
+ error = so->so_proto->pr_usrreqs->pru_soreceive
+ (so, (struct sockaddr **)0,
+ &auio, mp, &control, &rcvflg);
+ if (control)
+ m_freem(control);
+ if (error == EWOULDBLOCK && rep) {
+ if (rep->r_flags & R_SOFTTERM)
+ return (EINTR);
+ }
+ } while (error == EWOULDBLOCK ||
+ (!error && *mp == NULL && control));
+ if ((rcvflg & MSG_EOR) == 0)
+ printf("Egad!!\n");
+ if (!error && *mp == NULL)
+ error = EPIPE;
+ len -= auio.uio_resid;
+ }
+errout:
+ if (error && error != EINTR && error != ERESTART) {
+ m_freem(*mp);
+ *mp = (struct mbuf *)0;
+ if (error != EPIPE)
+ log(LOG_INFO,
+ "receive error %d from nfs server %s\n",
+ error,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = nfs_sndlock(rep);
+ if (!error)
+ error = nfs_reconnect(rep);
+ if (!error)
+ goto tryagain;
+ else
+ nfs_sndunlock(rep);
+ }
+ } else {
+ if ((so = rep->r_nmp->nm_so) == NULL)
+ return (EACCES);
+ if (so->so_state & SS_ISCONNECTED)
+ getnam = (struct sockaddr **)0;
+ else
+ getnam = aname;
+ auio.uio_resid = len = 1000000;
+ auio.uio_procp = p;
+ do {
+ rcvflg = 0;
+ error = so->so_proto->pr_usrreqs->pru_soreceive
+ (so, getnam, &auio, mp,
+ (struct mbuf **)0, &rcvflg);
+ if (error == EWOULDBLOCK &&
+ (rep->r_flags & R_SOFTTERM))
+ return (EINTR);
+ } while (error == EWOULDBLOCK);
+ len -= auio.uio_resid;
+ }
+ if (error) {
+ m_freem(*mp);
+ *mp = (struct mbuf *)0;
+ }
+ /*
+ * Search for any mbufs that are not a multiple of 4 bytes long
+ * or with m_data not longword aligned.
+ * These could cause pointer alignment problems, so copy them to
+ * well aligned mbufs.
+ */
+ nfs_realign(mp, 5 * NFSX_UNSIGNED);
+ return (error);
+}
+
+/*
+ * Implement receipt of reply on a socket.
+ * We must search through the list of received datagrams matching them
+ * with outstanding requests using the xid, until ours is found.
+ */
+/* ARGSUSED */
+int
+nfs_reply(myrep)
+ struct nfsreq *myrep;
+{
+ register struct nfsreq *rep;
+ register struct nfsmount *nmp = myrep->r_nmp;
+ register int32_t t1;
+ struct mbuf *mrep, *md;
+ struct sockaddr *nam;
+ u_int32_t rxid, *tl;
+ caddr_t dpos, cp2;
+ int error;
+
+ /*
+ * Loop around until we get our own reply
+ */
+ for (;;) {
+ /*
+ * Lock against other receivers so that I don't get stuck in
+ * sbwait() after someone else has received my reply for me.
+ * Also necessary for connection based protocols to avoid
+ * race conditions during a reconnect.
+ * If nfs_rcvlock() returns EALREADY, that means that
+ * the reply has already been received by another
+ * process and we can return immediately. In this
+ * case, the lock is not taken to avoid races with
+ * other processes.
+ */
+ error = nfs_rcvlock(myrep);
+ if (error == EALREADY)
+ return (0);
+ if (error)
+ return (error);
+ /*
+ * Get the next Rpc reply off the socket
+ */
+ error = nfs_receive(myrep, &nam, &mrep);
+ nfs_rcvunlock(myrep);
+ if (error) {
+
+ /*
+ * Ignore routing errors on connectionless protocols??
+ */
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+ nmp->nm_so->so_error = 0;
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+ return (error);
+ }
+ if (nam)
+ FREE(nam, M_SONAME);
+
+ /*
+ * Get the xid and check that it is an rpc reply
+ */
+ md = mrep;
+ dpos = mtod(md, caddr_t);
+ nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
+ rxid = *tl++;
+ if (*tl != rpc_reply) {
+#ifndef NFS_NOSERVER
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (nqnfs_callback(nmp, mrep, md, dpos))
+ nfsstats.rpcinvalid++;
+ } else {
+ nfsstats.rpcinvalid++;
+ m_freem(mrep);
+ }
+#else
+ nfsstats.rpcinvalid++;
+ m_freem(mrep);
+#endif
+nfsmout:
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+
+ /*
+ * Loop through the request list to match up the reply
+ * Iff no match, just drop the datagram
+ */
+ for (rep = nfs_reqq.tqh_first; rep != 0;
+ rep = rep->r_chain.tqe_next) {
+ if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+ /* Found it.. */
+ rep->r_mrep = mrep;
+ rep->r_md = md;
+ rep->r_dpos = dpos;
+ if (nfsrtton) {
+ struct rttl *rt;
+
+ rt = &nfsrtt.rttl[nfsrtt.pos];
+ rt->proc = rep->r_procnum;
+ rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
+ rt->sent = nmp->nm_sent;
+ rt->cwnd = nmp->nm_cwnd;
+ rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
+ rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
+ rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+ getmicrotime(&rt->tstamp);
+ if (rep->r_flags & R_TIMING)
+ rt->rtt = rep->r_rtt;
+ else
+ rt->rtt = 1000000;
+ nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
+ }
+ /*
+ * Update congestion window.
+ * Do the additive increase of
+ * one rpc/rtt.
+ */
+ if (nmp->nm_cwnd <= nmp->nm_sent) {
+ nmp->nm_cwnd +=
+ (NFS_CWNDSCALE * NFS_CWNDSCALE +
+ (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+ if (nmp->nm_cwnd > NFS_MAXCWND)
+ nmp->nm_cwnd = NFS_MAXCWND;
+ }
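+ /*
+ * nm_cwnd is kept scaled by NFS_CWNDSCALE, so adding
+ * NFS_CWNDSCALE * NFS_CWNDSCALE / nm_cwnd per reply
+ * grows the window by about one rpc's worth per round
+ * trip, i.e. the additive-increase half of AIMD.
+ */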
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_SENT;
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ }
+ /*
+ * Update rtt using a gain of 0.125 on the mean
+ * and a gain of 0.25 on the deviation.
+ */
+ if (rep->r_flags & R_TIMING) {
+ /*
+ * Since the timer resolution of
+ * NFS_HZ is so coarse, it can often
+ * result in r_rtt == 0. Since
+ * r_rtt == N means that the actual
+ * rtt is between N+dt and N+2-dt ticks,
+ * add 1.
+ */
+ t1 = rep->r_rtt + 1;
+ t1 -= (NFS_SRTT(rep) >> 3);
+ NFS_SRTT(rep) += t1;
+ if (t1 < 0)
+ t1 = -t1;
+ t1 -= (NFS_SDRTT(rep) >> 2);
+ NFS_SDRTT(rep) += t1;
+ }
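+ /*
+ * A numeric sketch of the update above: with
+ * NFS_SRTT(rep) == 32 (4 ticks scaled by 8) and
+ * NFS_SDRTT(rep) == 8, a sample of r_rtt == 3 gives
+ * t1 == 0, leaving the srtt unchanged and decaying
+ * the sdrtt to 6; a sample of r_rtt == 7 gives
+ * t1 == 4, pulling the srtt up to 36 and the sdrtt
+ * to 10.
+ */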
+ nmp->nm_timeouts = 0;
+ break;
+ }
+ }
+ /*
+ * If not matched to a request, drop it.
+ * If it's mine, get out.
+ */
+ if (rep == 0) {
+ nfsstats.rpcunexpected++;
+ m_freem(mrep);
+ } else if (rep == myrep) {
+ if (rep->r_mrep == NULL)
+ panic("nfsreply nil");
+ return (0);
+ }
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ }
+}
+
+/*
+ * nfs_request - goes something like this
+ * - fill in request struct
+ * - links it into list
+ * - calls nfs_send() for first transmit
+ * - calls nfs_receive() to get reply
+ * - break down rpc header and return with nfs reply pointed to
+ * by mrep or error
+ * nb: always frees up mreq mbuf list
+ */
+int
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+ struct vnode *vp;
+ struct mbuf *mrest;
+ int procnum;
+ struct proc *procp;
+ struct ucred *cred;
+ struct mbuf **mrp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+{
+ register struct mbuf *mrep, *m2;
+ register struct nfsreq *rep;
+ register u_int32_t *tl;
+ register int i;
+ struct nfsmount *nmp;
+ struct mbuf *m, *md, *mheadend;
+ struct nfsnode *np;
+ char nickv[RPCX_NICKVERF];
+ time_t reqtime, waituntil;
+ caddr_t dpos, cp2;
+ int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
+ int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
+ int verf_len, verf_type;
+ u_int32_t xid;
+ u_quad_t frev;
+ char *auth_str, *verf_str;
+ NFSKERBKEY_T key; /* save session key */
+
+ nmp = VFSTONFS(vp->v_mount);
+ MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+ rep->r_nmp = nmp;
+ rep->r_vp = vp;
+ rep->r_procp = procp;
+ rep->r_procnum = procnum;
+ i = 0;
+ m = mrest;
+ while (m) {
+ i += m->m_len;
+ m = m->m_next;
+ }
+ mrest_len = i;
+
+ /*
+ * Get the RPC header with authorization.
+ */
+kerbauth:
+ verf_str = auth_str = (char *)0;
+ if (nmp->nm_flag & NFSMNT_KERB) {
+ verf_str = nickv;
+ verf_len = sizeof (nickv);
+ auth_type = RPCAUTH_KERB4;
+ bzero((caddr_t)key, sizeof (key));
+ if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
+ &auth_len, verf_str, verf_len)) {
+ error = nfs_getauth(nmp, rep, cred, &auth_str,
+ &auth_len, verf_str, &verf_len, key);
+ if (error) {
+ free((caddr_t)rep, M_NFSREQ);
+ m_freem(mrest);
+ return (error);
+ }
+ }
+ } else {
+ auth_type = RPCAUTH_UNIX;
+ if (cred->cr_ngroups < 1)
+ panic("nfsreq nogrps");
+ auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
+ nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
+ 5 * NFSX_UNSIGNED;
+ }
+ m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
+ auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
+ if (auth_str)
+ free(auth_str, M_TEMP);
+
+ /*
+ * For stream protocols, insert a Sun RPC Record Mark.
+ */
+ if (nmp->nm_sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_int32_t *) = htonl(0x80000000 |
+ (m->m_pkthdr.len - NFSX_UNSIGNED));
+ }
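+ /*
+ * E.g. for a 120 byte request the mark prepended above is
+ * htonl(0x80000000 | 120): the last-fragment bit plus the
+ * length of everything that follows the 4 byte mark.
+ */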
+ rep->r_mreq = m;
+ rep->r_xid = xid;
+tryagain:
+ if (nmp->nm_flag & NFSMNT_SOFT)
+ rep->r_retry = nmp->nm_retry;
+ else
+ rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
+ rep->r_rtt = rep->r_rexmit = 0;
+ if (proct[procnum] > 0)
+ rep->r_flags = R_TIMING;
+ else
+ rep->r_flags = 0;
+ rep->r_mrep = NULL;
+
+ /*
+ * Do the client side RPC.
+ */
+ nfsstats.rpcrequests++;
+ /*
+ * Chain request into list of outstanding requests. Be sure
+ * to put it LAST so timer finds oldest requests first.
+ */
+ s = splsoftclock();
+ TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
+
+ /* Get send time for nqnfs */
+ reqtime = time_second;
+
+ /*
+ * If backing off another request or avoiding congestion, don't
+ * send this one now but let timer do it. If not timing a request,
+ * do it now.
+ */
+ if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+ (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+ nmp->nm_sent < nmp->nm_cwnd)) {
+ splx(s);
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ error = nfs_sndlock(rep);
+ if (!error) {
+ m2 = m_copym(m, 0, M_COPYALL, M_WAIT);
+ error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ nfs_sndunlock(rep);
+ }
+ if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
+ nmp->nm_sent += NFS_CWNDSCALE;
+ rep->r_flags |= R_SENT;
+ }
+ } else {
+ splx(s);
+ rep->r_rtt = -1;
+ }
+
+ /*
+ * Wait for the reply from our send or the timer's.
+ */
+ if (!error || error == EPIPE)
+ error = nfs_reply(rep);
+
+ /*
+ * RPC done, unlink the request.
+ */
+ s = splsoftclock();
+ TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
+ splx(s);
+
+ /*
+ * Decrement the outstanding request count.
+ */
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_SENT; /* paranoia */
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ }
+
+ /*
+ * If there was a successful reply and a tprintf msg has
+ * already been printed, tprintf a response saying the
+ * server is alive again.
+ */
+ if (!error && (rep->r_flags & R_TPRINTFMSG))
+ nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
+ "is alive again");
+ mrep = rep->r_mrep;
+ md = rep->r_md;
+ dpos = rep->r_dpos;
+ if (error) {
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+ }
+
+ /*
+ * break down the rpc header and check if ok
+ */
+ nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+ if (*tl++ == rpc_msgdenied) {
+ if (*tl == rpc_mismatch)
+ error = EOPNOTSUPP;
+ else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
+ if (!failed_auth) {
+ failed_auth++;
+ mheadend->m_next = (struct mbuf *)0;
+ m_freem(mrep);
+ m_freem(rep->r_mreq);
+ goto kerbauth;
+ } else
+ error = EAUTH;
+ } else
+ error = EACCES;
+ m_freem(mrep);
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+ }
+
+ /*
+ * Grab any Kerberos verifier, otherwise just throw it away.
+ */
+ verf_type = fxdr_unsigned(int, *tl++);
+ i = fxdr_unsigned(int32_t, *tl);
+ if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
+ error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
+ if (error)
+ goto nfsmout;
+ } else if (i > 0)
+ nfsm_adv(nfsm_rndup(i));
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ /* 0 == ok */
+ if (*tl == 0) {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ if (*tl != 0) {
+ error = fxdr_unsigned(int, *tl);
+ if ((nmp->nm_flag & NFSMNT_NFSV3) &&
+ error == NFSERR_TRYLATER) {
+ m_freem(mrep);
+ error = 0;
+ waituntil = time_second + trylater_delay;
+ while (time_second < waituntil)
+ (void) tsleep((caddr_t)&lbolt,
+ PSOCK, "nqnfstry", 0);
+ trylater_delay *= nfs_backoff[trylater_cnt];
+ if (trylater_cnt < 7)
+ trylater_cnt++;
+ goto tryagain;
+ }
+
+ /*
+ * If the File Handle was stale, invalidate the
+ * lookup cache, just in case.
+ */
+ if (error == ESTALE)
+ cache_purge(vp);
+ if (nmp->nm_flag & NFSMNT_NFSV3) {
+ *mrp = mrep;
+ *mdp = md;
+ *dposp = dpos;
+ error |= NFSERR_RETERR;
+ } else
+ m_freem(mrep);
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+ }
+
+ /*
+ * For nqnfs, get any lease in reply
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ if (*tl) {
+ np = VTONFS(vp);
+ nqlflag = fxdr_unsigned(int, *tl);
+ nfsm_dissect(tl, u_int32_t *, 4*NFSX_UNSIGNED);
+ cachable = fxdr_unsigned(int, *tl++);
+ reqtime += fxdr_unsigned(int, *tl++);
+ if (reqtime > time_second) {
+ frev = fxdr_hyper(tl);
+ nqnfs_clientlease(nmp, np, nqlflag,
+ cachable, reqtime, frev);
+ }
+ }
+ }
+ *mrp = mrep;
+ *mdp = md;
+ *dposp = dpos;
+ m_freem(rep->r_mreq);
+ FREE((caddr_t)rep, M_NFSREQ);
+ return (0);
+ }
+ m_freem(mrep);
+ error = EPROTONOSUPPORT;
+nfsmout:
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+}
+
+#ifndef NFS_NOSERVER
+/*
+ * Generate the rpc reply header
+ * siz arg. is used to decide if adding a cluster is worthwhile
+ */
+int
+nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
+ int siz;
+ struct nfsrv_descript *nd;
+ struct nfssvc_sock *slp;
+ int err;
+ int cache;
+ u_quad_t *frev;
+ struct mbuf **mrq;
+ struct mbuf **mbp;
+ caddr_t *bposp;
+{
+ register u_int32_t *tl;
+ register struct mbuf *mreq;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2;
+
+ MGETHDR(mreq, M_WAIT, MT_DATA);
+ mb = mreq;
+ /*
+ * If this is a big reply, use a cluster; else
+ * try to leave leading space for the lower level headers.
+ */
+ siz += RPC_REPLYSIZ;
+ if (siz >= MINCLSIZE) {
+ MCLGET(mreq, M_WAIT);
+ } else
+ mreq->m_data += max_hdr;
+ tl = mtod(mreq, u_int32_t *);
+ mreq->m_len = 6 * NFSX_UNSIGNED;
+ bpos = ((caddr_t)tl) + mreq->m_len;
+ *tl++ = txdr_unsigned(nd->nd_retxid);
+ *tl++ = rpc_reply;
+ if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
+ *tl++ = rpc_msgdenied;
+ if (err & NFSERR_AUTHERR) {
+ *tl++ = rpc_autherr;
+ *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
+ mreq->m_len -= NFSX_UNSIGNED;
+ bpos -= NFSX_UNSIGNED;
+ } else {
+ *tl++ = rpc_mismatch;
+ *tl++ = txdr_unsigned(RPC_VER2);
+ *tl = txdr_unsigned(RPC_VER2);
+ }
+ } else {
+ *tl++ = rpc_msgaccepted;
+
+ /*
+ * For Kerberos authentication, we must send the nickname
+ * verifier back, otherwise just RPCAUTH_NULL.
+ */
+ if (nd->nd_flag & ND_KERBFULL) {
+ register struct nfsuid *nuidp;
+ struct timeval ktvin, ktvout;
+
+ for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
+ nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
+ if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
+ (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
+ &nuidp->nu_haddr, nd->nd_nam2)))
+ break;
+ }
+ if (nuidp) {
+ ktvin.tv_sec =
+ txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
+ ktvin.tv_usec =
+ txdr_unsigned(nuidp->nu_timestamp.tv_usec);
+
+ /*
+ * Encrypt the timestamp in ecb mode using the
+ * session key.
+ */
+#ifdef NFSKERB
+ XXX
+#endif
+
+ *tl++ = rpc_auth_kerb;
+ *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
+ *tl = ktvout.tv_sec;
+ nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+ *tl++ = ktvout.tv_usec;
+ *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
+ } else {
+ *tl++ = 0;
+ *tl++ = 0;
+ }
+ } else {
+ *tl++ = 0;
+ *tl++ = 0;
+ }
+ switch (err) {
+ case EPROGUNAVAIL:
+ *tl = txdr_unsigned(RPC_PROGUNAVAIL);
+ break;
+ case EPROGMISMATCH:
+ *tl = txdr_unsigned(RPC_PROGMISMATCH);
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ if (nd->nd_flag & ND_NQNFS) {
+ *tl++ = txdr_unsigned(3);
+ *tl = txdr_unsigned(3);
+ } else {
+ *tl++ = txdr_unsigned(2);
+ *tl = txdr_unsigned(3);
+ }
+ break;
+ case EPROCUNAVAIL:
+ *tl = txdr_unsigned(RPC_PROCUNAVAIL);
+ break;
+ case EBADRPC:
+ *tl = txdr_unsigned(RPC_GARBAGE);
+ break;
+ default:
+ *tl = 0;
+ if (err != NFSERR_RETVOID) {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ if (err)
+ *tl = txdr_unsigned(nfsrv_errmap(nd, err));
+ else
+ *tl = 0;
+ }
+ break;
+ }
+ }
+
+ /*
+ * For nqnfs, piggyback lease as requested.
+ */
+ if ((nd->nd_flag & ND_NQNFS) && err == 0) {
+ if (nd->nd_flag & ND_LEASE) {
+ nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
+ *tl++ = txdr_unsigned(cache);
+ *tl++ = txdr_unsigned(nd->nd_duration);
+ txdr_hyper(*frev, tl);
+ } else {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ if (mrq != NULL)
+ *mrq = mreq;
+ *mbp = mb;
+ *bposp = bpos;
+ if (err != 0 && err != NFSERR_RETVOID)
+ nfsstats.srvrpc_errs++;
+ return (0);
+}
+
+
+#endif /* NFS_NOSERVER */
+/*
+ * Nfs timer routine
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ */
+void
+nfs_timer(arg)
+ void *arg; /* never used */
+{
+ register struct nfsreq *rep;
+ register struct mbuf *m;
+ register struct socket *so;
+ register struct nfsmount *nmp;
+ register int timeo;
+ int s, error;
+#ifndef NFS_NOSERVER
+ static long lasttime = 0;
+ register struct nfssvc_sock *slp;
+ u_quad_t cur_usec;
+#endif /* NFS_NOSERVER */
+ struct proc *p = &proc0; /* XXX for credentials, will break if sleep */
+
+ s = splnet();
+ for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
+ nmp = rep->r_nmp;
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
+ continue;
+ if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+ nfs_softterm(rep);
+ continue;
+ }
+ if (rep->r_rtt >= 0) {
+ rep->r_rtt++;
+ if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+ timeo = nmp->nm_timeo;
+ else
+ timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+ if (nmp->nm_timeouts > 0)
+ timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+ if (rep->r_rtt <= timeo)
+ continue;
+ if (nmp->nm_timeouts < 8)
+ nmp->nm_timeouts++;
+ }
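+ /*
+ * Each repeated timeout on this mount stretches the
+ * threshold by the next nfs_backoff[] multiplier; the
+ * nm_timeouts < 8 test above keeps the index within
+ * the table.
+ */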
+ /*
+ * Check for server not responding
+ */
+ if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+ rep->r_rexmit > nmp->nm_deadthresh) {
+ nfs_msg(rep->r_procp,
+ nmp->nm_mountp->mnt_stat.f_mntfromname,
+ "not responding");
+ rep->r_flags |= R_TPRINTFMSG;
+ }
+ if (rep->r_rexmit >= rep->r_retry) { /* too many */
+ nfsstats.rpctimeouts++;
+ nfs_softterm(rep);
+ continue;
+ }
+ if (nmp->nm_sotype != SOCK_DGRAM) {
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ continue;
+ }
+ if ((so = nmp->nm_so) == NULL)
+ continue;
+
+ /*
+ * If there is enough space and the window allows,
+ * resend it.
+ * Set r_rtt to -1 in case we fail to send it now.
+ */
+ rep->r_rtt = -1;
+ if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+ ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+ (rep->r_flags & R_SENT) ||
+ nmp->nm_sent < nmp->nm_cwnd) &&
+ (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+ error = (*so->so_proto->pr_usrreqs->pru_send)
+ (so, 0, m, (struct sockaddr *)0,
+ (struct mbuf *)0, p);
+ else
+ error = (*so->so_proto->pr_usrreqs->pru_send)
+ (so, 0, m, nmp->nm_nam, (struct mbuf *)0,
+ p);
+ if (error) {
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+ so->so_error = 0;
+ } else {
+ /*
+ * Iff first send, start timing;
+ * else turn timing off, back off the timer
+ * and divide the congestion window by 2.
+ */
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_TIMING;
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ nmp->nm_cwnd >>= 1;
+ if (nmp->nm_cwnd < NFS_CWNDSCALE)
+ nmp->nm_cwnd = NFS_CWNDSCALE;
+ nfsstats.rpcretries++;
+ } else {
+ rep->r_flags |= R_SENT;
+ nmp->nm_sent += NFS_CWNDSCALE;
+ }
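+ /*
+ * The cwnd halving above is the multiplicative-decrease
+ * half of the AIMD scheme; the matching additive
+ * increase is done in nfs_reply() when a reply is
+ * matched to its request.
+ */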
+ rep->r_rtt = 0;
+ }
+ }
+ }
+#ifndef NFS_NOSERVER
+ /*
+ * Call the nqnfs server timer once a second to handle leases.
+ */
+ if (lasttime != time_second) {
+ lasttime = time_second;
+ nqnfs_serverd();
+ }
+
+ /*
+ * Scan the write gathering queues for writes that need to be
+ * completed now.
+ */
+ cur_usec = nfs_curusec();
+ for (slp = nfssvc_sockhead.tqh_first; slp != 0;
+ slp = slp->ns_chain.tqe_next) {
+ if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
+ nfsrv_wakenfsd(slp);
+ }
+#endif /* NFS_NOSERVER */
+ splx(s);
+ nfs_timer_handle = timeout(nfs_timer, (void *)0, nfs_ticks);
+}
+
+/*
+ * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT).
+ * The nm_sent count is decremented now to avoid deadlocks when the process in
+ * soreceive() hasn't yet managed to send its own request.
+ */
+
+static void
+nfs_softterm(rep)
+ struct nfsreq *rep;
+{
+ rep->r_flags |= R_SOFTTERM;
+
+ if (rep->r_flags & R_SENT) {
+ rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
+ rep->r_flags &= ~R_SENT;
+ }
+}
+
+/*
+ * Test for a termination condition pending on the process.
+ * This is used for NFSMNT_INT mounts.
+ */
+int
+nfs_sigintr(nmp, rep, p)
+ struct nfsmount *nmp;
+ struct nfsreq *rep;
+ register struct proc *p;
+{
+ sigset_t tmpset;
+
+ if (rep && (rep->r_flags & R_SOFTTERM))
+ return (EINTR);
+ if (!(nmp->nm_flag & NFSMNT_INT))
+ return (0);
+ if (p == NULL)
+ return (0);
+
+ tmpset = p->p_siglist;
+ SIGSETNAND(tmpset, p->p_sigmask);
+ SIGSETNAND(tmpset, p->p_sigignore);
+ if (SIGNOTEMPTY(p->p_siglist) && NFSINT_SIGMASK(tmpset))
+ return (EINTR);
+
+ return (0);
+}
+
+/*
+ * Lock a socket against others.
+ * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
+ * and also to avoid race conditions between the processes with nfs requests
+ * in progress when a reconnect is necessary.
+ */
+int
+nfs_sndlock(rep)
+ struct nfsreq *rep;
+{
+ register int *statep = &rep->r_nmp->nm_state;
+ struct proc *p;
+ int slpflag = 0, slptimeo = 0;
+
+ if (rep) {
+ p = rep->r_procp;
+ if (rep->r_nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+ } else
+ p = (struct proc *)0;
+ while (*statep & NFSSTA_SNDLOCK) {
+ if (nfs_sigintr(rep->r_nmp, rep, p))
+ return (EINTR);
+ *statep |= NFSSTA_WANTSND;
+ (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
+ "nfsndlck", slptimeo);
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ *statep |= NFSSTA_SNDLOCK;
+ return (0);
+}
+
+/*
+ * Unlock the stream socket for others.
+ */
+void
+nfs_sndunlock(rep)
+ struct nfsreq *rep;
+{
+ register int *statep = &rep->r_nmp->nm_state;
+
+ if ((*statep & NFSSTA_SNDLOCK) == 0)
+ panic("nfs sndunlock");
+ *statep &= ~NFSSTA_SNDLOCK;
+ if (*statep & NFSSTA_WANTSND) {
+ *statep &= ~NFSSTA_WANTSND;
+ wakeup((caddr_t)statep);
+ }
+}
+
+static int
+nfs_rcvlock(rep)
+ register struct nfsreq *rep;
+{
+ register int *statep = &rep->r_nmp->nm_state;
+ int slpflag, slptimeo = 0;
+
+ if (rep->r_nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+ else
+ slpflag = 0;
+ while (*statep & NFSSTA_RCVLOCK) {
+ if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
+ return (EINTR);
+ *statep |= NFSSTA_WANTRCV;
+ (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsrcvlk",
+ slptimeo);
+ /*
+ * If our reply was received while we were sleeping,
+ * then just return without taking the lock to avoid a
+ * situation where a single iod could 'capture' the
+ * receive lock.
+ */
+ if (rep->r_mrep != NULL)
+ return (EALREADY);
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ *statep |= NFSSTA_RCVLOCK;
+ return (0);
+}
+
+/*
+ * Unlock the stream socket for others.
+ */
+static void
+nfs_rcvunlock(rep)
+ register struct nfsreq *rep;
+{
+ register int *statep = &rep->r_nmp->nm_state;
+
+ if ((*statep & NFSSTA_RCVLOCK) == 0)
+ panic("nfs rcvunlock");
+ *statep &= ~NFSSTA_RCVLOCK;
+ if (*statep & NFSSTA_WANTRCV) {
+ *statep &= ~NFSSTA_WANTRCV;
+ wakeup((caddr_t)statep);
+ }
+}
+
+/*
+ * nfs_realign:
+ *
+ * Check for badly aligned mbuf data and realign by copying the unaligned
+ * portion of the data into a new mbuf chain and freeing the portions
+ * of the old chain that were replaced.
+ *
+ * We cannot simply realign the data within the existing mbuf chain
+ * because the underlying buffers may contain other rpc commands and
+ * we cannot afford to overwrite them.
+ *
+ * We would prefer to avoid this situation entirely. The situation does
+ * not occur with NFS/UDP and is supposed to only occasionally occur
+ * with TCP. Use vfs.nfs.realign_count and realign_test to check this.
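+ *
+ * E.g. a chain whose second mbuf has m_len == 6, or whose m_data is
+ * not 4-byte aligned, trips the test below; that mbuf and everything
+ * after it are copied into a fresh, aligned chain that replaces them.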
+ */
+static void
+nfs_realign(pm, hsiz)
+ register struct mbuf **pm;
+ int hsiz;
+{
+ struct mbuf *m;
+ struct mbuf *n = NULL;
+ int off = 0;
+
+ ++nfs_realign_test;
+
+ while ((m = *pm) != NULL) {
+ if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
+ MGET(n, M_WAIT, MT_DATA);
+ if (m->m_len >= MINCLSIZE) {
+ MCLGET(n, M_WAIT);
+ }
+ n->m_len = 0;
+ break;
+ }
+ pm = &m->m_next;
+ }
+
+ /*
+ * If n is non-NULL, loop on m copying data, then replace the
+ * portion of the chain that had to be realigned.
+ */
+ if (n != NULL) {
+ ++nfs_realign_count;
+ while (m) {
+ m_copyback(n, off, m->m_len, mtod(m, caddr_t));
+ off += m->m_len;
+ m = m->m_next;
+ }
+ m_freem(*pm);
+ *pm = n;
+ }
+}
+
+#ifndef NFS_NOSERVER
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ */
+int
+nfs_getreq(nd, nfsd, has_header)
+ register struct nfsrv_descript *nd;
+ struct nfsd *nfsd;
+ int has_header;
+{
+ register int len, i;
+ register u_int32_t *tl;
+ register int32_t t1;
+ struct uio uio;
+ struct iovec iov;
+ caddr_t dpos, cp2, cp;
+ u_int32_t nfsvers, auth_type;
+ uid_t nickuid;
+ int error = 0, nqnfs = 0, ticklen;
+ struct mbuf *mrep, *md;
+ register struct nfsuid *nuidp;
+ struct timeval tvin, tvout;
+#if 0 /* until encrypted keys are implemented */
+ NFSKERBKEYSCHED_T keys; /* stores key schedule */
+#endif
+
+ mrep = nd->nd_mrep;
+ md = nd->nd_md;
+ dpos = nd->nd_dpos;
+ if (has_header) {
+ nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
+ nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
+ if (*tl++ != rpc_call) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ } else
+ nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
+ nd->nd_repstat = 0;
+ nd->nd_flag = 0;
+ if (*tl++ != rpc_vers) {
+ nd->nd_repstat = ERPCMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ if (*tl != nfs_prog) {
+ if (*tl == nqnfs_prog)
+ nqnfs++;
+ else {
+ nd->nd_repstat = EPROGUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ }
+ tl++;
+ nfsvers = fxdr_unsigned(u_int32_t, *tl++);
+ if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
+ (nfsvers != NQNFS_VER3 && nqnfs)) {
+ nd->nd_repstat = EPROGMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ if (nqnfs)
+ nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
+ else if (nfsvers == NFS_VER3)
+ nd->nd_flag = ND_NFSV3;
+ nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
+ if (nd->nd_procnum == NFSPROC_NULL)
+ return (0);
+ if (nd->nd_procnum >= NFS_NPROCS ||
+ (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
+ (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
+ nd->nd_repstat = EPROCUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ if ((nd->nd_flag & ND_NFSV3) == 0)
+ nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
+ auth_type = *tl++;
+ len = fxdr_unsigned(int, *tl++);
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+
+ nd->nd_flag &= ~ND_KERBAUTH;
+ /*
+ * Handle auth_unix or auth_kerb.
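+ * An auth_unix credential body is laid out as: a stamp, the machine
+ * name (length plus padded string), uid, gid, then a gid count and at
+ * most RPCAUTH_UNIXGIDS supplementary gids, followed by the verifier
+ * flavor and opaque length; the auth_unix branch below steps through
+ * exactly that sequence.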
+ */
+ if (auth_type == rpc_auth_unix) {
+ len = fxdr_unsigned(int, *++tl);
+ if (len < 0 || len > NFS_MAXNAMLEN) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_adv(nfsm_rndup(len));
+ nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+ bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
+ nd->nd_cr.cr_ref = 1;
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+ len = fxdr_unsigned(int, *tl);
+ if (len < 0 || len > RPCAUTH_UNIXGIDS) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
+ for (i = 1; i <= len; i++)
+ if (i < NGROUPS)
+ nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
+ else
+ tl++;
+ nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
+ if (nd->nd_cr.cr_ngroups > 1)
+ nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
+ len = fxdr_unsigned(int, *++tl);
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ if (len > 0)
+ nfsm_adv(nfsm_rndup(len));
+ } else if (auth_type == rpc_auth_kerb) {
+ switch (fxdr_unsigned(int, *tl++)) {
+ case RPCAKN_FULLNAME:
+ ticklen = fxdr_unsigned(int, *tl);
+ *((u_int32_t *)nfsd->nfsd_authstr) = *tl;
+ uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
+ nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
+ if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ uio.uio_offset = 0;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_segflg = UIO_SYSSPACE;
+ iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
+ iov.iov_len = RPCAUTH_MAXSIZ - 4;
+ nfsm_mtouio(&uio, uio.uio_resid);
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ if (*tl++ != rpc_auth_kerb ||
+ fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
+ printf("Bad kerb verifier\n");
+ nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
+ tl = (u_int32_t *)cp;
+ if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
+ printf("Not fullname kerb verifier\n");
+ nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ cp += NFSX_UNSIGNED;
+ bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
+ nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
+ nd->nd_flag |= ND_KERBFULL;
+ nfsd->nfsd_flag |= NFSD_NEEDAUTH;
+ break;
+ case RPCAKN_NICKNAME:
+ if (len != 2 * NFSX_UNSIGNED) {
+ printf("Kerb nickname short\n");
+ nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nickuid = fxdr_unsigned(uid_t, *tl);
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ if (*tl++ != rpc_auth_kerb ||
+ fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
+ printf("Kerb nick verifier bad\n");
+ nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+ tvin.tv_sec = *tl++;
+ tvin.tv_usec = *tl;
+
+ for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
+ nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
+ if (nuidp->nu_cr.cr_uid == nickuid &&
+ (!nd->nd_nam2 ||
+ netaddr_match(NU_NETFAM(nuidp),
+ &nuidp->nu_haddr, nd->nd_nam2)))
+ break;
+ }
+ if (!nuidp) {
+ nd->nd_repstat =
+ (NFSERR_AUTHERR|AUTH_REJECTCRED);
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+
+ /*
+ * Now, decrypt the timestamp using the session key
+ * and validate it.
+ */
+#ifdef NFSKERB
+ XXX
+#endif
+
+ tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
+ tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
+ if (nuidp->nu_expire < time_second ||
+ nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
+ (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
+ nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
+ nuidp->nu_expire = 0;
+ nd->nd_repstat =
+ (NFSERR_AUTHERR|AUTH_REJECTVERF);
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
+ nd->nd_flag |= ND_KERBNICK;
+ }
+ } else {
+ nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+
+ /*
+ * For nqnfs, get piggybacked lease request.
+ */
+ if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ nd->nd_flag |= fxdr_unsigned(int, *tl);
+ if (nd->nd_flag & ND_LEASE) {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ nd->nd_duration = fxdr_unsigned(int32_t, *tl);
+ } else
+ nd->nd_duration = NQ_MINLEASE;
+ } else
+ nd->nd_duration = NQ_MINLEASE;
+ nd->nd_md = md;
+ nd->nd_dpos = dpos;
+ return (0);
+nfsmout:
+ return (error);
+}
+
+#endif
+
+static int
+nfs_msg(p, server, msg)
+ struct proc *p;
+ char *server, *msg;
+{
+ tpr_t tpr;
+
+ if (p)
+ tpr = tprintf_open(p);
+ else
+ tpr = NULL;
+ tprintf(tpr, "nfs server %s: %s\n", server, msg);
+ tprintf_close(tpr);
+ return (0);
+}
+
+#ifndef NFS_NOSERVER
+/*
+ * Socket upcall routine for the nfsd sockets.
+ * The caddr_t arg is a pointer to the "struct nfssvc_sock".
+ * Essentially do as much as possible non-blocking, else punt and it will
+ * be called with M_WAIT from an nfsd.
+ */
+void
+nfsrv_rcv(so, arg, waitflag)
+ struct socket *so;
+ void *arg;
+ int waitflag;
+{
+ register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
+ register struct mbuf *m;
+ struct mbuf *mp;
+ struct sockaddr *nam;
+ struct uio auio;
+ int flags, error;
+
+ if ((slp->ns_flag & SLP_VALID) == 0)
+ return;
+#ifdef notdef
+ /*
+ * Define this to test for nfsds handling this under heavy load.
+ */
+ if (waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+ }
+#endif
+ auio.uio_procp = NULL;
+ if (so->so_type == SOCK_STREAM) {
+ /*
+ * If there are already records on the queue, defer soreceive()
+ * to an nfsd so that there is feedback to the TCP layer that
+ * the nfs servers are heavily loaded.
+ */
+ if (STAILQ_FIRST(&slp->ns_rec) && waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ;
+ goto dorecs;
+ }
+
+ /*
+ * Do soreceive().
+ */
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = so->so_proto->pr_usrreqs->pru_soreceive
+ (so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
+ if (error || mp == (struct mbuf *)0) {
+ if (error == EWOULDBLOCK)
+ slp->ns_flag |= SLP_NEEDQ;
+ else
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ m = mp;
+ if (slp->ns_rawend) {
+ slp->ns_rawend->m_next = m;
+ slp->ns_cc += 1000000000 - auio.uio_resid;
+ } else {
+ slp->ns_raw = m;
+ slp->ns_cc = 1000000000 - auio.uio_resid;
+ }
+ while (m->m_next)
+ m = m->m_next;
+ slp->ns_rawend = m;
+
+ /*
+ * Now try and parse record(s) out of the raw stream data.
+ */
+ error = nfsrv_getstream(slp, waitflag);
+ if (error) {
+ if (error == EPERM)
+ slp->ns_flag |= SLP_DISCONN;
+ else
+ slp->ns_flag |= SLP_NEEDQ;
+ }
+ } else {
+ do {
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = so->so_proto->pr_usrreqs->pru_soreceive
+ (so, &nam, &auio, &mp,
+ (struct mbuf **)0, &flags);
+ if (mp) {
+ struct nfsrv_rec *rec;
+ rec = malloc(sizeof(struct nfsrv_rec),
+ M_NFSRVDESC, waitflag);
+ if (!rec) {
+ if (nam)
+ FREE(nam, M_SONAME);
+ m_freem(mp);
+ continue;
+ }
+ nfs_realign(&mp, 10 * NFSX_UNSIGNED);
+ rec->nr_address = nam;
+ rec->nr_packet = mp;
+ STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
+ }
+ if (error) {
+ if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
+ && error != EWOULDBLOCK) {
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ }
+ } while (mp);
+ }
+
+ /*
+ * Now try and process the request records, non-blocking.
+ */
+dorecs:
+ if (waitflag == M_DONTWAIT &&
+ (STAILQ_FIRST(&slp->ns_rec)
+ || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
+ nfsrv_wakenfsd(slp);
+}
+
+/*
+ * Try and extract an RPC request from the mbuf data list received on a
+ * stream socket. The "waitflag" argument indicates whether or not it
+ * can sleep.
+ */
+static int
+nfsrv_getstream(slp, waitflag)
+ register struct nfssvc_sock *slp;
+ int waitflag;
+{
+ register struct mbuf *m, **mpp;
+ register char *cp1, *cp2;
+ register int len;
+ struct mbuf *om, *m2, *recm = NULL;
+ u_int32_t recmark;
+
+ if (slp->ns_flag & SLP_GETSTREAM)
+ panic("nfs getstream");
+ slp->ns_flag |= SLP_GETSTREAM;
+ for (;;) {
+ if (slp->ns_reclen == 0) {
+ if (slp->ns_cc < NFSX_UNSIGNED) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ m = slp->ns_raw;
+ if (m->m_len >= NFSX_UNSIGNED) {
+ bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
+ m->m_data += NFSX_UNSIGNED;
+ m->m_len -= NFSX_UNSIGNED;
+ } else {
+ cp1 = (caddr_t)&recmark;
+ cp2 = mtod(m, caddr_t);
+ while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
+ while (m->m_len == 0) {
+ m = m->m_next;
+ cp2 = mtod(m, caddr_t);
+ }
+ *cp1++ = *cp2++;
+ m->m_data++;
+ m->m_len--;
+ }
+ }
+ slp->ns_cc -= NFSX_UNSIGNED;
+ recmark = ntohl(recmark);
+ slp->ns_reclen = recmark & ~0x80000000;
+ if (recmark & 0x80000000)
+ slp->ns_flag |= SLP_LASTFRAG;
+ else
+ slp->ns_flag &= ~SLP_LASTFRAG;
+ if (slp->ns_reclen > NFS_MAXPACKET) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EPERM);
+ }
+ }
+
+ /*
+ * Now get the record part.
+ */
+ if (slp->ns_cc == slp->ns_reclen) {
+ recm = slp->ns_raw;
+ slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
+ slp->ns_cc = slp->ns_reclen = 0;
+ } else if (slp->ns_cc > slp->ns_reclen) {
+ len = 0;
+ m = slp->ns_raw;
+ om = (struct mbuf *)0;
+ while (len < slp->ns_reclen) {
+ if ((len + m->m_len) > slp->ns_reclen) {
+ m2 = m_copym(m, 0, slp->ns_reclen - len,
+ waitflag);
+ if (m2) {
+ if (om) {
+ om->m_next = m2;
+ recm = slp->ns_raw;
+ } else
+ recm = m2;
+ m->m_data += slp->ns_reclen - len;
+ m->m_len -= slp->ns_reclen - len;
+ len = slp->ns_reclen;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EWOULDBLOCK);
+ }
+ } else if ((len + m->m_len) == slp->ns_reclen) {
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ recm = slp->ns_raw;
+ om->m_next = (struct mbuf *)0;
+ } else {
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ }
+ }
+ slp->ns_raw = m;
+ slp->ns_cc -= len;
+ slp->ns_reclen = 0;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+
+ /*
+ * Accumulate the fragments into a record.
+ */
+ mpp = &slp->ns_frag;
+ while (*mpp)
+ mpp = &((*mpp)->m_next);
+ *mpp = recm;
+ if (slp->ns_flag & SLP_LASTFRAG) {
+ struct nfsrv_rec *rec;
+ rec = malloc(sizeof(struct nfsrv_rec), M_NFSRVDESC, waitflag);
+ if (!rec) {
+ m_freem(slp->ns_frag);
+ } else {
+ nfs_realign(&slp->ns_frag, 10 * NFSX_UNSIGNED);
+ rec->nr_address = (struct sockaddr *)0;
+ rec->nr_packet = slp->ns_frag;
+ STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
+ }
+ slp->ns_frag = (struct mbuf *)0;
+ }
+ }
+}
+
+/*
+ * Parse an RPC header.
+ */
+int
+nfsrv_dorec(slp, nfsd, ndp)
+ register struct nfssvc_sock *slp;
+ struct nfsd *nfsd;
+ struct nfsrv_descript **ndp;
+{
+ struct nfsrv_rec *rec;
+ register struct mbuf *m;
+ struct sockaddr *nam;
+ register struct nfsrv_descript *nd;
+ int error;
+
+ *ndp = NULL;
+ if ((slp->ns_flag & SLP_VALID) == 0 || !STAILQ_FIRST(&slp->ns_rec))
+ return (ENOBUFS);
+ rec = STAILQ_FIRST(&slp->ns_rec);
+ STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
+ nam = rec->nr_address;
+ m = rec->nr_packet;
+ free(rec, M_NFSRVDESC);
+ MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
+ M_NFSRVDESC, M_WAITOK);
+ nd->nd_md = nd->nd_mrep = m;
+ nd->nd_nam2 = nam;
+ nd->nd_dpos = mtod(m, caddr_t);
+ error = nfs_getreq(nd, nfsd, TRUE);
+ if (error) {
+ if (nam) {
+ FREE(nam, M_SONAME);
+ }
+ free((caddr_t)nd, M_NFSRVDESC);
+ return (error);
+ }
+ *ndp = nd;
+ nfsd->nfsd_nd = nd;
+ return (0);
+}
+
+/*
+ * Search for a sleeping nfsd and wake it up.
+ * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
+ * running nfsds will go look for the work in the nfssvc_sock list.
+ */
+void
+nfsrv_wakenfsd(slp)
+ struct nfssvc_sock *slp;
+{
+ register struct nfsd *nd;
+
+ if ((slp->ns_flag & SLP_VALID) == 0)
+ return;
+ for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
+ if (nd->nfsd_flag & NFSD_WAITING) {
+ nd->nfsd_flag &= ~NFSD_WAITING;
+ if (nd->nfsd_slp)
+ panic("nfsd wakeup");
+ slp->ns_sref++;
+ nd->nfsd_slp = slp;
+ wakeup((caddr_t)nd);
+ return;
+ }
+ }
+ slp->ns_flag |= SLP_DOREC;
+ nfsd_head_flag |= NFSD_CHECKSLP;
+}
+#endif /* NFS_NOSERVER */
diff --git a/sys/nfs/nfs_srvcache.c b/sys/nfs/nfs_srvcache.c
new file mode 100644
index 0000000..6f9d42f
--- /dev/null
+++ b/sys/nfs/nfs_srvcache.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_srvcache.c 8.3 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+/*
+ * Reference: Chet Juszczak, "Improving the Performance and Correctness
+ * of an NFS Server", in Proc. Winter 1989 USENIX Conference,
+ * pages 53-63. San Diego, February 1989.
+ */
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h> /* for dup_sockaddr */
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+
+#ifndef NFS_NOSERVER
+extern struct nfsstats nfsstats;
+extern int nfsv2_procid[NFS_NPROCS];
+static long numnfsrvcache;
+static long desirednfsrvcache = NFSRVCACHESIZ;
+
+#define NFSRCHASH(xid) \
+ (&nfsrvhashtbl[((xid) + ((xid) >> 24)) & nfsrvhash])
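+/*
+ * The hash folds the top byte of the xid into its low bits before
+ * masking with nfsrvhash, the power-of-two mask that hashinit() hands
+ * back in nfsrv_initcache() below.
+ */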
+static LIST_HEAD(nfsrvhash, nfsrvcache) *nfsrvhashtbl;
+static TAILQ_HEAD(nfsrvlru, nfsrvcache) nfsrvlruhead;
+static u_long nfsrvhash;
+
+#define TRUE 1
+#define FALSE 0
+
+#define NETFAMILY(rp) \
+ (((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO)
+
+/*
+ * Static array that defines which nfs rpc's are nonidempotent
+ */
+static int nonidempotent[NFS_NPROCS] = {
+ FALSE,
+ FALSE,
+ TRUE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+};
+
+/* True iff the rpc reply is an nfs status ONLY! */
+static int nfsv2_repstat[NFS_NPROCS] = {
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ FALSE,
+ TRUE,
+ FALSE,
+ FALSE,
+};
+
+/*
+ * Initialize the server request cache list
+ */
+void
+nfsrv_initcache()
+{
+
+ nfsrvhashtbl = hashinit(desirednfsrvcache, M_NFSD, &nfsrvhash);
+ TAILQ_INIT(&nfsrvlruhead);
+}
+
+/*
+ * Look for the request in the cache
+ * If found then
+ * return action and optionally reply
+ * else
+ * insert it in the cache
+ *
+ * The rules are as follows:
+ * - if the request is still in progress, drop it (RC_DROPIT)
+ * - if it completed within DELAY of the current time, drop it (RC_DROPIT)
+ * - if it completed longer ago, return the cached reply (RC_REPLY) if one
+ * was saved, otherwise redo the operation (RC_DOIT)
+ * Update/add new request at end of lru list
+ */
+int
+nfsrv_getcache(nd, slp, repp)
+ register struct nfsrv_descript *nd;
+ struct nfssvc_sock *slp;
+ struct mbuf **repp;
+{
+ register struct nfsrvcache *rp;
+ struct mbuf *mb;
+ struct sockaddr_in *saddr;
+ caddr_t bpos;
+ int ret;
+
+ /*
+ * Don't cache recent requests for reliable transport protocols.
+ * (Maybe we should for the case of a reconnect, but..)
+ */
+ if (!nd->nd_nam2)
+ return (RC_DOIT);
+loop:
+ for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
+ rp = rp->rc_hash.le_next) {
+ if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+ netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
+ NFS_DPF(RC, ("H%03x", rp->rc_xid & 0xfff));
+ if ((rp->rc_flag & RC_LOCKED) != 0) {
+ rp->rc_flag |= RC_WANTED;
+ (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+ goto loop;
+ }
+ rp->rc_flag |= RC_LOCKED;
+ /* If not at end of LRU chain, move it there */
+ if (rp->rc_lru.tqe_next) {
+ TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
+ TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);
+ }
+ if (rp->rc_state == RC_UNUSED)
+ panic("nfsrv cache");
+ if (rp->rc_state == RC_INPROG) {
+ nfsstats.srvcache_inproghits++;
+ ret = RC_DROPIT;
+ } else if (rp->rc_flag & RC_REPSTATUS) {
+ nfsstats.srvcache_nonidemdonehits++;
+ nfs_rephead(0, nd, slp, rp->rc_status,
+ 0, (u_quad_t *)0, repp, &mb, &bpos);
+ ret = RC_REPLY;
+ } else if (rp->rc_flag & RC_REPMBUF) {
+ nfsstats.srvcache_nonidemdonehits++;
+ *repp = m_copym(rp->rc_reply, 0, M_COPYALL,
+ M_WAIT);
+ ret = RC_REPLY;
+ } else {
+ nfsstats.srvcache_idemdonehits++;
+ rp->rc_state = RC_INPROG;
+ ret = RC_DOIT;
+ }
+ rp->rc_flag &= ~RC_LOCKED;
+ if (rp->rc_flag & RC_WANTED) {
+ rp->rc_flag &= ~RC_WANTED;
+ wakeup((caddr_t)rp);
+ }
+ return (ret);
+ }
+ }
+ nfsstats.srvcache_misses++;
+ NFS_DPF(RC, ("M%03x", nd->nd_retxid & 0xfff));
+ if (numnfsrvcache < desirednfsrvcache) {
+ rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp,
+ M_NFSD, M_WAITOK);
+ bzero((char *)rp, sizeof *rp);
+ numnfsrvcache++;
+ rp->rc_flag = RC_LOCKED;
+ } else {
+ rp = nfsrvlruhead.tqh_first;
+ while ((rp->rc_flag & RC_LOCKED) != 0) {
+ rp->rc_flag |= RC_WANTED;
+ (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+ rp = nfsrvlruhead.tqh_first;
+ }
+ rp->rc_flag |= RC_LOCKED;
+ LIST_REMOVE(rp, rc_hash);
+ TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
+ if (rp->rc_flag & RC_REPMBUF)
+ m_freem(rp->rc_reply);
+ if (rp->rc_flag & RC_NAM)
+ FREE(rp->rc_nam, M_SONAME);
+ rp->rc_flag &= (RC_LOCKED | RC_WANTED);
+ }
+ TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);
+ rp->rc_state = RC_INPROG;
+ rp->rc_xid = nd->nd_retxid;
+ saddr = (struct sockaddr_in *)nd->nd_nam;
+ switch (saddr->sin_family) {
+ case AF_INET:
+ rp->rc_flag |= RC_INETADDR;
+ rp->rc_inetaddr = saddr->sin_addr.s_addr;
+ break;
+ case AF_ISO:
+ default:
+ rp->rc_flag |= RC_NAM;
+ rp->rc_nam = dup_sockaddr(nd->nd_nam, 1);
+ break;
+ }
+ rp->rc_proc = nd->nd_procnum;
+ LIST_INSERT_HEAD(NFSRCHASH(nd->nd_retxid), rp, rc_hash);
+ rp->rc_flag &= ~RC_LOCKED;
+ if (rp->rc_flag & RC_WANTED) {
+ rp->rc_flag &= ~RC_WANTED;
+ wakeup((caddr_t)rp);
+ }
+ return (RC_DOIT);
+}
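+/*
+ * A sketch of how a caller acts on the return value (the actual
+ * dispatch lives in the nfsd service loop, not here):
+ *
+ *	switch (nfsrv_getcache(nd, slp, &mreq)) {
+ *	case RC_DOIT:	do the rpc, then call nfsrv_updatecache(); break;
+ *	case RC_REPLY:	send the cached reply held in mreq; break;
+ *	case RC_DROPIT:	free the request and send nothing; break;
+ *	}
+ */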
+
+/*
+ * Update a request cache entry after the rpc has been done
+ */
+void
+nfsrv_updatecache(nd, repvalid, repmbuf)
+ register struct nfsrv_descript *nd;
+ int repvalid;
+ struct mbuf *repmbuf;
+{
+ register struct nfsrvcache *rp;
+
+ if (!nd->nd_nam2)
+ return;
+loop:
+ for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
+ rp = rp->rc_hash.le_next) {
+ if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+ netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
+ NFS_DPF(RC, ("U%03x", rp->rc_xid & 0xfff));
+ if ((rp->rc_flag & RC_LOCKED) != 0) {
+ rp->rc_flag |= RC_WANTED;
+ (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+ goto loop;
+ }
+ rp->rc_flag |= RC_LOCKED;
+ if (rp->rc_state == RC_DONE) {
+ /*
+ * This can occur if the cache is too small.
+ * Retransmits of the same request aren't
+ * dropped so we may see the operation
+ * complete more than once.
+ */
+ if (rp->rc_flag & RC_REPMBUF) {
+ m_freem(rp->rc_reply);
+ rp->rc_flag &= ~RC_REPMBUF;
+ }
+ }
+ rp->rc_state = RC_DONE;
+ /*
+ * If we have a valid reply update status and save
+ * the reply for non-idempotent rpc's.
+ */
+ if (repvalid && nonidempotent[nd->nd_procnum]) {
+ if ((nd->nd_flag & ND_NFSV3) == 0 &&
+ nfsv2_repstat[nfsv2_procid[nd->nd_procnum]]) {
+ rp->rc_status = nd->nd_repstat;
+ rp->rc_flag |= RC_REPSTATUS;
+ } else {
+ rp->rc_reply = m_copym(repmbuf,
+ 0, M_COPYALL, M_WAIT);
+ rp->rc_flag |= RC_REPMBUF;
+ }
+ }
+ rp->rc_flag &= ~RC_LOCKED;
+ if (rp->rc_flag & RC_WANTED) {
+ rp->rc_flag &= ~RC_WANTED;
+ wakeup((caddr_t)rp);
+ }
+ return;
+ }
+ }
+ NFS_DPF(RC, ("L%03x", nd->nd_retxid & 0xfff));
+}
+
+/*
+ * Clean out the cache. Called when the last nfsd terminates.
+ */
+void
+nfsrv_cleancache()
+{
+ register struct nfsrvcache *rp, *nextrp;
+
+ for (rp = nfsrvlruhead.tqh_first; rp != 0; rp = nextrp) {
+ nextrp = rp->rc_lru.tqe_next;
+ LIST_REMOVE(rp, rc_hash);
+ TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
+ if (rp->rc_flag & RC_REPMBUF)
+ m_freem(rp->rc_reply);
+ if (rp->rc_flag & RC_NAM)
+ free(rp->rc_nam, M_SONAME);
+ free(rp, M_NFSD);
+ }
+ numnfsrvcache = 0;
+}
+
+#endif /* NFS_NOSERVER */
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
new file mode 100644
index 0000000..e63ba63
--- /dev/null
+++ b/sys/nfs/nfs_subs.c
@@ -0,0 +1,2272 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
+ * $FreeBSD$
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/malloc.h>
+#include <sys/sysent.h>
+#include <sys/syscall.h>
+#include <sys/conf.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+/*
+ * Data items converted to xdr at startup, since they are constant
+ * This is kinda hokey, but may save a little time doing byte swaps
+ */
+u_int32_t nfs_xdrneg1;
+u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+ rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
+ rpc_auth_kerb;
+u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false;
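+
+/*
+ * For example, rpc_vers ends up holding txdr_unsigned(RPC_VER2) after
+ * nfs_init() runs, i.e. the constant 2 already in XDR (big-endian) byte
+ * order, so nfsm_rpchead() can copy the pre-swapped word straight into
+ * each request header instead of byte swapping it per RPC.
+ */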
+
+/* And other global data */
+static u_int32_t nfs_xid = 0;
+static enum vtype nv2tov_type[8]= {
+ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON
+};
+enum vtype nv3tov_type[8]= {
+ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO
+};
+
+int nfs_ticks;
+int nfs_pbuf_freecnt = -1; /* start out unlimited */
+
+struct nfs_reqq nfs_reqq;
+struct nfssvc_sockhead nfssvc_sockhead;
+int nfssvc_sockhead_flag;
+struct nfsd_head nfsd_head;
+int nfsd_head_flag;
+struct nfs_bufq nfs_bufq;
+struct nqtimerhead nqtimerhead;
+struct nqfhhashhead *nqfhhashtbl;
+u_long nqfhhash;
+
+static void (*nfs_prev_lease_updatetime) __P((int));
+static int nfs_prev_nfssvc_sy_narg;
+static sy_call_t *nfs_prev_nfssvc_sy_call;
+
+#ifndef NFS_NOSERVER
+
+static vop_t *nfs_prev_vop_lease_check;
+static int nfs_prev_getfh_sy_narg;
+static sy_call_t *nfs_prev_getfh_sy_call;
+
+/*
+ * Mapping of old NFS Version 2 RPC numbers to generic numbers.
+ */
+int nfsv3_procid[NFS_NPROCS] = {
+ NFSPROC_NULL,
+ NFSPROC_GETATTR,
+ NFSPROC_SETATTR,
+ NFSPROC_NOOP,
+ NFSPROC_LOOKUP,
+ NFSPROC_READLINK,
+ NFSPROC_READ,
+ NFSPROC_NOOP,
+ NFSPROC_WRITE,
+ NFSPROC_CREATE,
+ NFSPROC_REMOVE,
+ NFSPROC_RENAME,
+ NFSPROC_LINK,
+ NFSPROC_SYMLINK,
+ NFSPROC_MKDIR,
+ NFSPROC_RMDIR,
+ NFSPROC_READDIR,
+ NFSPROC_FSSTAT,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP,
+ NFSPROC_NOOP
+};
+
+#endif /* NFS_NOSERVER */
+/*
+ * and the reverse mapping from generic to Version 2 procedure numbers
+ */
+int nfsv2_procid[NFS_NPROCS] = {
+ NFSV2PROC_NULL,
+ NFSV2PROC_GETATTR,
+ NFSV2PROC_SETATTR,
+ NFSV2PROC_LOOKUP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_READLINK,
+ NFSV2PROC_READ,
+ NFSV2PROC_WRITE,
+ NFSV2PROC_CREATE,
+ NFSV2PROC_MKDIR,
+ NFSV2PROC_SYMLINK,
+ NFSV2PROC_CREATE,
+ NFSV2PROC_REMOVE,
+ NFSV2PROC_RMDIR,
+ NFSV2PROC_RENAME,
+ NFSV2PROC_LINK,
+ NFSV2PROC_READDIR,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_STATFS,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+ NFSV2PROC_NOOP,
+};
+
+#ifndef NFS_NOSERVER
+/*
+ * Maps errno values to nfs error numbers.
+ * Use NFSERR_IO as the catch all for ones not specifically defined in
+ * RFC 1094.
+ */
+static u_char nfsrv_v2errmap[ELAST] = {
+ NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR,
+ NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO,
+ NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO /* << Last is 86 */
+};
+
+/*
+ * Maps errno values to nfs error numbers.
+ * Although it is not obvious whether or not NFS clients really care if
+ * a returned error value is in the specified list for the procedure, the
+ * safest thing to do is filter them appropriately. For Version 2, the
+ * X/Open XNFS document is the only specification that defines error values
+ * for each RPC (The RFC simply lists all possible error values for all RPCs),
+ * so I have decided to not do this for Version 2.
+ * The first entry is the default error return and the rest are the valid
+ * errors for that RPC in increasing numeric order.
+ */
+static short nfsv3err_null[] = {
+ 0,
+ 0,
+};
+
+static short nfsv3err_getattr[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_setattr[] = {
+ NFSERR_IO,
+ NFSERR_PERM,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_INVAL,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOT_SYNC,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_lookup[] = {
+ NFSERR_IO,
+ NFSERR_NOENT,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_NOTDIR,
+ NFSERR_NAMETOL,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_access[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_readlink[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_INVAL,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_read[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_NXIO,
+ NFSERR_ACCES,
+ NFSERR_INVAL,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_write[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_INVAL,
+ NFSERR_FBIG,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_create[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_mkdir[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_symlink[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_mknod[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ NFSERR_BADTYPE,
+ 0,
+};
+
+static short nfsv3err_remove[] = {
+ NFSERR_IO,
+ NFSERR_NOENT,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_NOTDIR,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_rmdir[] = {
+ NFSERR_IO,
+ NFSERR_NOENT,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_NOTDIR,
+ NFSERR_INVAL,
+ NFSERR_ROFS,
+ NFSERR_NAMETOL,
+ NFSERR_NOTEMPTY,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_rename[] = {
+ NFSERR_IO,
+ NFSERR_NOENT,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_XDEV,
+ NFSERR_NOTDIR,
+ NFSERR_ISDIR,
+ NFSERR_INVAL,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_MLINK,
+ NFSERR_NAMETOL,
+ NFSERR_NOTEMPTY,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_link[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_EXIST,
+ NFSERR_XDEV,
+ NFSERR_NOTDIR,
+ NFSERR_INVAL,
+ NFSERR_NOSPC,
+ NFSERR_ROFS,
+ NFSERR_MLINK,
+ NFSERR_NAMETOL,
+ NFSERR_DQUOT,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_NOTSUPP,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_readdir[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_NOTDIR,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_BAD_COOKIE,
+ NFSERR_TOOSMALL,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_readdirplus[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_ACCES,
+ NFSERR_NOTDIR,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_BAD_COOKIE,
+ NFSERR_NOTSUPP,
+ NFSERR_TOOSMALL,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_fsstat[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_fsinfo[] = {
+ NFSERR_STALE,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_pathconf[] = {
+ NFSERR_STALE,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short nfsv3err_commit[] = {
+ NFSERR_IO,
+ NFSERR_IO,
+ NFSERR_STALE,
+ NFSERR_BADHANDLE,
+ NFSERR_SERVERFAULT,
+ 0,
+};
+
+static short *nfsrv_v3errmap[] = {
+ nfsv3err_null,
+ nfsv3err_getattr,
+ nfsv3err_setattr,
+ nfsv3err_lookup,
+ nfsv3err_access,
+ nfsv3err_readlink,
+ nfsv3err_read,
+ nfsv3err_write,
+ nfsv3err_create,
+ nfsv3err_mkdir,
+ nfsv3err_symlink,
+ nfsv3err_mknod,
+ nfsv3err_remove,
+ nfsv3err_rmdir,
+ nfsv3err_rename,
+ nfsv3err_link,
+ nfsv3err_readdir,
+ nfsv3err_readdirplus,
+ nfsv3err_fsstat,
+ nfsv3err_fsinfo,
+ nfsv3err_pathconf,
+ nfsv3err_commit,
+};
+
+#endif /* NFS_NOSERVER */
+
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+extern struct nfsstats nfsstats;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern nfstype nfsv2_type[9];
+extern nfstype nfsv3_type[9];
+extern struct nfsnodehashhead *nfsnodehashtbl;
+extern u_long nfsnodehash;
+
+struct getfh_args;
+extern int getfh(struct proc *, struct getfh_args *, int *);
+struct nfssvc_args;
+extern int nfssvc(struct proc *, struct nfssvc_args *, int *);
+
+LIST_HEAD(nfsnodehashhead, nfsnode);
+
+int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *));
+
+u_quad_t
+nfs_curusec()
+{
+ struct timeval tv;
+
+ getmicrotime(&tv);
+ return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec);
+}
+
+/*
+ * Create the header for an rpc request packet
+ * The hsiz is the size of the rest of the nfs request header.
+ * (just used to decide if a cluster is a good idea)
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+ struct vnode *vp;
+ u_long procid;
+ int hsiz;
+ caddr_t *bposp;
+{
+ register struct mbuf *mb;
+ register u_int32_t *tl;
+ register caddr_t bpos;
+ struct mbuf *mb2;
+ struct nfsmount *nmp;
+ int nqflag;
+
+ MGET(mb, M_WAIT, MT_DATA);
+ if (hsiz >= MINCLSIZE)
+ MCLGET(mb, M_WAIT);
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * For NQNFS, add lease request.
+ */
+ if (vp) {
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nqflag = NQNFS_NEEDLEASE(vp, procid);
+ if (nqflag) {
+ nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nqflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ } else {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ }
+ /* Finally, return values */
+ *bposp = bpos;
+ return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len,
+ verf_str, mrest, mrest_len, mbp, xidp)
+ register struct ucred *cr;
+ int nmflag;
+ int procid;
+ int auth_type;
+ int auth_len;
+ char *auth_str;
+ int verf_len;
+ char *verf_str;
+ struct mbuf *mrest;
+ int mrest_len;
+ struct mbuf **mbp;
+ u_int32_t *xidp;
+{
+ register struct mbuf *mb;
+ register u_int32_t *tl;
+ register caddr_t bpos;
+ register int i;
+ struct mbuf *mreq, *mb2;
+ int siz, grpsiz, authsiz;
+
+ authsiz = nfsm_rndup(auth_len);
+ MGETHDR(mb, M_WAIT, MT_DATA);
+ if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) {
+ MCLGET(mb, M_WAIT);
+ } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) {
+ MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED);
+ } else {
+ MH_ALIGN(mb, 8 * NFSX_UNSIGNED);
+ }
+ mb->m_len = 0;
+ mreq = mb;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * First the RPC header.
+ */
+ nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
+
+ /* Get a pretty random xid to start with */
+ if (!nfs_xid)
+ nfs_xid = random();
+ /*
+ * Skip zero xid if it should ever happen.
+ */
+ if (++nfs_xid == 0)
+ nfs_xid++;
+
+ *tl++ = *xidp = txdr_unsigned(nfs_xid);
+ *tl++ = rpc_call;
+ *tl++ = rpc_vers;
+ if (nmflag & NFSMNT_NQNFS) {
+ *tl++ = txdr_unsigned(NQNFS_PROG);
+ *tl++ = txdr_unsigned(NQNFS_VER3);
+ } else {
+ *tl++ = txdr_unsigned(NFS_PROG);
+ if (nmflag & NFSMNT_NFSV3)
+ *tl++ = txdr_unsigned(NFS_VER3);
+ else
+ *tl++ = txdr_unsigned(NFS_VER2);
+ }
+ if (nmflag & NFSMNT_NFSV3)
+ *tl++ = txdr_unsigned(procid);
+ else
+ *tl++ = txdr_unsigned(nfsv2_procid[procid]);
+
+ /*
+ * And then the authorization cred.
+ */
+ *tl++ = txdr_unsigned(auth_type);
+ *tl = txdr_unsigned(authsiz);
+ switch (auth_type) {
+ case RPCAUTH_UNIX:
+ nfsm_build(tl, u_int32_t *, auth_len);
+ *tl++ = 0; /* stamp ?? */
+ *tl++ = 0; /* NULL hostname */
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl++ = txdr_unsigned(cr->cr_groups[0]);
+ grpsiz = (auth_len >> 2) - 5;
+ *tl++ = txdr_unsigned(grpsiz);
+ for (i = 1; i <= grpsiz; i++)
+ *tl++ = txdr_unsigned(cr->cr_groups[i]);
+ break;
+ case RPCAUTH_KERB4:
+ siz = auth_len;
+ while (siz > 0) {
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(auth_str, bpos, i);
+ mb->m_len += i;
+ auth_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ break;
+ };
+
+ /*
+ * And the verifier...
+ */
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ if (verf_str) {
+ *tl++ = txdr_unsigned(RPCAUTH_KERB4);
+ *tl = txdr_unsigned(verf_len);
+ siz = verf_len;
+ while (siz > 0) {
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(verf_str, bpos, i);
+ mb->m_len += i;
+ verf_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) {
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ } else {
+ *tl++ = txdr_unsigned(RPCAUTH_NULL);
+ *tl = 0;
+ }
+ mb->m_next = mrest;
+ mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len;
+ mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+ *mbp = mb;
+ return (mreq);
+}
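+
+/*
+ * A sketch of what nfsm_rpchead() emits for an AUTH_UNIX call, one XDR
+ * word per field unless noted:
+ *
+ *   xid, CALL, RPC version (2), program, version, procedure,
+ *   cred flavor (AUTH_UNIX), cred length,
+ *   stamp, machine name (empty), uid, gid, group count, groups...,
+ *   verifier flavor (AUTH_NULL), verifier length (0)
+ *
+ * which is why m_pkthdr.len above works out to 10 * NFSX_UNSIGNED plus
+ * the rounded-up credential size plus the length of the request proper.
+ */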
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ */
+int
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+ struct mbuf **mrep;
+ register struct uio *uiop;
+ int siz;
+ caddr_t *dpos;
+{
+ register char *mbufcp, *uiocp;
+ register int xfer, left, len;
+ register struct mbuf *mp;
+ long uiosiz, rem;
+ int error = 0;
+
+ mp = *mrep;
+ mbufcp = *dpos;
+ len = mtod(mp, caddr_t)+mp->m_len-mbufcp;
+ rem = nfsm_rndup(siz)-siz;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EFBIG);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ while (left > 0) {
+ while (len == 0) {
+ mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ mbufcp = mtod(mp, caddr_t);
+ len = mp->m_len;
+ }
+ xfer = (left > len) ? len : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (mbufcp, uiocp, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(mbufcp, uiocp, xfer);
+ else
+ copyout(mbufcp, uiocp, xfer);
+ left -= xfer;
+ len -= xfer;
+ mbufcp += xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) {
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ *dpos = mbufcp;
+ *mrep = mp;
+ if (rem > 0) {
+ if (len < rem)
+ error = nfs_adv(mrep, dpos, rem, len);
+ else
+ *dpos += rem;
+ }
+ return (error);
+}
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain.
+ * NOTE: can only handle iovcnt == 1
+ */
+int
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+ register struct uio *uiop;
+ struct mbuf **mq;
+ int siz;
+ caddr_t *bpos;
+{
+ register char *uiocp;
+ register struct mbuf *mp, *mp2;
+ register int xfer, left, mlen;
+ int uiosiz, clflg, rem;
+ char *cp;
+
+#ifdef DIAGNOSTIC
+ if (uiop->uio_iovcnt != 1)
+ panic("nfsm_uiotombuf: iovcnt != 1");
+#endif
+
+ if (siz > MLEN) /* or should it >= MCLBYTES ?? */
+ clflg = 1;
+ else
+ clflg = 0;
+ rem = nfsm_rndup(siz)-siz;
+ mp = mp2 = *mq;
+ while (siz > 0) {
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ while (left > 0) {
+ mlen = M_TRAILINGSPACE(mp);
+ if (mlen == 0) {
+ MGET(mp, M_WAIT, MT_DATA);
+ if (clflg)
+ MCLGET(mp, M_WAIT);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ mp2 = mp;
+ mlen = M_TRAILINGSPACE(mp);
+ }
+ xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+ copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ mp->m_len += xfer;
+ left -= xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ siz -= uiosiz;
+ }
+ if (rem > 0) {
+ if (rem > M_TRAILINGSPACE(mp)) {
+ MGET(mp, M_WAIT, MT_DATA);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ }
+ cp = mtod(mp, caddr_t)+mp->m_len;
+ for (left = 0; left < rem; left++)
+ *cp++ = '\0';
+ mp->m_len += rem;
+ *bpos = cp;
+ } else
+ *bpos = mtod(mp, caddr_t)+mp->m_len;
+ *mq = mp;
+ return (0);
+}
+
+/*
+ * Help break down an mbuf chain by making the first siz bytes contiguous
+ * and returning a pointer to them via *cp2.
+ * This is used by the nfsm_dissect macros for the tough cases.
+ * (The macros use the variables dpos and dpos2.)
+ */
+int
+nfsm_disct(mdp, dposp, siz, left, cp2)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int siz;
+ int left;
+ caddr_t *cp2;
+{
+ register struct mbuf *mp, *mp2;
+ register int siz2, xfer;
+ register caddr_t p;
+
+ mp = *mdp;
+ while (left == 0) {
+ *mdp = mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ left = mp->m_len;
+ *dposp = mtod(mp, caddr_t);
+ }
+ if (left >= siz) {
+ *cp2 = *dposp;
+ *dposp += siz;
+ } else if (mp->m_next == NULL) {
+ return (EBADRPC);
+ } else if (siz > MHLEN) {
+ panic("nfs S too big");
+ } else {
+ MGET(mp2, M_WAIT, MT_DATA);
+ mp2->m_next = mp->m_next;
+ mp->m_next = mp2;
+ mp->m_len -= left;
+ mp = mp2;
+ *cp2 = p = mtod(mp, caddr_t);
+ bcopy(*dposp, p, left); /* Copy what was left */
+ siz2 = siz-left;
+ p += left;
+ mp2 = mp->m_next;
+ /* Loop around copying up the siz2 bytes */
+ while (siz2 > 0) {
+ if (mp2 == NULL)
+ return (EBADRPC);
+ xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+ if (xfer > 0) {
+ bcopy(mtod(mp2, caddr_t), p, xfer);
+ NFSMADV(mp2, xfer);
+ mp2->m_len -= xfer;
+ p += xfer;
+ siz2 -= xfer;
+ }
+ if (siz2 > 0)
+ mp2 = mp2->m_next;
+ }
+ mp->m_len = siz;
+ *mdp = mp2;
+ *dposp = mtod(mp2, caddr_t);
+ }
+ return (0);
+}
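+
+/*
+ * For example, if a 4 byte XDR word being dissected has 2 bytes left in
+ * the current mbuf and 2 bytes at the front of the next one, nfsm_disct()
+ * grabs a small mbuf, copies the 2 leftover bytes plus the next 2 into
+ * it, splices it into the chain and hands back a pointer to the now
+ * contiguous word through cp2.
+ */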
+
+/*
+ * Advance the position in the mbuf chain.
+ */
+int
+nfs_adv(mdp, dposp, offs, left)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int offs;
+ int left;
+{
+ register struct mbuf *m;
+ register int s;
+
+ m = *mdp;
+ s = left;
+ while (s < offs) {
+ offs -= s;
+ m = m->m_next;
+ if (m == NULL)
+ return (EBADRPC);
+ s = m->m_len;
+ }
+ *mdp = m;
+ *dposp = mtod(m, caddr_t)+offs;
+ return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ */
+int
+nfsm_strtmbuf(mb, bpos, cp, siz)
+ struct mbuf **mb;
+ char **bpos;
+ const char *cp;
+ long siz;
+{
+ register struct mbuf *m1 = NULL, *m2;
+ long left, xfer, len, tlen;
+ u_int32_t *tl;
+ int putsize;
+
+ putsize = 1;
+ m2 = *mb;
+ left = M_TRAILINGSPACE(m2);
+ if (left > 0) {
+ tl = ((u_int32_t *)(*bpos));
+ *tl++ = txdr_unsigned(siz);
+ putsize = 0;
+ left -= NFSX_UNSIGNED;
+ m2->m_len += NFSX_UNSIGNED;
+ if (left > 0) {
+ bcopy(cp, (caddr_t) tl, left);
+ siz -= left;
+ cp += left;
+ m2->m_len += left;
+ left = 0;
+ }
+ }
+ /* Loop around adding mbufs */
+ while (siz > 0) {
+ MGET(m1, M_WAIT, MT_DATA);
+ if (siz > MLEN)
+ MCLGET(m1, M_WAIT);
+ m1->m_len = NFSMSIZ(m1);
+ m2->m_next = m1;
+ m2 = m1;
+ tl = mtod(m1, u_int32_t *);
+ tlen = 0;
+ if (putsize) {
+ *tl++ = txdr_unsigned(siz);
+ m1->m_len -= NFSX_UNSIGNED;
+ tlen = NFSX_UNSIGNED;
+ putsize = 0;
+ }
+ if (siz < m1->m_len) {
+ len = nfsm_rndup(siz);
+ xfer = siz;
+ if (xfer < len)
+ *(tl+(xfer>>2)) = 0;
+ } else {
+ xfer = len = m1->m_len;
+ }
+ bcopy(cp, (caddr_t) tl, xfer);
+ m1->m_len = len+tlen;
+ siz -= xfer;
+ cp += xfer;
+ }
+ *mb = m1;
+ *bpos = mtod(m1, caddr_t)+m1->m_len;
+ return (0);
+}
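+
+/*
+ * XDR strings are a 32 bit length followed by the bytes, zero padded to
+ * a 4 byte boundary; e.g. a 5 character name goes out as the word 5, the
+ * 5 data bytes and 3 bytes of zero fill, i.e. nfsm_rndup(5) == 8 bytes
+ * of data after the length word.
+ */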
+
+/*
+ * Called once to initialize data structures...
+ */
+int
+nfs_init(vfsp)
+ struct vfsconf *vfsp;
+{
+ register int i;
+
+ nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1);
+
+ /*
+ * Check to see if major data structures haven't bloated.
+ */
+ if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) {
+ printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC);
+ printf("Try reducing NFS_UIDHASHSIZ\n");
+ }
+ if (sizeof (struct nfsuid) > NFS_UIDALLOC) {
+ printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC);
+ printf("Try unionizing the nu_nickname and nu_flag fields\n");
+ }
+ nfs_mount_type = vfsp->vfc_typenum;
+ nfsrtt.pos = 0;
+ rpc_vers = txdr_unsigned(RPC_VER2);
+ rpc_call = txdr_unsigned(RPC_CALL);
+ rpc_reply = txdr_unsigned(RPC_REPLY);
+ rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+ rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+ rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+ rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+ rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+ rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
+ nfs_prog = txdr_unsigned(NFS_PROG);
+ nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+ nfs_true = txdr_unsigned(TRUE);
+ nfs_false = txdr_unsigned(FALSE);
+ nfs_xdrneg1 = txdr_unsigned(-1);
+ nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
+ if (nfs_ticks < 1)
+ nfs_ticks = 1;
+ /* Ensure async daemons disabled */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
+ nfs_iodwant[i] = (struct proc *)0;
+ nfs_iodmount[i] = (struct nfsmount *)0;
+ }
+ nfs_nhinit(); /* Init the nfsnode table */
+#ifndef NFS_NOSERVER
+ nfsrv_init(0); /* Init server data structures */
+ nfsrv_initcache(); /* Init the server request cache */
+#endif
+
+ /*
+ * Initialize the nqnfs server stuff.
+ */
+ if (nqnfsstarttime == 0) {
+ nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+ + nqsrv_clockskew + nqsrv_writeslack;
+ NQLOADNOVRAM(nqnfsstarttime);
+ CIRCLEQ_INIT(&nqtimerhead);
+ nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash);
+ }
+
+ /*
+ * Initialize reply list and start timer
+ */
+ TAILQ_INIT(&nfs_reqq);
+
+ nfs_timer(0);
+
+ /*
+ * Set up lease_check and lease_updatetime so that other parts
+ * of the system can call us, if we are loadable.
+ */
+#ifndef NFS_NOSERVER
+ nfs_prev_vop_lease_check = default_vnodeop_p[VOFFSET(vop_lease)];
+ default_vnodeop_p[VOFFSET(vop_lease)] = (vop_t *)nqnfs_vop_lease_check;
+#endif
+ nfs_prev_lease_updatetime = lease_updatetime;
+ lease_updatetime = nfs_lease_updatetime;
+ nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg;
+ sysent[SYS_nfssvc].sy_narg = 2;
+ nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call;
+ sysent[SYS_nfssvc].sy_call = (sy_call_t *)nfssvc;
+#ifndef NFS_NOSERVER
+ nfs_prev_getfh_sy_narg = sysent[SYS_getfh].sy_narg;
+ sysent[SYS_getfh].sy_narg = 2;
+ nfs_prev_getfh_sy_call = sysent[SYS_getfh].sy_call;
+ sysent[SYS_getfh].sy_call = (sy_call_t *)getfh;
+#endif
+
+ nfs_pbuf_freecnt = nswbuf / 2 + 1;
+
+ return (0);
+}
+
+int
+nfs_uninit(vfsp)
+ struct vfsconf *vfsp;
+{
+
+ untimeout(nfs_timer, (void *)NULL, nfs_timer_handle);
+ nfs_mount_type = -1;
+#ifndef NFS_NOSERVER
+ default_vnodeop_p[VOFFSET(vop_lease)] = nfs_prev_vop_lease_check;
+#endif
+ lease_updatetime = nfs_prev_lease_updatetime;
+ sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg;
+ sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call;
+#ifndef NFS_NOSERVER
+ sysent[SYS_getfh].sy_narg = nfs_prev_getfh_sy_narg;
+ sysent[SYS_getfh].sy_call = nfs_prev_getfh_sy_call;
+#endif
+ return (0);
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ * that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ * error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * iff vaper is not NULL,
+ * copy the attributes to *vaper.
+ */
+int
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+ struct vnode **vpp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vattr *vaper;
+{
+ register struct vnode *vp = *vpp;
+ register struct vattr *vap;
+ register struct nfs_fattr *fp;
+ register struct nfsnode *np;
+ register int32_t t1;
+ caddr_t cp2;
+ int error = 0, rdev;
+ struct mbuf *md;
+ enum vtype vtyp;
+ u_short vmode;
+ struct timespec mtime;
+ int v3 = NFS_ISV3(vp);
+
+ md = *mdp;
+ t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
+ if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0)
+ return (error);
+ fp = (struct nfs_fattr *)cp2;
+ if (v3) {
+ vtyp = nfsv3tov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ rdev = makeudev(fxdr_unsigned(int, fp->fa3_rdev.specdata1),
+ fxdr_unsigned(int, fp->fa3_rdev.specdata2));
+ fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
+ } else {
+ vtyp = nfsv2tov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ /*
+ * XXX
+ *
+ * The duplicate information returned in fa_type and fa_mode
+ * is an ambiguity in the NFS version 2 protocol.
+ *
+ * VREG should be taken literally as a regular file. If a
+ * server intends to return some type information differently
+ * in the upper bits of the mode field (e.g. for sockets, or
+ * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we
+ * leave the examination of the mode bits even in the VREG
+ * case to avoid breakage for bogus servers, but we make sure
+ * that there are actually type bits set in the upper part of
+ * fa_mode (and failing that, trust the va_type field).
+ *
+ * NFSv3 cleared this up and requires fa_mode to carry no type
+ * information (while also introducing sockets and FIFOs for fa_type).
+ */
+ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0))
+ vtyp = IFTOVT(vmode);
+ rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
+ fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
+
+ /*
+ * Really ugly NFSv2 kludge.
+ */
+ if (vtyp == VCHR && rdev == 0xffffffff)
+ vtyp = VFIFO;
+ }
+
+ /*
+ * If v_type == VNON it is a new node, so fill in the v_type,
+ * n_mtime fields. Check to see if it represents a special
+ * device, and if so, check for a possible alias. Once the
+ * correct vnode has been obtained, fill in the rest of the
+ * information.
+ */
+ np = VTONFS(vp);
+ if (vp->v_type != vtyp) {
+ vp->v_type = vtyp;
+ if (vp->v_type == VFIFO) {
+ vp->v_op = fifo_nfsv2nodeop_p;
+ }
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ vp->v_op = spec_nfsv2nodeop_p;
+ addaliasu(vp, rdev);
+ }
+ np->n_mtime = mtime.tv_sec;
+ }
+ vap = &np->n_vattr;
+ vap->va_type = vtyp;
+ vap->va_mode = (vmode & 07777);
+ vap->va_rdev = rdev;
+ vap->va_mtime = mtime;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ if (v3) {
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_size = fxdr_hyper(&fp->fa3_size);
+ vap->va_blocksize = NFS_FABLKSIZE;
+ vap->va_bytes = fxdr_hyper(&fp->fa3_used);
+ vap->va_fileid = fxdr_unsigned(int32_t,
+ fp->fa3_fileid.nfsuquad[1]);
+ fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
+ fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime);
+ vap->va_flags = 0;
+ vap->va_filerev = 0;
+ } else {
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
+ vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize);
+ vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks)
+ * NFS_FABLKSIZE;
+ vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
+ fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
+ vap->va_flags = 0;
+ vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t,
+ fp->fa2_ctime.nfsv2_sec);
+ vap->va_ctime.tv_nsec = 0;
+ vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
+ vap->va_filerev = 0;
+ }
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else {
+ np->n_size = vap->va_size;
+ }
+ vnode_pager_setsize(vp, np->n_size);
+ } else {
+ np->n_size = vap->va_size;
+ }
+ }
+ np->n_attrstamp = time_second;
+ if (vaper != NULL) {
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+ if (np->n_flag & NCHG) {
+ if (np->n_flag & NACC)
+ vaper->va_atime = np->n_atim;
+ if (np->n_flag & NUPD)
+ vaper->va_mtime = np->n_mtim;
+ }
+ }
+ return (0);
+}
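+
+/*
+ * Example of the NFSv2 disambiguation above: a server exporting a socket
+ * would typically return fa_type == NFNON with S_IFSOCK set in fa_mode,
+ * which IFTOVT() turns into VSOCK, while a character device with an rdev
+ * of 0xffffffff is taken to be a FIFO.
+ */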
+
+#ifdef NFS_ACDEBUG
+#include <sys/sysctl.h>
+SYSCTL_DECL(_vfs_nfs);
+static int nfs_acdebug;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, "");
+#endif
+
+/*
+ * Check the time stamp
+ * If the cache is valid, copy contents to *vap and return 0
+ * otherwise return an error
+ */
+int
+nfs_getattrcache(vp, vaper)
+ register struct vnode *vp;
+ struct vattr *vaper;
+{
+ register struct nfsnode *np;
+ register struct vattr *vap;
+ struct nfsmount *nmp;
+ int timeo;
+
+ np = VTONFS(vp);
+ vap = &np->n_vattr;
+ nmp = VFSTONFS(vp->v_mount);
+ /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
+ timeo = (time_second - np->n_mtime) / 10;
+
+#ifdef NFS_ACDEBUG
+ if (nfs_acdebug>1)
+ printf("nfs_getattrcache: initial timeo = %d\n", timeo);
+#endif
+
+ if (vap->va_type == VDIR) {
+ if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin)
+ timeo = nmp->nm_acdirmin;
+ else if (timeo > nmp->nm_acdirmax)
+ timeo = nmp->nm_acdirmax;
+ } else {
+ if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin)
+ timeo = nmp->nm_acregmin;
+ else if (timeo > nmp->nm_acregmax)
+ timeo = nmp->nm_acregmax;
+ }
+
+#ifdef NFS_ACDEBUG
+ if (nfs_acdebug > 2)
+ printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
+ nmp->nm_acregmin, nmp->nm_acregmax,
+ nmp->nm_acdirmin, nmp->nm_acdirmax);
+
+ if (nfs_acdebug)
+ printf("nfs_getattrcache: age = %d; final timeo = %d\n",
+ (time_second - np->n_attrstamp), timeo);
+#endif
+
+ if ((time_second - np->n_attrstamp) >= timeo) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ nfsstats.attrcache_hits++;
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else {
+ np->n_size = vap->va_size;
+ }
+ vnode_pager_setsize(vp, np->n_size);
+ } else {
+ np->n_size = vap->va_size;
+ }
+ }
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
+ if (np->n_flag & NCHG) {
+ if (np->n_flag & NACC)
+ vaper->va_atime = np->n_atim;
+ if (np->n_flag & NUPD)
+ vaper->va_mtime = np->n_mtim;
+ }
+ return (0);
+}
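+
+/*
+ * Worked example of the timeout above: with, say, nm_acregmin == 5 and
+ * nm_acregmax == 60 (the actual mount defaults may differ), a regular
+ * file whose server mtime is 200 seconds old gets timeo = 200 / 10 = 20,
+ * so attributes stamped (n_attrstamp) less than 20 seconds ago are
+ * returned without another GETATTR.  Recently modified files get a
+ * smaller timeo and age out faster, down to the acregmin floor.
+ */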
+
+#ifndef NFS_NOSERVER
+/*
+ * Set up nameidata for a lookup() call and do it.
+ *
+ * If pubflag is set, this call is done for a lookup operation on the
+ * public filehandle. In that case we allow crossing mountpoints and
+ * absolute pathnames. However, the caller is expected to check that
+ * the lookup result is within the public fs, and deny access if
+ * it is not.
+ *
+ * nfs_namei() clears out fields that namei() might leave garbage in.
+ * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no
+ * error occurs but the parent was not requested.
+ *
+ * dirp may be set whether an error is returned or not, and must be
+ * released by the caller.
+ */
+int
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag)
+ register struct nameidata *ndp;
+ fhandle_t *fhp;
+ int len;
+ struct nfssvc_sock *slp;
+ struct sockaddr *nam;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vnode **retdirp;
+ struct proc *p;
+ int kerbflag, pubflag;
+{
+ register int i, rem;
+ register struct mbuf *md;
+ register char *fromcp, *tocp, *cp;
+ struct iovec aiov;
+ struct uio auio;
+ struct vnode *dp;
+ int error, rdonly, linklen;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ *retdirp = (struct vnode *)0;
+ cnp->cn_pnbuf = zalloc(namei_zone);
+
+ /*
+ * Copy the name from the mbuf list to ndp->ni_pnbuf
+ * and set the various ndp fields appropriately.
+ */
+ fromcp = *dposp;
+ tocp = cnp->cn_pnbuf;
+ md = *mdp;
+ rem = mtod(md, caddr_t) + md->m_len - fromcp;
+ for (i = 0; i < len; i++) {
+ while (rem == 0) {
+ md = md->m_next;
+ if (md == NULL) {
+ error = EBADRPC;
+ goto out;
+ }
+ fromcp = mtod(md, caddr_t);
+ rem = md->m_len;
+ }
+ if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) {
+ error = EACCES;
+ goto out;
+ }
+ *tocp++ = *fromcp++;
+ rem--;
+ }
+ *tocp = '\0';
+ *mdp = md;
+ *dposp = fromcp;
+ len = nfsm_rndup(len)-len;
+ if (len > 0) {
+ if (rem >= len)
+ *dposp += len;
+ else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0)
+ goto out;
+ }
+
+ /*
+ * Extract and set starting directory.
+ */
+ error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+ nam, &rdonly, kerbflag, pubflag);
+ if (error)
+ goto out;
+ if (dp->v_type != VDIR) {
+ vrele(dp);
+ error = ENOTDIR;
+ goto out;
+ }
+
+ if (rdonly)
+ cnp->cn_flags |= RDONLY;
+
+ /*
+ * Set return directory. The reference to dp is implicitly transferred
+ * to the returned pointer
+ */
+ *retdirp = dp;
+
+ if (pubflag) {
+ /*
+ * Oh joy. For WebNFS, handle those pesky '%' escapes,
+ * and the 'native path' indicator.
+ */
+ cp = zalloc(namei_zone);
+ fromcp = cnp->cn_pnbuf;
+ tocp = cp;
+ if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) {
+ switch ((unsigned char)*fromcp) {
+ case WEBNFS_NATIVE_CHAR:
+ /*
+ * 'Native' path for us is the same
+ * as a path according to the NFS spec,
+ * just skip the escape char.
+ */
+ fromcp++;
+ break;
+ /*
+ * More may be added in the future, range 0x80-0xff
+ */
+ default:
+ error = EIO;
+ zfree(namei_zone, cp);
+ goto out;
+ }
+ }
+ /*
+ * Translate the '%' escapes, URL-style.
+ */
+ while (*fromcp != '\0') {
+ if (*fromcp == WEBNFS_ESC_CHAR) {
+ if (fromcp[1] != '\0' && fromcp[2] != '\0') {
+ fromcp++;
+ *tocp++ = HEXSTRTOI(fromcp);
+ fromcp += 2;
+ continue;
+ } else {
+ error = ENOENT;
+ zfree(namei_zone, cp);
+ goto out;
+ }
+ } else
+ *tocp++ = *fromcp++;
+ }
+ *tocp = '\0';
+ zfree(namei_zone, cnp->cn_pnbuf);
+ cnp->cn_pnbuf = cp;
+ }
+
+ ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1;
+ ndp->ni_segflg = UIO_SYSSPACE;
+
+ if (pubflag) {
+ ndp->ni_rootdir = rootvnode;
+ ndp->ni_loopcnt = 0;
+ if (cnp->cn_pnbuf[0] == '/')
+ dp = rootvnode;
+ } else {
+ cnp->cn_flags |= NOCROSSMOUNT;
+ }
+
+ /*
+ * Initialize for scan, set ni_startdir and bump ref on dp again
+ * because lookup() will dereference ni_startdir.
+ */
+
+ cnp->cn_proc = p;
+ VREF(dp);
+ ndp->ni_startdir = dp;
+
+ for (;;) {
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ /*
+ * Call lookup() to do the real work. If an error occurs,
+ * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
+ * we do not have to dereference anything before returning.
+ * In either case ni_startdir will be dereferenced and NULLed
+ * out.
+ */
+ error = lookup(ndp);
+ if (error)
+ break;
+
+ /*
+ * Check for encountering a symbolic link. Trivial
+ * termination occurs if no symlink encountered.
+ * Note: zfree is safe because error is 0, so we will
+ * not zfree it again when we break.
+ */
+ if ((cnp->cn_flags & ISSYMLINK) == 0) {
+ nfsrv_object_create(ndp->ni_vp);
+ if (cnp->cn_flags & (SAVENAME | SAVESTART))
+ cnp->cn_flags |= HASBUF;
+ else
+ zfree(namei_zone, cnp->cn_pnbuf);
+ break;
+ }
+
+ /*
+ * Validate symlink
+ */
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ VOP_UNLOCK(ndp->ni_dvp, 0, p);
+ if (!pubflag) {
+ error = EINVAL;
+ goto badlink2;
+ }
+
+ if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
+ error = ELOOP;
+ goto badlink2;
+ }
+ if (ndp->ni_pathlen > 1)
+ cp = zalloc(namei_zone);
+ else
+ cp = cnp->cn_pnbuf;
+ aiov.iov_base = cp;
+ aiov.iov_len = MAXPATHLEN;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = 0;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = (struct proc *)0;
+ auio.uio_resid = MAXPATHLEN;
+ error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
+ if (error) {
+ badlink1:
+ if (ndp->ni_pathlen > 1)
+ zfree(namei_zone, cp);
+ badlink2:
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ break;
+ }
+ linklen = MAXPATHLEN - auio.uio_resid;
+ if (linklen == 0) {
+ error = ENOENT;
+ goto badlink1;
+ }
+ if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
+ error = ENAMETOOLONG;
+ goto badlink1;
+ }
+
+ /*
+ * Adjust or replace path
+ */
+ if (ndp->ni_pathlen > 1) {
+ bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
+ zfree(namei_zone, cnp->cn_pnbuf);
+ cnp->cn_pnbuf = cp;
+ } else
+ cnp->cn_pnbuf[linklen] = '\0';
+ ndp->ni_pathlen += linklen;
+
+ /*
+ * Cleanup refs for next loop and check if root directory
+ * should replace current directory. Normally ni_dvp
+ * becomes the new base directory and is cleaned up when
+ * we loop. Explicitly null pointers after invalidation
+ * to clarify operation.
+ */
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+
+ if (cnp->cn_pnbuf[0] == '/') {
+ vrele(ndp->ni_dvp);
+ ndp->ni_dvp = ndp->ni_rootdir;
+ VREF(ndp->ni_dvp);
+ }
+ ndp->ni_startdir = ndp->ni_dvp;
+ ndp->ni_dvp = NULL;
+ }
+
+ /*
+ * nfs_namei() guarantees that fields will not contain garbage
+ * whether an error occurs or not. This allows the caller to track
+ * cleanup state trivially.
+ */
+out:
+ if (error) {
+ zfree(namei_zone, cnp->cn_pnbuf);
+ ndp->ni_vp = NULL;
+ ndp->ni_dvp = NULL;
+ ndp->ni_startdir = NULL;
+ cnp->cn_flags &= ~HASBUF;
+ } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
+ ndp->ni_dvp = NULL;
+ }
+ return (error);
+}
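+
+/*
+ * WebNFS escape example for the pubflag path above: a public lookup of
+ * "a%20b" is rewritten to "a b", since WEBNFS_ESC_CHAR ('%') followed by
+ * two hex digits is replaced via HEXSTRTOI() by the byte they encode,
+ * while a '%' with fewer than two characters after it fails with ENOENT.
+ */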
+
+/*
+ * A fiddled version of m_adj() that ensures null fill to a long
+ * boundary and only trims off the back end
+ */
+void
+nfsm_adj(mp, len, nul)
+ struct mbuf *mp;
+ register int len;
+ int nul;
+{
+ register struct mbuf *m;
+ register int count, i;
+ register char *cp;
+
+ /*
+ * Trim from tail. Scan the mbuf chain,
+ * calculating its length and finding the last mbuf.
+ * If the adjustment only affects this mbuf, then just
+ * adjust and return. Otherwise, rescan and truncate
+ * after the remaining size.
+ */
+ count = 0;
+ m = mp;
+ for (;;) {
+ count += m->m_len;
+ if (m->m_next == (struct mbuf *)0)
+ break;
+ m = m->m_next;
+ }
+ if (m->m_len > len) {
+ m->m_len -= len;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ return;
+ }
+ count -= len;
+ if (count < 0)
+ count = 0;
+ /*
+ * Correct length for chain is "count".
+ * Find the mbuf with last data, adjust its length,
+ * and toss data from remaining mbufs on chain.
+ */
+ for (m = mp; m; m = m->m_next) {
+ if (m->m_len >= count) {
+ m->m_len = count;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ break;
+ }
+ count -= m->m_len;
+ }
+ for (m = m->m_next;m;m = m->m_next)
+ m->m_len = 0;
+}
+
+/*
+ * Make these functions instead of macros, so that the kernel text size
+ * doesn't get too big...
+ */
+void
+nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp)
+ struct nfsrv_descript *nfsd;
+ int before_ret;
+ register struct vattr *before_vap;
+ int after_ret;
+ struct vattr *after_vap;
+ struct mbuf **mbp;
+ char **bposp;
+{
+ register struct mbuf *mb = *mbp, *mb2;
+ register char *bpos = *bposp;
+ register u_int32_t *tl;
+
+ if (before_ret) {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = nfs_false;
+ } else {
+ nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
+ *tl++ = nfs_true;
+ txdr_hyper(before_vap->va_size, tl);
+ tl += 2;
+ txdr_nfsv3time(&(before_vap->va_mtime), tl);
+ tl += 2;
+ txdr_nfsv3time(&(before_vap->va_ctime), tl);
+ }
+ *bposp = bpos;
+ *mbp = mb;
+ nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp);
+}
+
+void
+nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp)
+ struct nfsrv_descript *nfsd;
+ int after_ret;
+ struct vattr *after_vap;
+ struct mbuf **mbp;
+ char **bposp;
+{
+ register struct mbuf *mb = *mbp, *mb2;
+ register char *bpos = *bposp;
+ register u_int32_t *tl;
+ register struct nfs_fattr *fp;
+
+ if (after_ret) {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = nfs_false;
+ } else {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR);
+ *tl++ = nfs_true;
+ fp = (struct nfs_fattr *)tl;
+ nfsm_srvfattr(nfsd, after_vap, fp);
+ }
+ *mbp = mb;
+ *bposp = bpos;
+}
+
+void
+nfsm_srvfattr(nfsd, vap, fp)
+ register struct nfsrv_descript *nfsd;
+ register struct vattr *vap;
+ register struct nfs_fattr *fp;
+{
+
+ fp->fa_nlink = txdr_unsigned(vap->va_nlink);
+ fp->fa_uid = txdr_unsigned(vap->va_uid);
+ fp->fa_gid = txdr_unsigned(vap->va_gid);
+ if (nfsd->nd_flag & ND_NFSV3) {
+ fp->fa_type = vtonfsv3_type(vap->va_type);
+ fp->fa_mode = vtonfsv3_mode(vap->va_mode);
+ txdr_hyper(vap->va_size, &fp->fa3_size);
+ txdr_hyper(vap->va_bytes, &fp->fa3_used);
+ fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev));
+ fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev));
+ fp->fa3_fsid.nfsuquad[0] = 0;
+ fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid);
+ fp->fa3_fileid.nfsuquad[0] = 0;
+ fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid);
+ txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime);
+ txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime);
+ txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime);
+ } else {
+ fp->fa_type = vtonfsv2_type(vap->va_type);
+ fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
+ fp->fa2_size = txdr_unsigned(vap->va_size);
+ fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize);
+ if (vap->va_type == VFIFO)
+ fp->fa2_rdev = 0xffffffff;
+ else
+ fp->fa2_rdev = txdr_unsigned(vap->va_rdev);
+ fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE);
+ fp->fa2_fsid = txdr_unsigned(vap->va_fsid);
+ fp->fa2_fileid = txdr_unsigned(vap->va_fileid);
+ txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime);
+ txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime);
+ txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime);
+ }
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ * - look up fsid in mount list (if not found ret error)
+ * - get vp and export rights by calling VFS_FHTOVP()
+ * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ * - if not lockflag unlock it with VOP_UNLOCK()
+ */
+int
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag)
+ fhandle_t *fhp;
+ int lockflag;
+ struct vnode **vpp;
+ struct ucred *cred;
+ struct nfssvc_sock *slp;
+ struct sockaddr *nam;
+ int *rdonlyp;
+ int kerbflag;
+ int pubflag;
+{
+ struct proc *p = curproc; /* XXX */
+ register struct mount *mp;
+ register int i;
+ struct ucred *credanon;
+ int error, exflags;
+#ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */
+ struct sockaddr_in *saddr;
+#endif
+
+ *vpp = (struct vnode *)0;
+
+ if (nfs_ispublicfh(fhp)) {
+ if (!pubflag || !nfs_pub.np_valid)
+ return (ESTALE);
+ fhp = &nfs_pub.np_handle;
+ }
+
+ mp = vfs_getvfs(&fhp->fh_fsid);
+ if (!mp)
+ return (ESTALE);
+ error = VFS_CHECKEXP(mp, nam, &exflags, &credanon);
+ if (error)
+ return (error);
+ error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp);
+ if (error)
+ return (error);
+#ifdef MNT_EXNORESPORT
+ if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) {
+ saddr = (struct sockaddr_in *)nam;
+ if (saddr->sin_family == AF_INET &&
+ ntohs(saddr->sin_port) >= IPPORT_RESERVED) {
+ vput(*vpp);
+ *vpp = NULL;
+ return (NFSERR_AUTHERR | AUTH_TOOWEAK);
+ }
+ }
+#endif
+ /*
+ * Check/setup credentials.
+ */
+ if (exflags & MNT_EXKERB) {
+ if (!kerbflag) {
+ vput(*vpp);
+ *vpp = NULL;
+ return (NFSERR_AUTHERR | AUTH_TOOWEAK);
+ }
+ } else if (kerbflag) {
+ vput(*vpp);
+ *vpp = NULL;
+ return (NFSERR_AUTHERR | AUTH_TOOWEAK);
+ } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+ cred->cr_uid = credanon->cr_uid;
+ for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+ cred->cr_groups[i] = credanon->cr_groups[i];
+ cred->cr_ngroups = i;
+ }
+ if (exflags & MNT_EXRDONLY)
+ *rdonlyp = 1;
+ else
+ *rdonlyp = 0;
+
+ nfsrv_object_create(*vpp);
+
+ if (!lockflag)
+ VOP_UNLOCK(*vpp, 0, p);
+ return (0);
+}
+
+
+/*
+ * WebNFS: check if a filehandle is a public filehandle. For v3, this
+ * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has
+ * transformed this to all zeroes in both cases, so check for it.
+ */
+int
+nfs_ispublicfh(fhp)
+ fhandle_t *fhp;
+{
+ char *cp = (char *)fhp;
+ int i;
+
+ for (i = 0; i < NFSX_V3FH; i++)
+ if (*cp++ != 0)
+ return (FALSE);
+ return (TRUE);
+}
+
+#endif /* NFS_NOSERVER */
+/*
+ * This function compares two net addresses by family and returns TRUE
+ * if they are the same host.
+ * If there is any doubt, return FALSE.
+ * The AF_INET family is handled as a special case so that address mbufs
+ * don't need to be saved to store "struct in_addr", which is only 4 bytes.
+ */
+int
+netaddr_match(family, haddr, nam)
+ int family;
+ union nethostaddr *haddr;
+ struct sockaddr *nam;
+{
+ register struct sockaddr_in *inetaddr;
+
+ switch (family) {
+ case AF_INET:
+ inetaddr = (struct sockaddr_in *)nam;
+ if (inetaddr->sin_family == AF_INET &&
+ inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
+ return (1);
+ break;
+#ifdef ISO
+ case AF_ISO:
+ {
+ register struct sockaddr_iso *isoaddr1, *isoaddr2;
+
+ isoaddr1 = (struct sockaddr_iso *)nam;
+ isoaddr2 = (struct sockaddr_iso *)haddr->had_nam;
+ if (isoaddr1->siso_family == AF_ISO &&
+ isoaddr1->siso_nlen > 0 &&
+ isoaddr1->siso_nlen == isoaddr2->siso_nlen &&
+ SAME_ISOADDR(isoaddr1, isoaddr2))
+ return (1);
+ break;
+ }
+#endif /* ISO */
+ default:
+ break;
+ };
+ return (0);
+}
+
+static nfsuint64 nfs_nullcookie = { { 0, 0 } };
+/*
+ * This function finds the directory cookie that corresponds to the
+ * logical byte offset given.
+ */
+nfsuint64 *
+nfs_getcookie(np, off, add)
+ register struct nfsnode *np;
+ off_t off;
+ int add;
+{
+ register struct nfsdmap *dp, *dp2;
+ register int pos;
+
+ pos = (uoff_t)off / NFS_DIRBLKSIZ;
+ if (pos == 0 || off < 0) {
+#ifdef DIAGNOSTIC
+ if (add)
+ panic("nfs getcookie add at <= 0");
+#endif
+ return (&nfs_nullcookie);
+ }
+ pos--;
+ dp = np->n_cookies.lh_first;
+ if (!dp) {
+ if (add) {
+ MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap),
+ M_NFSDIROFF, M_WAITOK);
+ dp->ndm_eocookie = 0;
+ LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list);
+ } else
+ return ((nfsuint64 *)0);
+ }
+ while (pos >= NFSNUMCOOKIES) {
+ pos -= NFSNUMCOOKIES;
+ if (dp->ndm_list.le_next) {
+ if (!add && dp->ndm_eocookie < NFSNUMCOOKIES &&
+ pos >= dp->ndm_eocookie)
+ return ((nfsuint64 *)0);
+ dp = dp->ndm_list.le_next;
+ } else if (add) {
+ MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap),
+ M_NFSDIROFF, M_WAITOK);
+ dp2->ndm_eocookie = 0;
+ LIST_INSERT_AFTER(dp, dp2, ndm_list);
+ dp = dp2;
+ } else
+ return ((nfsuint64 *)0);
+ }
+ if (pos >= dp->ndm_eocookie) {
+ if (add)
+ dp->ndm_eocookie = pos + 1;
+ else
+ return ((nfsuint64 *)0);
+ }
+ return (&dp->ndm_cookies[pos]);
+}
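+
+/*
+ * Cookie bookkeeping example: an offset inside directory block N (blocks
+ * are NFS_DIRBLKSIZ bytes) maps to cookie slot N - 1; slots 0 through
+ * NFSNUMCOOKIES - 1 live in the first nfsdmap, the next NFSNUMCOOKIES in
+ * the second, and so on.  Any offset inside the first block needs no
+ * cookie and gets the all zero nfs_nullcookie.
+ */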
+
+/*
+ * Invalidate cached directory information, except for the actual directory
+ * blocks (which are invalidated separately).
+ * Done mainly to avoid the use of stale offset cookies.
+ */
+void
+nfs_invaldir(vp)
+ register struct vnode *vp;
+{
+ register struct nfsnode *np = VTONFS(vp);
+
+#ifdef DIAGNOSTIC
+ if (vp->v_type != VDIR)
+ panic("nfs: invaldir not dir");
+#endif
+ np->n_direofoffset = 0;
+ np->n_cookieverf.nfsuquad[0] = 0;
+ np->n_cookieverf.nfsuquad[1] = 0;
+ if (np->n_cookies.lh_first)
+ np->n_cookies.lh_first->ndm_eocookie = 0;
+}
+
+/*
+ * The write verifier has changed (probably due to a server reboot), so all
+ * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
+ * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
+ * and B_CLUSTEROK flags. Once done the new write verifier can be set for the
+ * mount point.
+ *
+ * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
+ * writes are not clusterable.
+ */
+void
+nfs_clearcommit(mp)
+ struct mount *mp;
+{
+ register struct vnode *vp, *nvp;
+ register struct buf *bp, *nbp;
+ int s;
+
+ s = splbio();
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+ if (vp->v_mount != mp) /* Paranoia */
+ goto loop;
+ nvp = vp->v_mntvnodes.le_next;
+ for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+ if (BUF_REFCNT(bp) == 0 &&
+ (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
+ == (B_DELWRI | B_NEEDCOMMIT))
+ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
+ }
+ }
+ splx(s);
+}
+
+#ifndef NFS_NOSERVER
+/*
+ * Map errnos to NFS error numbers. For Version 3 also filter out error
+ * numbers not specified for the associated procedure.
+ */
+int
+nfsrv_errmap(nd, err)
+ struct nfsrv_descript *nd;
+ register int err;
+{
+ register short *defaulterrp, *errp;
+
+ if (nd->nd_flag & ND_NFSV3) {
+ if (nd->nd_procnum <= NFSPROC_COMMIT) {
+ errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum];
+ while (*++errp) {
+ if (*errp == err)
+ return (err);
+ else if (*errp > err)
+ break;
+ }
+ return ((int)*defaulterrp);
+ } else
+ return (err & 0xffff);
+ }
+ if (err <= ELAST)
+ return ((int)nfsrv_v2errmap[err - 1]);
+ return (NFSERR_IO);
+}
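+
+/*
+ * For example, a V3 LOOKUP failing with ENOTDIR is passed through, since
+ * NFSERR_NOTDIR appears in nfsv3err_lookup (the low NFSERR_* codes share
+ * their errno values), while an errno not in the list, such as EMLINK,
+ * maps to the list's first entry, NFSERR_IO.
+ */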
+
+int
+nfsrv_object_create(vp)
+ struct vnode *vp;
+{
+
+ if (vp == NULL || vp->v_type != VREG)
+ return (1);
+ return (vfs_object_create(vp, curproc,
+ curproc ? curproc->p_ucred : NULL));
+}
+
+/*
+ * Sort the group list in increasing numerical order.
+ * (Insertion sort by Chris Torek, who was grossed out by the bubble sort
+ * that used to be here.)
+ */
+void
+nfsrvw_sort(list, num)
+ register gid_t *list;
+ register int num;
+{
+ register int i, j;
+ gid_t v;
+
+ /* Insertion sort. */
+ for (i = 1; i < num; i++) {
+ v = list[i];
+ /* find correct slot for value v, moving others up */
+ for (j = i; --j >= 0 && v < list[j];)
+ list[j + 1] = list[j];
+ list[j + 1] = v;
+ }
+}
+
+/*
+ * copy credentials making sure that the result can be compared with bcmp().
+ */
+void
+nfsrv_setcred(incred, outcred)
+ register struct ucred *incred, *outcred;
+{
+ register int i;
+
+ bzero((caddr_t)outcred, sizeof (struct ucred));
+ outcred->cr_ref = 1;
+ outcred->cr_uid = incred->cr_uid;
+ outcred->cr_ngroups = incred->cr_ngroups;
+ for (i = 0; i < incred->cr_ngroups; i++)
+ outcred->cr_groups[i] = incred->cr_groups[i];
+ nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups);
+}
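+
+/*
+ * Because the struct is zeroed first and the group list is sorted, two
+ * credentials with the same uid and the same set of groups (in any
+ * order) come out byte for byte identical, which is what allows cached
+ * credentials to be compared with a single bcmp().
+ */
+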
+#endif /* NFS_NOSERVER */
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c
new file mode 100644
index 0000000..da18842
--- /dev/null
+++ b/sys/nfs/nfs_syscalls.c
@@ -0,0 +1,1199 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <vm/vm_zone.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/xdr_subs.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+static MALLOC_DEFINE(M_NFSSVC, "NFS srvsock", "Nfs server structure");
+
+/* Global defs. */
+extern int32_t (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
+ struct nfssvc_sock *slp,
+ struct proc *procp,
+ struct mbuf **mreqp));
+extern int nfs_numasync;
+extern time_t nqnfsstarttime;
+extern int nqsrv_writeslack;
+extern int nfsrtton;
+extern struct nfsstats nfsstats;
+extern int nfsrvw_procrastinate;
+extern int nfsrvw_procrastinate_v3;
+static int nuidhash_max = NFS_MAXUIDHASH;
+
+#ifndef NFS_NOSERVER
+static void nfsrv_zapsock __P((struct nfssvc_sock *slp));
+#endif
+static int nfssvc_iod __P((struct proc *));
+
+#define TRUE 1
+#define FALSE 0
+
+static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
+
+SYSCTL_DECL(_vfs_nfs);
+
+#ifndef NFS_NOSERVER
+int nfsd_waiting = 0;
+static struct nfsdrt nfsdrt;
+static int nfs_numnfsd = 0;
+static int notstarted = 1;
+static int modify_flag = 0;
+static void nfsd_rt __P((int sotype, struct nfsrv_descript *nd,
+ int cacherep));
+static int nfssvc_addsock __P((struct file *, struct sockaddr *,
+ struct proc *));
+static int nfssvc_nfsd __P((struct nfsd_srvargs *,caddr_t,struct proc *));
+
+static int nfs_privport = 0;
+SYSCTL_INT(_vfs_nfs, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, &nfs_privport, 0, "");
+SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay, CTLFLAG_RW, &nfsrvw_procrastinate, 0, "");
+SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate_v3, 0, "");
+
+/*
+ * NFS server system calls
+ */
+
+#endif /* NFS_NOSERVER */
+/*
+ * NFS server pseudo system call for the nfsd's
+ * Based on the flag value it either:
+ * - adds a socket to the selection list
+ * - remains in the kernel as an nfsd
+ * - remains in the kernel as an nfsiod
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct nfssvc_args {
+ int flag;
+ caddr_t argp;
+};
+#endif
+int
+nfssvc(p, uap)
+ struct proc *p;
+ register struct nfssvc_args *uap;
+{
+#ifndef NFS_NOSERVER
+ struct nameidata nd;
+ struct file *fp;
+ struct sockaddr *nam;
+ struct nfsd_args nfsdarg;
+ struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
+ struct nfsd_cargs ncd;
+ struct nfsd *nfsd;
+ struct nfssvc_sock *slp;
+ struct nfsuid *nuidp;
+ struct nfsmount *nmp;
+#endif /* NFS_NOSERVER */
+ int error;
+
+ /*
+ * Must be super user
+ */
+ error = suser(p);
+ if(error)
+ return (error);
+ while (nfssvc_sockhead_flag & SLP_INIT) {
+ nfssvc_sockhead_flag |= SLP_WANTINIT;
+ (void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
+ }
+ if (uap->flag & NFSSVC_BIOD)
+ error = nfssvc_iod(p);
+#ifdef NFS_NOSERVER
+ else
+ error = ENXIO;
+#else /* !NFS_NOSERVER */
+ else if (uap->flag & NFSSVC_MNTD) {
+ error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
+ if (error)
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
+ ncd.ncd_dirp, p);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if ((nd.ni_vp->v_flag & VROOT) == 0)
+ error = EINVAL;
+ nmp = VFSTONFS(nd.ni_vp->v_mount);
+ vput(nd.ni_vp);
+ if (error)
+ return (error);
+ if ((nmp->nm_state & NFSSTA_MNTD) &&
+ (uap->flag & NFSSVC_GOTAUTH) == 0)
+ return (0);
+ nmp->nm_state |= NFSSTA_MNTD;
+ error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
+ uap->argp, p);
+ } else if (uap->flag & NFSSVC_ADDSOCK) {
+ error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
+ if (error)
+ return (error);
+ error = getsock(p->p_fd, nfsdarg.sock, &fp);
+ if (error)
+ return (error);
+ /*
+ * Get the client address for connected sockets.
+ */
+ if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
+ nam = (struct sockaddr *)0;
+ else {
+ error = getsockaddr(&nam, nfsdarg.name,
+ nfsdarg.namelen);
+ if (error)
+ return (error);
+ }
+ error = nfssvc_addsock(fp, nam, p);
+ } else {
+ error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
+ if (error)
+ return (error);
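+		/*
+		 * NFSSVC_AUTHIN: a userland nfsd has completed a Kerberos
+		 * authentication exchange; cache the resulting credential
+		 * (keyed by uid and client address) on the server socket so
+		 * later RPCs from the same client can reuse it.
+		 */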
+ if ((uap->flag & NFSSVC_AUTHIN) &&
+ ((nfsd = nsd->nsd_nfsd)) != NULL &&
+ (nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
+ slp = nfsd->nfsd_slp;
+
+ /*
+ * First check to see if another nfsd has already
+ * added this credential.
+ */
+ for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
+ nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
+ if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid &&
+ (!nfsd->nfsd_nd->nd_nam2 ||
+ netaddr_match(NU_NETFAM(nuidp),
+ &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
+ break;
+ }
+ if (nuidp) {
+ nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr);
+ nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
+ } else {
+ /*
+ * Nope, so we will.
+ */
+ if (slp->ns_numuids < nuidhash_max) {
+ slp->ns_numuids++;
+ nuidp = (struct nfsuid *)
+ malloc(sizeof (struct nfsuid), M_NFSUID,
+ M_WAITOK);
+ } else
+ nuidp = (struct nfsuid *)0;
+ if ((slp->ns_flag & SLP_VALID) == 0) {
+ if (nuidp)
+ free((caddr_t)nuidp, M_NFSUID);
+ } else {
+ if (nuidp == (struct nfsuid *)0) {
+ nuidp = slp->ns_uidlruhead.tqh_first;
+ LIST_REMOVE(nuidp, nu_hash);
+ TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
+ nu_lru);
+ if (nuidp->nu_flag & NU_NAM)
+ FREE(nuidp->nu_nam, M_SONAME);
+ }
+ nuidp->nu_flag = 0;
+ nuidp->nu_cr = nsd->nsd_cr;
+ if (nuidp->nu_cr.cr_ngroups > NGROUPS)
+ nuidp->nu_cr.cr_ngroups = NGROUPS;
+ nuidp->nu_cr.cr_ref = 1;
+ nuidp->nu_timestamp = nsd->nsd_timestamp;
+ nuidp->nu_expire = time_second + nsd->nsd_ttl;
+ /*
+ * and save the session key in nu_key.
+ */
+ bcopy(nsd->nsd_key, nuidp->nu_key,
+ sizeof (nsd->nsd_key));
+ if (nfsd->nfsd_nd->nd_nam2) {
+ struct sockaddr_in *saddr;
+
+ saddr = (struct sockaddr_in *)
+ nfsd->nfsd_nd->nd_nam2;
+ switch (saddr->sin_family) {
+ case AF_INET:
+ nuidp->nu_flag |= NU_INETADDR;
+ nuidp->nu_inetaddr =
+ saddr->sin_addr.s_addr;
+ break;
+ case AF_ISO:
+ default:
+ nuidp->nu_flag |= NU_NAM;
+ nuidp->nu_nam =
+ dup_sockaddr(nfsd->nfsd_nd->
+ nd_nam2, 1);
+ break;
+ };
+ }
+ TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
+ nu_lru);
+ LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
+ nuidp, nu_hash);
+ nfsrv_setcred(&nuidp->nu_cr,
+ &nfsd->nfsd_nd->nd_cr);
+ nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
+ }
+ }
+ }
+ if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
+ nfsd->nfsd_flag |= NFSD_AUTHFAIL;
+ error = nfssvc_nfsd(nsd, uap->argp, p);
+ }
+#endif /* NFS_NOSERVER */
+ if (error == EINTR || error == ERESTART)
+ error = 0;
+ return (error);
+}
+
+#ifndef NFS_NOSERVER
+/*
+ * Adds a socket to the list for servicing by nfsds.
+ */
+static int
+nfssvc_addsock(fp, mynam, p)
+ struct file *fp;
+ struct sockaddr *mynam;
+ struct proc *p;
+{
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ int error, s;
+
+ so = (struct socket *)fp->f_data;
+#if 0
+ tslp = (struct nfssvc_sock *)0;
+ /*
+ * Add it to the list, as required.
+ */
+ if (so->so_proto->pr_protocol == IPPROTO_UDP) {
+ tslp = nfs_udpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ if (mynam != NULL)
+ FREE(mynam, M_SONAME);
+ return (EPERM);
+ }
+#ifdef ISO
+ } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+ tslp = nfs_cltpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ if (mynam != NULL)
+ FREE(mynam, M_SONAME);
+ return (EPERM);
+ }
+#endif /* ISO */
+ }
+#endif
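+	/*
+	 * Reserve socket buffer space for the largest NFS packet; the
+	 * extra sizeof (u_long) on stream sockets leaves room for the RPC
+	 * record mark prepended to each reply (see the M_PREPEND in
+	 * nfssvc_nfsd()).
+	 */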
+ if (so->so_type == SOCK_STREAM)
+ siz = NFS_MAXPACKET + sizeof (u_long);
+ else
+ siz = NFS_MAXPACKET;
+ error = soreserve(so, siz, siz);
+ if (error) {
+ if (mynam != NULL)
+ FREE(mynam, M_SONAME);
+ return (error);
+ }
+
+ /*
+	 * Set protocol-specific options (for now, TCP only) and
+ * reserve some space. For datagram sockets, this can get called
+ * repeatedly for the same socket, but that isn't harmful.
+ */
+ if (so->so_type == SOCK_STREAM) {
+ struct sockopt sopt;
+ int val;
+
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_KEEPALIVE;
+ sopt.sopt_val = &val;
+ sopt.sopt_valsize = sizeof val;
+ val = 1;
+ sosetopt(so, &sopt);
+ }
+ if (so->so_proto->pr_domain->dom_family == AF_INET &&
+ so->so_proto->pr_protocol == IPPROTO_TCP) {
+ struct sockopt sopt;
+ int val;
+
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_level = IPPROTO_TCP;
+ sopt.sopt_name = TCP_NODELAY;
+ sopt.sopt_val = &val;
+ sopt.sopt_valsize = sizeof val;
+ val = 1;
+ sosetopt(so, &sopt);
+ }
+ so->so_rcv.sb_flags &= ~SB_NOINTR;
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_flags &= ~SB_NOINTR;
+ so->so_snd.sb_timeo = 0;
+
+ slp = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
+ STAILQ_INIT(&slp->ns_rec);
+ TAILQ_INIT(&slp->ns_uidlruhead);
+ TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
+
+ slp->ns_so = so;
+ slp->ns_nam = mynam;
+ fp->f_count++;
+ slp->ns_fp = fp;
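+	/*
+	 * Hook the socket's receive upcall so that incoming data is queued
+	 * on the slp and an idle nfsd is woken to service it.
+	 */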
+ s = splnet();
+ so->so_upcallarg = (caddr_t)slp;
+ so->so_upcall = nfsrv_rcv;
+ so->so_rcv.sb_flags |= SB_UPCALL;
+ slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
+ nfsrv_wakenfsd(slp);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
+ * until it is killed by a signal.
+ */
+static int
+nfssvc_nfsd(nsd, argp, p)
+ struct nfsd_srvargs *nsd;
+ caddr_t argp;
+ struct proc *p;
+{
+ register int siz;
+ register struct nfssvc_sock *slp;
+ struct nfsd *nfsd = nsd->nsd_nfsd;
+ struct nfsrv_descript *nd = NULL;
+ struct mbuf *m, *mreq;
+ int error = 0, cacherep, s, sotype, writes_todo;
+ int procrastinate;
+ u_quad_t cur_usec;
+
+#ifndef nolint
+ cacherep = RC_DOIT;
+ writes_todo = 0;
+#endif
+ if (nfsd == (struct nfsd *)0) {
+ nsd->nsd_nfsd = nfsd = (struct nfsd *)
+ malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
+ bzero((caddr_t)nfsd, sizeof (struct nfsd));
+ s = splnet();
+ nfsd->nfsd_procp = p;
+ TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
+ nfs_numnfsd++;
+ } else
+ s = splnet();
+
+ /*
+ * Loop getting rpc requests until SIGKILL.
+ */
+ for (;;) {
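+		/*
+		 * Each pass waits for a socket with pending work, pulls one
+		 * request off it with nfsrv_dorec(), runs the RPC (or any
+		 * gathered writes whose time has come) and sends the reply.
+		 */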
+ if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
+ while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
+ nfsd->nfsd_flag |= NFSD_WAITING;
+ nfsd_waiting++;
+ error = tsleep((caddr_t)nfsd, PSOCK | PCATCH,
+ "nfsd", 0);
+ nfsd_waiting--;
+ if (error)
+ goto done;
+ }
+ if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
+ for (slp = nfssvc_sockhead.tqh_first; slp != 0;
+ slp = slp->ns_chain.tqe_next) {
+ if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
+ == (SLP_VALID | SLP_DOREC)) {
+ slp->ns_flag &= ~SLP_DOREC;
+ slp->ns_sref++;
+ nfsd->nfsd_slp = slp;
+ break;
+ }
+ }
+ if (slp == 0)
+ nfsd_head_flag &= ~NFSD_CHECKSLP;
+ }
+ if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0)
+ continue;
+ if (slp->ns_flag & SLP_VALID) {
+ if (slp->ns_flag & SLP_DISCONN)
+ nfsrv_zapsock(slp);
+ else if (slp->ns_flag & SLP_NEEDQ) {
+ slp->ns_flag &= ~SLP_NEEDQ;
+ (void) nfs_slplock(slp, 1);
+ nfsrv_rcv(slp->ns_so, (caddr_t)slp,
+ M_WAIT);
+ nfs_slpunlock(slp);
+ }
+ error = nfsrv_dorec(slp, nfsd, &nd);
+ cur_usec = nfs_curusec();
+ if (error && slp->ns_tq.lh_first &&
+ slp->ns_tq.lh_first->nd_time <= cur_usec) {
+ error = 0;
+ cacherep = RC_DOIT;
+ writes_todo = 1;
+ } else
+ writes_todo = 0;
+ nfsd->nfsd_flag |= NFSD_REQINPROG;
+ }
+ } else {
+ error = 0;
+ slp = nfsd->nfsd_slp;
+ }
+ if (error || (slp->ns_flag & SLP_VALID) == 0) {
+ if (nd) {
+ free((caddr_t)nd, M_NFSRVDESC);
+ nd = NULL;
+ }
+ nfsd->nfsd_slp = (struct nfssvc_sock *)0;
+ nfsd->nfsd_flag &= ~NFSD_REQINPROG;
+ nfsrv_slpderef(slp);
+ continue;
+ }
+ splx(s);
+ sotype = slp->ns_so->so_type;
+ if (nd) {
+ getmicrotime(&nd->nd_starttime);
+ if (nd->nd_nam2)
+ nd->nd_nam = nd->nd_nam2;
+ else
+ nd->nd_nam = slp->ns_nam;
+
+ /*
+ * Check to see if authorization is needed.
+ */
+ if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
+ nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
+ nsd->nsd_haddr =
+ ((struct sockaddr_in *)
+ nd->nd_nam)->sin_addr.s_addr;
+ nsd->nsd_authlen = nfsd->nfsd_authlen;
+ nsd->nsd_verflen = nfsd->nfsd_verflen;
+ if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr,
+ nfsd->nfsd_authlen) &&
+ !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr,
+ nfsd->nfsd_verflen) &&
+ !copyout((caddr_t)nsd, argp, sizeof (*nsd)))
+ return (ENEEDAUTH);
+ cacherep = RC_DROPIT;
+ } else
+ cacherep = nfsrv_getcache(nd, slp, &mreq);
+
+ /*
+ * Check for just starting up for NQNFS and send
+ * fake "try again later" replies to the NQNFS clients.
+ */
+ if (notstarted && nqnfsstarttime <= time_second) {
+ if (modify_flag) {
+ nqnfsstarttime = time_second + nqsrv_writeslack;
+ modify_flag = 0;
+ } else
+ notstarted = 0;
+ }
+ if (notstarted) {
+ if ((nd->nd_flag & ND_NQNFS) == 0)
+ cacherep = RC_DROPIT;
+ else if (nd->nd_procnum != NFSPROC_WRITE) {
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_TRYLATER;
+ cacherep = RC_DOIT;
+ } else
+ modify_flag = 1;
+ } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
+ nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
+ cacherep = RC_DOIT;
+ } else if (nfs_privport) {
+ /* Check if source port is privileged */
+ u_short port;
+ struct sockaddr *nam = nd->nd_nam;
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)nam;
+ port = ntohs(sin->sin_port);
+ if (port >= IPPORT_RESERVED &&
+ nd->nd_procnum != NFSPROC_NULL) {
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
+ cacherep = RC_DOIT;
+ printf("NFS request from unprivileged port (%s:%d)\n",
+ inet_ntoa(sin->sin_addr), port);
+ }
+ }
+
+ }
+
+ /*
+		 * Loop to get all the write rpc replies that have been
+ * gathered together.
+ */
+ do {
+ switch (cacherep) {
+ case RC_DOIT:
+ if (nd && (nd->nd_flag & ND_NFSV3))
+ procrastinate = nfsrvw_procrastinate_v3;
+ else
+ procrastinate = nfsrvw_procrastinate;
+ if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE &&
+ procrastinate > 0 && !notstarted))
+ error = nfsrv_writegather(&nd, slp,
+ nfsd->nfsd_procp, &mreq);
+ else
+ error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
+ slp, nfsd->nfsd_procp, &mreq);
+ if (mreq == NULL)
+ break;
+ if (error != 0 && error != NFSERR_RETVOID) {
+ if (nd->nd_procnum != NQNFSPROC_VACATED)
+ nfsstats.srv_errs++;
+ nfsrv_updatecache(nd, FALSE, mreq);
+ if (nd->nd_nam2)
+ FREE(nd->nd_nam2, M_SONAME);
+ break;
+ }
+ nfsstats.srvrpccnt[nd->nd_procnum]++;
+ nfsrv_updatecache(nd, TRUE, mreq);
+ nd->nd_mrep = (struct mbuf *)0;
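+				/* FALLTHROUGH into RC_REPLY to send the reply just built */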
+ case RC_REPLY:
+ m = mreq;
+ siz = 0;
+ while (m) {
+ siz += m->m_len;
+ m = m->m_next;
+ }
+ if (siz <= 0 || siz > NFS_MAXPACKET) {
+ printf("mbuf siz=%d\n",siz);
+ panic("Bad nfs svc reply");
+ }
+ m = mreq;
+ m->m_pkthdr.len = siz;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ /*
+ * For stream protocols, prepend a Sun RPC
+ * Record Mark.
+ */
+ if (sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_int32_t *) = htonl(0x80000000 | siz);
+ }
+ if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
+ (void) nfs_slplock(slp, 1);
+ if (slp->ns_flag & SLP_VALID)
+ error = nfs_send(slp->ns_so, nd->nd_nam2, m, NULL);
+ else {
+ error = EPIPE;
+ m_freem(m);
+ }
+ if (nfsrtton)
+ nfsd_rt(sotype, nd, cacherep);
+ if (nd->nd_nam2)
+ FREE(nd->nd_nam2, M_SONAME);
+ if (nd->nd_mrep)
+ m_freem(nd->nd_mrep);
+ if (error == EPIPE)
+ nfsrv_zapsock(slp);
+ if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
+ nfs_slpunlock(slp);
+ if (error == EINTR || error == ERESTART) {
+ free((caddr_t)nd, M_NFSRVDESC);
+ nfsrv_slpderef(slp);
+ s = splnet();
+ goto done;
+ }
+ break;
+ case RC_DROPIT:
+ if (nfsrtton)
+ nfsd_rt(sotype, nd, cacherep);
+ m_freem(nd->nd_mrep);
+ if (nd->nd_nam2)
+ FREE(nd->nd_nam2, M_SONAME);
+ break;
+ };
+ if (nd) {
+ FREE((caddr_t)nd, M_NFSRVDESC);
+ nd = NULL;
+ }
+
+ /*
+ * Check to see if there are outstanding writes that
+ * need to be serviced.
+ */
+ cur_usec = nfs_curusec();
+ s = splsoftclock();
+ if (slp->ns_tq.lh_first &&
+ slp->ns_tq.lh_first->nd_time <= cur_usec) {
+ cacherep = RC_DOIT;
+ writes_todo = 1;
+ } else
+ writes_todo = 0;
+ splx(s);
+ } while (writes_todo);
+ s = splnet();
+ if (nfsrv_dorec(slp, nfsd, &nd)) {
+ nfsd->nfsd_flag &= ~NFSD_REQINPROG;
+ nfsd->nfsd_slp = NULL;
+ nfsrv_slpderef(slp);
+ }
+ }
+done:
+ TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
+ splx(s);
+ free((caddr_t)nfsd, M_NFSD);
+ nsd->nsd_nfsd = (struct nfsd *)0;
+ if (--nfs_numnfsd == 0)
+ nfsrv_init(TRUE); /* Reinitialize everything */
+ return (error);
+}
+
+/*
+ * Shut down a socket associated with an nfssvc_sock structure.
+ * Should be called with the send lock set, if required.
+ * The trick here is to increment the sref at the start, so that the nfsds
+ * will stop using it and clear ns_flag at the end so that it will not be
+ * reassigned during cleanup.
+ */
+static void
+nfsrv_zapsock(slp)
+ register struct nfssvc_sock *slp;
+{
+ register struct nfsuid *nuidp, *nnuidp;
+ register struct nfsrv_descript *nwp, *nnwp;
+ struct socket *so;
+ struct file *fp;
+ struct nfsrv_rec *rec;
+ int s;
+
+ slp->ns_flag &= ~SLP_ALLFLAGS;
+ fp = slp->ns_fp;
+ if (fp) {
+ slp->ns_fp = (struct file *)0;
+ so = slp->ns_so;
+ so->so_rcv.sb_flags &= ~SB_UPCALL;
+ so->so_upcall = NULL;
+ so->so_upcallarg = NULL;
+ soshutdown(so, 2);
+ closef(fp, (struct proc *)0);
+ if (slp->ns_nam)
+ FREE(slp->ns_nam, M_SONAME);
+ m_freem(slp->ns_raw);
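+		/* Discard any requests that were received but never serviced. */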
+ while ((rec = STAILQ_FIRST(&slp->ns_rec)) != NULL) {
+ STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
+ if (rec->nr_address)
+ FREE(rec->nr_address, M_SONAME);
+ m_freem(rec->nr_packet);
+ free(rec, M_NFSRVDESC);
+ }
+ for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0;
+ nuidp = nnuidp) {
+ nnuidp = nuidp->nu_lru.tqe_next;
+ LIST_REMOVE(nuidp, nu_hash);
+ TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
+ if (nuidp->nu_flag & NU_NAM)
+ FREE(nuidp->nu_nam, M_SONAME);
+ free((caddr_t)nuidp, M_NFSUID);
+ }
+ s = splsoftclock();
+ for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
+ nnwp = nwp->nd_tq.le_next;
+ LIST_REMOVE(nwp, nd_tq);
+ free((caddr_t)nwp, M_NFSRVDESC);
+ }
+ LIST_INIT(&slp->ns_tq);
+ splx(s);
+ }
+}
+
+/*
+ * Dereference a server socket structure. If it has no more references and
+ * is no longer valid, you can throw it away.
+ */
+void
+nfsrv_slpderef(slp)
+ register struct nfssvc_sock *slp;
+{
+ if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
+ TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
+ free((caddr_t)slp, M_NFSSVC);
+ }
+}
+
+/*
+ * Lock a socket against others.
+ */
+int
+nfs_slplock(slp, wait)
+ register struct nfssvc_sock *slp;
+ int wait;
+{
+ int *statep = &slp->ns_solock;
+
+ if (!wait && (*statep & NFSSTA_SNDLOCK))
+ return(0); /* already locked, fail */
+ while (*statep & NFSSTA_SNDLOCK) {
+ *statep |= NFSSTA_WANTSND;
+ (void) tsleep((caddr_t)statep, PZERO - 1, "nfsslplck", 0);
+ }
+ *statep |= NFSSTA_SNDLOCK;
+ return (1);
+}
+
+/*
+ * Unlock the stream socket for others.
+ */
+void
+nfs_slpunlock(slp)
+ register struct nfssvc_sock *slp;
+{
+ int *statep = &slp->ns_solock;
+
+ if ((*statep & NFSSTA_SNDLOCK) == 0)
+ panic("nfs slpunlock");
+ *statep &= ~NFSSTA_SNDLOCK;
+ if (*statep & NFSSTA_WANTSND) {
+ *statep &= ~NFSSTA_WANTSND;
+ wakeup((caddr_t)statep);
+ }
+}
+
+/*
+ * Initialize the data structures for the server.
+ * Handshake with any new nfsds starting up to avoid any chance of
+ * corruption.
+ */
+void
+nfsrv_init(terminating)
+ int terminating;
+{
+ register struct nfssvc_sock *slp, *nslp;
+
+ if (nfssvc_sockhead_flag & SLP_INIT)
+ panic("nfsd init");
+ nfssvc_sockhead_flag |= SLP_INIT;
+ if (terminating) {
+ for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) {
+ nslp = slp->ns_chain.tqe_next;
+ if (slp->ns_flag & SLP_VALID)
+ nfsrv_zapsock(slp);
+ TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
+ free((caddr_t)slp, M_NFSSVC);
+ }
+ nfsrv_cleancache(); /* And clear out server cache */
+ } else
+ nfs_pub.np_valid = 0;
+
+ TAILQ_INIT(&nfssvc_sockhead);
+ nfssvc_sockhead_flag &= ~SLP_INIT;
+ if (nfssvc_sockhead_flag & SLP_WANTINIT) {
+ nfssvc_sockhead_flag &= ~SLP_WANTINIT;
+ wakeup((caddr_t)&nfssvc_sockhead);
+ }
+
+ TAILQ_INIT(&nfsd_head);
+ nfsd_head_flag &= ~NFSD_CHECKSLP;
+
+#if 0
+ nfs_udpsock = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
+ STAILQ_INIT(&nfs_udpsock->ns_rec);
+ TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
+ TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
+
+ nfs_cltpsock = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
+ STAILQ_INIT(&nfs_cltpsock->ns_rec);
+ TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
+ TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
+#endif
+}
+
+/*
+ * Add entries to the server monitor log.
+ */
+static void
+nfsd_rt(sotype, nd, cacherep)
+ int sotype;
+ register struct nfsrv_descript *nd;
+ int cacherep;
+{
+ register struct drt *rt;
+
+ rt = &nfsdrt.drt[nfsdrt.pos];
+ if (cacherep == RC_DOIT)
+ rt->flag = 0;
+ else if (cacherep == RC_REPLY)
+ rt->flag = DRT_CACHEREPLY;
+ else
+ rt->flag = DRT_CACHEDROP;
+ if (sotype == SOCK_STREAM)
+ rt->flag |= DRT_TCP;
+ if (nd->nd_flag & ND_NQNFS)
+ rt->flag |= DRT_NQNFS;
+ else if (nd->nd_flag & ND_NFSV3)
+ rt->flag |= DRT_NFSV3;
+ rt->proc = nd->nd_procnum;
+ if (nd->nd_nam->sa_family == AF_INET)
+ rt->ipadr = ((struct sockaddr_in *)nd->nd_nam)->sin_addr.s_addr;
+ else
+ rt->ipadr = INADDR_ANY;
+ rt->resptime = nfs_curusec() - (nd->nd_starttime.tv_sec * 1000000 + nd->nd_starttime.tv_usec);
+ getmicrotime(&rt->tstamp);
+ nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
+}
+#endif /* NFS_NOSERVER */
+
+static int nfs_defect = 0;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
+
+/*
+ * Asynchronous I/O daemons for client nfs.
+ * They do read-ahead and write-behind operations on the block I/O cache.
+ * Never returns unless it fails or gets killed.
+ */
+static int
+nfssvc_iod(p)
+ struct proc *p;
+{
+ register struct buf *bp;
+ register int i, myiod;
+ struct nfsmount *nmp;
+ int error = 0;
+
+ /*
+ * Assign my position or return error if too many already running
+ */
+ myiod = -1;
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ if (nfs_asyncdaemon[i] == 0) {
+ nfs_asyncdaemon[i]++;
+ myiod = i;
+ break;
+ }
+ if (myiod == -1)
+ return (EBUSY);
+ nfs_numasync++;
+ /*
+	 * Just loop around doing our stuff until SIGKILL
+ */
+ for (;;) {
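+		/*
+		 * Sleep until some mount assigns itself to this iod slot
+		 * (nfs_iodmount[myiod]) and queues buffers on its nm_bufq,
+		 * or until a signal terminates the daemon.
+		 */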
+ while (((nmp = nfs_iodmount[myiod]) == NULL
+ || nmp->nm_bufq.tqh_first == NULL)
+ && error == 0) {
+ if (nmp)
+ nmp->nm_bufqiods--;
+ nfs_iodwant[myiod] = p;
+ nfs_iodmount[myiod] = NULL;
+ error = tsleep((caddr_t)&nfs_iodwant[myiod],
+ PWAIT | PCATCH, "nfsidl", 0);
+ }
+ if (error) {
+ nfs_asyncdaemon[myiod] = 0;
+ if (nmp)
+ nmp->nm_bufqiods--;
+ nfs_iodwant[myiod] = NULL;
+ nfs_iodmount[myiod] = NULL;
+ nfs_numasync--;
+ return (error);
+ }
+ while ((bp = nmp->nm_bufq.tqh_first) != NULL) {
+ /* Take one off the front of the list */
+ TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
+ nmp->nm_bufqlen--;
+ if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
+ nmp->nm_bufqwant = FALSE;
+ wakeup(&nmp->nm_bufq);
+ }
+ if (bp->b_flags & B_READ)
+ (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
+ else
+ (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
+ /*
+			 * If there is more than one iod on this mount, then defect
+			 * so that the iods can be shared out fairly among the mounts.
+ */
+ if (nfs_defect && nmp->nm_bufqiods > 1) {
+ NFS_DPF(ASYNCIO,
+ ("nfssvc_iod: iod %d defecting from mount %p\n",
+ myiod, nmp));
+ nfs_iodmount[myiod] = NULL;
+ nmp->nm_bufqiods--;
+ break;
+ }
+ }
+ }
+}
+
+
+/*
+ * Get an authorization string for the uid by having the mount_nfs process
+ * sitting on this mount point come up out of the kernel and do it.
+ */
+int
+nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+ struct ucred *cred;
+ char **auth_str;
+ int *auth_len;
+ char *verf_str;
+ int *verf_len;
+ NFSKERBKEY_T key; /* return session key */
+{
+ int error = 0;
+
+ while ((nmp->nm_state & NFSSTA_WAITAUTH) == 0) {
+ nmp->nm_state |= NFSSTA_WANTAUTH;
+ (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
+ "nfsauth1", 2 * hz);
+ error = nfs_sigintr(nmp, rep, rep->r_procp);
+ if (error) {
+ nmp->nm_state &= ~NFSSTA_WANTAUTH;
+ return (error);
+ }
+ }
+ nmp->nm_state &= ~(NFSSTA_WAITAUTH | NFSSTA_WANTAUTH);
+ nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
+ nmp->nm_authlen = RPCAUTH_MAXSIZ;
+ nmp->nm_verfstr = verf_str;
+ nmp->nm_verflen = *verf_len;
+ nmp->nm_authuid = cred->cr_uid;
+ wakeup((caddr_t)&nmp->nm_authstr);
+
+ /*
+ * And wait for mount_nfs to do its stuff.
+ */
+ while ((nmp->nm_state & NFSSTA_HASAUTH) == 0 && error == 0) {
+ (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
+ "nfsauth2", 2 * hz);
+ error = nfs_sigintr(nmp, rep, rep->r_procp);
+ }
+ if (nmp->nm_state & NFSSTA_AUTHERR) {
+ nmp->nm_state &= ~NFSSTA_AUTHERR;
+ error = EAUTH;
+ }
+ if (error)
+ free((caddr_t)*auth_str, M_TEMP);
+ else {
+ *auth_len = nmp->nm_authlen;
+ *verf_len = nmp->nm_verflen;
+ bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
+ }
+ nmp->nm_state &= ~NFSSTA_HASAUTH;
+ nmp->nm_state |= NFSSTA_WAITAUTH;
+ if (nmp->nm_state & NFSSTA_WANTAUTH) {
+ nmp->nm_state &= ~NFSSTA_WANTAUTH;
+ wakeup((caddr_t)&nmp->nm_authtype);
+ }
+ return (error);
+}
+
+/*
+ * Get a nickname authenticator and verifier.
+ */
+int
+nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len)
+ struct nfsmount *nmp;
+ struct ucred *cred;
+ char **auth_str;
+ int *auth_len;
+ char *verf_str;
+ int verf_len;
+{
+ register struct nfsuid *nuidp;
+ register u_int32_t *nickp, *verfp;
+ struct timeval ktvin, ktvout;
+
+#ifdef DIAGNOSTIC
+ if (verf_len < (4 * NFSX_UNSIGNED))
+ panic("nfs_getnickauth verf too small");
+#endif
+ for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first;
+ nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
+ if (nuidp->nu_cr.cr_uid == cred->cr_uid)
+ break;
+ }
+ if (!nuidp || nuidp->nu_expire < time_second)
+ return (EACCES);
+
+ /*
+ * Move to the end of the lru list (end of lru == most recently used).
+ */
+ TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
+ TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
+
+ nickp = (u_int32_t *)malloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
+ *nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
+ *nickp = txdr_unsigned(nuidp->nu_nickname);
+ *auth_str = (char *)nickp;
+ *auth_len = 2 * NFSX_UNSIGNED;
+
+ /*
+ * Now we must encrypt the verifier and package it up.
+ */
+ verfp = (u_int32_t *)verf_str;
+ *verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
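+	/*
+	 * Make each verifier timestamp unique: take the current time if it
+	 * is ahead of the last one used, otherwise just bump the
+	 * microseconds field.
+	 */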
+ if (time_second > nuidp->nu_timestamp.tv_sec ||
+ (time_second == nuidp->nu_timestamp.tv_sec &&
+ time_second > nuidp->nu_timestamp.tv_usec))
+ getmicrotime(&nuidp->nu_timestamp);
+ else
+ nuidp->nu_timestamp.tv_usec++;
+ ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
+ ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
+
+ /*
+ * Now encrypt the timestamp verifier in ecb mode using the session
+ * key.
+ */
+#ifdef NFSKERB
+ XXX
+#endif
+
+ *verfp++ = ktvout.tv_sec;
+ *verfp++ = ktvout.tv_usec;
+ *verfp = 0;
+ return (0);
+}
+
+/*
+ * Save the current nickname in a hash list entry on the mount point.
+ */
+int
+nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep)
+ register struct nfsmount *nmp;
+ struct ucred *cred;
+ int len;
+ NFSKERBKEY_T key;
+ struct mbuf **mdp;
+ char **dposp;
+ struct mbuf *mrep;
+{
+ register struct nfsuid *nuidp;
+ register u_int32_t *tl;
+ register int32_t t1;
+ struct mbuf *md = *mdp;
+ struct timeval ktvin, ktvout;
+ u_int32_t nick;
+ char *dpos = *dposp, *cp2;
+ int deltasec, error = 0;
+
+ if (len == (3 * NFSX_UNSIGNED)) {
+ nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+ ktvin.tv_sec = *tl++;
+ ktvin.tv_usec = *tl++;
+ nick = fxdr_unsigned(u_int32_t, *tl);
+
+ /*
+ * Decrypt the timestamp in ecb mode.
+ */
+#ifdef NFSKERB
+ XXX
+#endif
+ ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
+ ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
+ deltasec = time_second - ktvout.tv_sec;
+ if (deltasec < 0)
+ deltasec = -deltasec;
+ /*
+ * If ok, add it to the hash list for the mount point.
+ */
+ if (deltasec <= NFS_KERBCLOCKSKEW) {
+ if (nmp->nm_numuids < nuidhash_max) {
+ nmp->nm_numuids++;
+ nuidp = (struct nfsuid *)
+ malloc(sizeof (struct nfsuid), M_NFSUID,
+ M_WAITOK);
+ } else {
+ nuidp = nmp->nm_uidlruhead.tqh_first;
+ LIST_REMOVE(nuidp, nu_hash);
+ TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp,
+ nu_lru);
+ }
+ nuidp->nu_flag = 0;
+ nuidp->nu_cr.cr_uid = cred->cr_uid;
+ nuidp->nu_expire = time_second + NFS_KERBTTL;
+ nuidp->nu_timestamp = ktvout;
+ nuidp->nu_nickname = nick;
+ bcopy(key, nuidp->nu_key, sizeof (key));
+ TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp,
+ nu_lru);
+ LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid),
+ nuidp, nu_hash);
+ }
+ } else
+ nfsm_adv(nfsm_rndup(len));
+nfsmout:
+ *mdp = md;
+ *dposp = dpos;
+ return (error);
+}
diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c
new file mode 100644
index 0000000..9ddb428
--- /dev/null
+++ b/sys/nfs/nfs_vfsops.c
@@ -0,0 +1,1078 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
+ * $FreeBSD$
+ */
+
+#include "opt_bootp.h"
+
+#include <sys/param.h>
+#include <sys/sockio.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsdiskless.h>
+#include <nfs/nqnfs.h>
+
+extern int nfs_mountroot __P((struct mount *mp));
+
+extern int nfs_ticks;
+
+MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header");
+MALLOC_DEFINE(M_NFSBIGFH, "NFSV3 bigfh", "NFS version 3 file handle");
+MALLOC_DEFINE(M_NFSD, "NFS daemon", "Nfs server daemon structure");
+MALLOC_DEFINE(M_NFSDIROFF, "NFSV3 diroff", "NFS directory offset data");
+MALLOC_DEFINE(M_NFSRVDESC, "NFSV3 srvdesc", "NFS server socket descriptor");
+MALLOC_DEFINE(M_NFSUID, "NFS uid", "Nfs uid mapping structure");
+MALLOC_DEFINE(M_NQLEASE, "NQNFS Lease", "Nqnfs lease");
+MALLOC_DEFINE(M_NFSHASH, "NFS hash", "NFS hash tables");
+
+vm_zone_t nfsmount_zone;
+
+struct nfsstats nfsstats;
+SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
+SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD,
+ &nfsstats, nfsstats, "");
+#ifdef NFS_DEBUG
+int nfs_debug;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "");
+#endif
+
+static int nfs_iosize __P((struct nfsmount *nmp));
+static void nfs_decode_args __P((struct nfsmount *nmp,
+ struct nfs_args *argp));
+static int mountnfs __P((struct nfs_args *,struct mount *,
+ struct sockaddr *,char *,char *,struct vnode **));
+static int nfs_mount __P(( struct mount *mp, char *path, caddr_t data,
+ struct nameidata *ndp, struct proc *p));
+static int nfs_unmount __P(( struct mount *mp, int mntflags,
+ struct proc *p));
+static int nfs_root __P(( struct mount *mp, struct vnode **vpp));
+static int nfs_statfs __P(( struct mount *mp, struct statfs *sbp,
+ struct proc *p));
+static int nfs_sync __P(( struct mount *mp, int waitfor,
+ struct ucred *cred, struct proc *p));
+
+/*
+ * nfs vfs operations.
+ */
+static struct vfsops nfs_vfsops = {
+ nfs_mount,
+ vfs_stdstart,
+ nfs_unmount,
+ nfs_root,
+ vfs_stdquotactl,
+ nfs_statfs,
+ nfs_sync,
+ vfs_stdvget,
+ vfs_stdfhtovp, /* shouldn't happen */
+ vfs_stdcheckexp,
+ vfs_stdvptofh, /* shouldn't happen */
+ nfs_init,
+ nfs_uninit,
+ vfs_stdextattrctl,
+};
+VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
+
+/*
+ * This structure must be filled in by a primary bootstrap or bootstrap
+ * server for a diskless/dataless machine. It is initialized below just
+ * to ensure that it is allocated to initialized data (.data not .bss).
+ */
+struct nfs_diskless nfs_diskless = { { { 0 } } };
+struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
+int nfs_diskless_valid = 0;
+
+SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
+ &nfs_diskless_valid, 0, "");
+
+SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
+ nfsv3_diskless.root_hostnam, 0, "");
+
+SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
+ &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
+ "%Ssockaddr_in", "");
+
+SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_swappath, CTLFLAG_RD,
+ nfsv3_diskless.swap_hostnam, 0, "");
+
+SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_swapaddr, CTLFLAG_RD,
+ &nfsv3_diskless.swap_saddr, sizeof nfsv3_diskless.swap_saddr,
+ "%Ssockaddr_in","");
+
+
+void nfsargs_ntoh __P((struct nfs_args *));
+static int nfs_mountdiskless __P((char *, char *, int,
+ struct sockaddr_in *, struct nfs_args *,
+ struct proc *, struct vnode **,
+ struct mount **));
+static void nfs_convert_diskless __P((void));
+static void nfs_convert_oargs __P((struct nfs_args *args,
+ struct onfs_args *oargs));
+
+static int
+nfs_iosize(nmp)
+ struct nfsmount* nmp;
+{
+ int iosize;
+
+ /*
+ * Calculate the size used for io buffers. Use the larger
+ * of the two sizes to minimise nfs requests but make sure
+ * that it is at least one VM page to avoid wasting buffer
+ * space.
+ */
+ iosize = max(nmp->nm_rsize, nmp->nm_wsize);
+ if (iosize < PAGE_SIZE) iosize = PAGE_SIZE;
+ return iosize;
+}
+
+static void
+nfs_convert_oargs(args, oargs)
+ struct nfs_args *args;
+ struct onfs_args *oargs;
+{
+ args->version = NFS_ARGSVERSION;
+ args->addr = oargs->addr;
+ args->addrlen = oargs->addrlen;
+ args->sotype = oargs->sotype;
+ args->proto = oargs->proto;
+ args->fh = oargs->fh;
+ args->fhsize = oargs->fhsize;
+ args->flags = oargs->flags;
+ args->wsize = oargs->wsize;
+ args->rsize = oargs->rsize;
+ args->readdirsize = oargs->readdirsize;
+ args->timeo = oargs->timeo;
+ args->retrans = oargs->retrans;
+ args->maxgrouplist = oargs->maxgrouplist;
+ args->readahead = oargs->readahead;
+ args->leaseterm = oargs->leaseterm;
+ args->deadthresh = oargs->deadthresh;
+ args->hostname = oargs->hostname;
+}
+
+static void
+nfs_convert_diskless()
+{
+ bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
+ sizeof(struct ifaliasreq));
+ bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
+ sizeof(struct sockaddr_in));
+ nfs_convert_oargs(&nfsv3_diskless.swap_args,&nfs_diskless.swap_args);
+ nfsv3_diskless.swap_fhsize = NFSX_V2FH;
+ bcopy(nfs_diskless.swap_fh,nfsv3_diskless.swap_fh,NFSX_V2FH);
+ bcopy(&nfs_diskless.swap_saddr,&nfsv3_diskless.swap_saddr,
+ sizeof(struct sockaddr_in));
+ bcopy(nfs_diskless.swap_hostnam,nfsv3_diskless.swap_hostnam, MNAMELEN);
+ nfsv3_diskless.swap_nblks = nfs_diskless.swap_nblks;
+ bcopy(&nfs_diskless.swap_ucred, &nfsv3_diskless.swap_ucred,
+ sizeof(struct ucred));
+ nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
+ nfsv3_diskless.root_fhsize = NFSX_V2FH;
+ bcopy(nfs_diskless.root_fh,nfsv3_diskless.root_fh,NFSX_V2FH);
+ bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
+ sizeof(struct sockaddr_in));
+ bcopy(nfs_diskless.root_hostnam,nfsv3_diskless.root_hostnam, MNAMELEN);
+ nfsv3_diskless.root_time = nfs_diskless.root_time;
+ bcopy(nfs_diskless.my_hostnam,nfsv3_diskless.my_hostnam,
+ MAXHOSTNAMELEN);
+ nfs_diskless_valid = 3;
+}
+
+/*
+ * nfs statfs call
+ */
+int
+nfs_statfs(mp, sbp, p)
+ struct mount *mp;
+ register struct statfs *sbp;
+ struct proc *p;
+{
+ register struct vnode *vp;
+ register struct nfs_statfs *sfp;
+ register caddr_t cp;
+ register u_int32_t *tl;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos, cp2;
+ struct nfsmount *nmp = VFSTONFS(mp);
+ int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct ucred *cred;
+ struct nfsnode *np;
+ u_quad_t tquad;
+
+#ifndef nolint
+ sfp = (struct nfs_statfs *)0;
+#endif
+ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ if (error)
+ return (error);
+ vp = NFSTOV(np);
+ cred = crget();
+ cred->cr_ngroups = 1;
+ if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (void)nfs_fsinfo(nmp, vp, cred, p);
+ nfsstats.rpccnt[NFSPROC_FSSTAT]++;
+ nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
+ nfsm_fhtom(vp, v3);
+ nfsm_request(vp, NFSPROC_FSSTAT, p, cred);
+ if (v3)
+ nfsm_postop_attr(vp, retattr);
+ if (error) {
+ if (mrep != NULL)
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ nfsm_dissect(sfp, struct nfs_statfs *, NFSX_STATFS(v3));
+ sbp->f_flags = nmp->nm_flag;
+ sbp->f_iosize = nfs_iosize(nmp);
+ if (v3) {
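+		/*
+		 * V3 reports byte counts; convert them to NFS_FABLKSIZE
+		 * fragments for struct statfs and keep only the low,
+		 * non-negative 32 bits of the 64-bit file counts.
+		 */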
+ sbp->f_bsize = NFS_FABLKSIZE;
+ tquad = fxdr_hyper(&sfp->sf_tbytes);
+ sbp->f_blocks = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE));
+ tquad = fxdr_hyper(&sfp->sf_fbytes);
+ sbp->f_bfree = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE));
+ tquad = fxdr_hyper(&sfp->sf_abytes);
+ sbp->f_bavail = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE));
+ sbp->f_files = (fxdr_unsigned(int32_t,
+ sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
+ sbp->f_ffree = (fxdr_unsigned(int32_t,
+ sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
+ } else {
+ sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
+ sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
+ sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
+ sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
+ sbp->f_files = 0;
+ sbp->f_ffree = 0;
+ }
+ if (sbp != &mp->mnt_stat) {
+ sbp->f_type = mp->mnt_vfc->vfc_typenum;
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ }
+ nfsm_reqdone;
+ vput(vp);
+ crfree(cred);
+ return (error);
+}
+
+/*
+ * nfs version 3 fsinfo rpc call
+ */
+int
+nfs_fsinfo(nmp, vp, cred, p)
+ register struct nfsmount *nmp;
+ register struct vnode *vp;
+ struct ucred *cred;
+ struct proc *p;
+{
+ register struct nfsv3_fsinfo *fsp;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ register u_int32_t *tl, pref, max;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, retattr;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ u_int64_t maxfsize;
+
+ nfsstats.rpccnt[NFSPROC_FSINFO]++;
+ nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
+ nfsm_fhtom(vp, 1);
+ nfsm_request(vp, NFSPROC_FSINFO, p, cred);
+ nfsm_postop_attr(vp, retattr);
+ if (!error) {
+ nfsm_dissect(fsp, struct nfsv3_fsinfo *, NFSX_V3FSINFO);
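+		/*
+		 * Clamp the mount's wsize/rsize/readdirsize to the server's
+		 * preferred and maximum transfer sizes (kept as block-size
+		 * multiples), and lower nm_maxfilesize if the server's limit
+		 * is smaller than ours.
+		 */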
+ pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
+ if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
+ nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
+ ~(NFS_FABLKSIZE - 1);
+ max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
+ if (max < nmp->nm_wsize && max > 0) {
+ nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
+ if (nmp->nm_wsize == 0)
+ nmp->nm_wsize = max;
+ }
+ pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
+ if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
+ nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
+ ~(NFS_FABLKSIZE - 1);
+ max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
+ if (max < nmp->nm_rsize && max > 0) {
+ nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
+ if (nmp->nm_rsize == 0)
+ nmp->nm_rsize = max;
+ }
+ pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
+ if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
+ nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
+ ~(NFS_DIRBLKSIZ - 1);
+ if (max < nmp->nm_readdirsize && max > 0) {
+ nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
+ if (nmp->nm_readdirsize == 0)
+ nmp->nm_readdirsize = max;
+ }
+ maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
+ if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
+ nmp->nm_maxfilesize = maxfsize;
+ nmp->nm_state |= NFSSTA_GOTFSINFO;
+ }
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * Mount a remote root fs via NFS. This depends on the info in the
+ * nfs_diskless structure that has been filled in properly by some primary
+ * bootstrap.
+ * It goes something like this:
+ * - do enough of "ifconfig" by calling ifioctl() so that the system
+ * can talk to the server
+ * - If nfs_diskless.mygateway is filled in, use that address as
+ * a default gateway.
+ * - build the rootfs mount point and call mountnfs() to do the rest.
+ */
+int
+nfs_mountroot(mp)
+ struct mount *mp;
+{
+	struct mount *swap_mp = NULL;	/* read in the error path below */
+ struct nfsv3_diskless *nd = &nfsv3_diskless;
+ struct socket *so;
+ struct vnode *vp;
+ struct proc *p = curproc; /* XXX */
+ int error, i;
+ u_long l;
+ char buf[128];
+
+#if defined(BOOTP_NFSROOT) && defined(BOOTP)
+ bootpc_init(); /* use bootp to get nfs_diskless filled in */
+#endif
+
+ /*
+ * XXX time must be non-zero when we init the interface or else
+ * the arp code will wedge...
+ */
+ while (time_second == 0)
+ tsleep(&time_second, PZERO+8, "arpkludge", 10);
+
+ if (nfs_diskless_valid==1)
+ nfs_convert_diskless();
+
+ /*
+ * XXX splnet, so networks will receive...
+ */
+ splnet();
+
+#ifdef notyet
+ /* Set up swap credentials. */
+ proc0.p_ucred->cr_uid = ntohl(nd->swap_ucred.cr_uid);
+ proc0.p_ucred->cr_gid = ntohl(nd->swap_ucred.cr_gid);
+ if ((proc0.p_ucred->cr_ngroups = ntohs(nd->swap_ucred.cr_ngroups)) >
+ NGROUPS)
+ proc0.p_ucred->cr_ngroups = NGROUPS;
+ for (i = 0; i < proc0.p_ucred->cr_ngroups; i++)
+ proc0.p_ucred->cr_groups[i] = ntohl(nd->swap_ucred.cr_groups[i]);
+#endif
+
+ /*
+ * Do enough of ifconfig(8) so that the critical net interface can
+ * talk to the server.
+ */
+ error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0, p);
+ if (error)
+ panic("nfs_mountroot: socreate(%04x): %d",
+ nd->myif.ifra_addr.sa_family, error);
+
+ /*
+ * We might not have been told the right interface, so we pass
+ * over the first ten interfaces of the same kind, until we get
+ * one of them configured.
+ */
+
+ for (i = strlen(nd->myif.ifra_name) - 1;
+ nd->myif.ifra_name[i] >= '0' &&
+ nd->myif.ifra_name[i] <= '9';
+ nd->myif.ifra_name[i] ++) {
+ error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, p);
+ if(!error)
+ break;
+ }
+ if (error)
+ panic("nfs_mountroot: SIOCAIFADDR: %d", error);
+ soclose(so);
+
+ /*
+ * If the gateway field is filled in, set it as the default route.
+ */
+ if (nd->mygateway.sin_len != 0) {
+ struct sockaddr_in mask, sin;
+
+ bzero((caddr_t)&mask, sizeof(mask));
+ sin = mask;
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
+ (struct sockaddr *)&nd->mygateway,
+ (struct sockaddr *)&mask,
+ RTF_UP | RTF_GATEWAY, (struct rtentry **)0);
+ if (error)
+ panic("nfs_mountroot: RTM_ADD: %d", error);
+ }
+
+ /*
+ * Create the rootfs mount point.
+ */
+ nd->root_args.fh = nd->root_fh;
+ nd->root_args.fhsize = nd->root_fhsize;
+ l = ntohl(nd->root_saddr.sin_addr.s_addr);
+ snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
+ (l >> 24) & 0xff, (l >> 16) & 0xff,
+ (l >> 8) & 0xff, (l >> 0) & 0xff,nd->root_hostnam);
+ printf("NFS ROOT: %s\n",buf);
+ if ((error = nfs_mountdiskless(buf, "/", MNT_RDONLY,
+ &nd->root_saddr, &nd->root_args, p, &vp, &mp)) != 0) {
+ if (swap_mp) {
+ mp->mnt_vfc->vfc_refcount--;
+ free(swap_mp, M_MOUNT);
+ }
+ return (error);
+ }
+
+ swap_mp = NULL;
+ if (nd->swap_nblks) {
+
+ /* Convert to DEV_BSIZE instead of Kilobyte */
+ nd->swap_nblks *= 2;
+
+ /*
+ * Create a fake mount point just for the swap vnode so that the
+ * swap file can be on a different server from the rootfs.
+ */
+ nd->swap_args.fh = nd->swap_fh;
+ nd->swap_args.fhsize = nd->swap_fhsize;
+ l = ntohl(nd->swap_saddr.sin_addr.s_addr);
+ snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
+ (l >> 24) & 0xff, (l >> 16) & 0xff,
+ (l >> 8) & 0xff, (l >> 0) & 0xff,nd->swap_hostnam);
+ printf("NFS SWAP: %s\n",buf);
+ if ((error = nfs_mountdiskless(buf, "/swap", 0,
+ &nd->swap_saddr, &nd->swap_args, p, &vp, &swap_mp)) != 0)
+ return (error);
+ vfs_unbusy(swap_mp, p);
+
+ VTONFS(vp)->n_size = VTONFS(vp)->n_vattr.va_size =
+ nd->swap_nblks * DEV_BSIZE ;
+
+ /*
+ * Since the swap file is not the root dir of a file system,
+ * hack it to a regular file.
+ */
+ vp->v_type = VREG;
+ vp->v_flag = 0;
+ VREF(vp);
+ swaponvp(p, vp, NODEV, nd->swap_nblks);
+ }
+
+ mp->mnt_flag |= MNT_ROOTFS;
+ mp->mnt_vnodecovered = NULLVP;
+ rootvp = vp;
+ vfs_unbusy(mp, p);
+
+ /*
+ * This is not really an nfs issue, but it is much easier to
+ * set hostname here and then let the "/etc/rc.xxx" files
+ * mount the right /var based upon its preset value.
+ */
+ bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
+ hostname[MAXHOSTNAMELEN - 1] = '\0';
+ for (i = 0; i < MAXHOSTNAMELEN; i++)
+ if (hostname[i] == '\0')
+ break;
+ inittodr(ntohl(nd->root_time));
+ return (0);
+}
+
+/*
+ * Internal version of mount system call for diskless setup.
+ */
+static int
+nfs_mountdiskless(path, which, mountflag, sin, args, p, vpp, mpp)
+ char *path;
+ char *which;
+ int mountflag;
+ struct sockaddr_in *sin;
+ struct nfs_args *args;
+ struct proc *p;
+ struct vnode **vpp;
+ struct mount **mpp;
+{
+ struct mount *mp;
+ struct sockaddr *nam;
+ int error;
+
+ mp = *mpp;
+
+ if (!mp && (error = vfs_rootmountalloc("nfs", path, &mp))) {
+ printf("nfs_mountroot: NFS not configured");
+ return (error);
+ }
+
+ mp->mnt_kern_flag = 0;
+ mp->mnt_flag = mountflag;
+ nam = dup_sockaddr((struct sockaddr *)sin, 1);
+ if ((error = mountnfs(args, mp, nam, which, path, vpp)) != 0) {
+ printf("nfs_mountroot: mount %s on %s: %d", path, which, error);
+ mp->mnt_vfc->vfc_refcount--;
+ vfs_unbusy(mp, p);
+ free(mp, M_MOUNT);
+ FREE(nam, M_SONAME);
+ return (error);
+ }
+ (void) copystr(which, mp->mnt_stat.f_mntonname, MNAMELEN - 1, 0);
+ *mpp = mp;
+ return (0);
+}
+
+static void
+nfs_decode_args(nmp, argp)
+ struct nfsmount *nmp;
+ struct nfs_args *argp;
+{
+ int s;
+ int adjsock;
+ int maxio;
+
+ s = splnet();
+ /*
+	 * Silently clear NFSMNT_NOCONN if it's a TCP mount; it makes
+ * no sense in that context.
+ */
+ if (argp->sotype == SOCK_STREAM)
+ nmp->nm_flag &= ~NFSMNT_NOCONN;
+
+ /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
+ if ((argp->flags & NFSMNT_NFSV3) == 0)
+ nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
+
+ /* Re-bind if rsrvd port requested and wasn't on one */
+ adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
+ && (argp->flags & NFSMNT_RESVPORT);
+ /* Also re-bind if we're switching to/from a connected UDP socket */
+ adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
+ (argp->flags & NFSMNT_NOCONN));
+
+ /* Update flags atomically. Don't change the lock bits. */
+ nmp->nm_flag = argp->flags | nmp->nm_flag;
+ splx(s);
+
+ if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
+ nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
+ if (nmp->nm_timeo < NFS_MINTIMEO)
+ nmp->nm_timeo = NFS_MINTIMEO;
+ else if (nmp->nm_timeo > NFS_MAXTIMEO)
+ nmp->nm_timeo = NFS_MAXTIMEO;
+ }
+
+ if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
+ nmp->nm_retry = argp->retrans;
+ if (nmp->nm_retry > NFS_MAXREXMIT)
+ nmp->nm_retry = NFS_MAXREXMIT;
+ }
+
+ if (argp->flags & NFSMNT_NFSV3) {
+ if (argp->sotype == SOCK_DGRAM)
+ maxio = NFS_MAXDGRAMDATA;
+ else
+ maxio = NFS_MAXDATA;
+ } else
+ maxio = NFS_V2MAXDATA;
+
+ if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
+ nmp->nm_wsize = argp->wsize;
+ /* Round down to multiple of blocksize */
+ nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
+ if (nmp->nm_wsize <= 0)
+ nmp->nm_wsize = NFS_FABLKSIZE;
+ }
+ if (nmp->nm_wsize > maxio)
+ nmp->nm_wsize = maxio;
+ if (nmp->nm_wsize > MAXBSIZE)
+ nmp->nm_wsize = MAXBSIZE;
+
+ if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
+ nmp->nm_rsize = argp->rsize;
+ /* Round down to multiple of blocksize */
+ nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
+ if (nmp->nm_rsize <= 0)
+ nmp->nm_rsize = NFS_FABLKSIZE;
+ }
+ if (nmp->nm_rsize > maxio)
+ nmp->nm_rsize = maxio;
+ if (nmp->nm_rsize > MAXBSIZE)
+ nmp->nm_rsize = MAXBSIZE;
+
+ if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
+ nmp->nm_readdirsize = argp->readdirsize;
+ }
+ if (nmp->nm_readdirsize > maxio)
+ nmp->nm_readdirsize = maxio;
+ if (nmp->nm_readdirsize > nmp->nm_rsize)
+ nmp->nm_readdirsize = nmp->nm_rsize;
+
+ if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
+ nmp->nm_acregmin = argp->acregmin;
+ else
+ nmp->nm_acregmin = NFS_MINATTRTIMO;
+ if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
+ nmp->nm_acregmax = argp->acregmax;
+ else
+ nmp->nm_acregmax = NFS_MAXATTRTIMO;
+ if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
+ nmp->nm_acdirmin = argp->acdirmin;
+ else
+ nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
+ if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
+ nmp->nm_acdirmax = argp->acdirmax;
+ else
+ nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
+ if (nmp->nm_acdirmin > nmp->nm_acdirmax)
+ nmp->nm_acdirmin = nmp->nm_acdirmax;
+ if (nmp->nm_acregmin > nmp->nm_acregmax)
+ nmp->nm_acregmin = nmp->nm_acregmax;
+
+ if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
+ argp->maxgrouplist <= NFS_MAXGRPS)
+ nmp->nm_numgrps = argp->maxgrouplist;
+ if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
+ argp->readahead <= NFS_MAXRAHEAD)
+ nmp->nm_readahead = argp->readahead;
+ if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
+ argp->leaseterm <= NQ_MAXLEASE)
+ nmp->nm_leaseterm = argp->leaseterm;
+ if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
+ argp->deadthresh <= NQ_NEVERDEAD)
+ nmp->nm_deadthresh = argp->deadthresh;
+
+ adjsock |= ((nmp->nm_sotype != argp->sotype) ||
+ (nmp->nm_soproto != argp->proto));
+ nmp->nm_sotype = argp->sotype;
+ nmp->nm_soproto = argp->proto;
+
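+	/*
+	 * If any option that affects the socket changed and a socket is
+	 * already set up, disconnect it; UDP mounts are reconnected
+	 * immediately, retrying until the connect succeeds.
+	 */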
+ if (nmp->nm_so && adjsock) {
+ nfs_safedisconnect(nmp);
+ if (nmp->nm_sotype == SOCK_DGRAM)
+ while (nfs_connect(nmp, (struct nfsreq *)0)) {
+ printf("nfs_args: retrying connect\n");
+ (void) tsleep((caddr_t)&lbolt,
+ PSOCK, "nfscon", 0);
+ }
+ }
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ * It seems a bit dumb to copyinstr() the host and path here and then
+ * bcopy() them in mountnfs(), but I wanted to detect errors before
+ * doing the sockargs() call because sockargs() allocates an mbuf and
+ * an error after that means that I have to release the mbuf.
+ */
+/* ARGSUSED */
+static int
+nfs_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ int error;
+ struct nfs_args args;
+ struct sockaddr *nam;
+ struct vnode *vp;
+ char pth[MNAMELEN], hst[MNAMELEN];
+ size_t len;
+ u_char nfh[NFSX_V3FHMAX];
+
+ if (path == NULL) {
+ nfs_mountroot(mp);
+ return (0);
+ }
+ error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args));
+ if (error)
+ return (error);
+ if (args.version != NFS_ARGSVERSION) {
+#ifdef COMPAT_PRELITE2
+ /*
+ * If the argument version is unknown, then assume the
+ * caller is a pre-lite2 4.4BSD client and convert its
+ * arguments.
+ */
+ struct onfs_args oargs;
+ error = copyin(data, (caddr_t)&oargs, sizeof (struct onfs_args));
+ if (error)
+ return (error);
+ nfs_convert_oargs(&args,&oargs);
+#else /* !COMPAT_PRELITE2 */
+ return (EPROGMISMATCH);
+#endif /* COMPAT_PRELITE2 */
+ }
+ if (mp->mnt_flag & MNT_UPDATE) {
+ register struct nfsmount *nmp = VFSTONFS(mp);
+
+ if (nmp == NULL)
+ return (EIO);
+ /*
+ * When doing an update, we can't change from or to
+ * v3 and/or nqnfs, or change cookie translation
+ */
+ args.flags = (args.flags &
+ ~(NFSMNT_NFSV3|NFSMNT_NQNFS /*|NFSMNT_XLATECOOKIE*/)) |
+ (nmp->nm_flag &
+ (NFSMNT_NFSV3|NFSMNT_NQNFS /*|NFSMNT_XLATECOOKIE*/));
+ nfs_decode_args(nmp, &args);
+ return (0);
+ }
+ error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
+ if (error)
+ return (error);
+ error = copyinstr(path, pth, MNAMELEN-1, &len);
+ if (error)
+ return (error);
+ bzero(&pth[len], MNAMELEN - len);
+ error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
+ if (error)
+ return (error);
+ bzero(&hst[len], MNAMELEN - len);
+ /* sockargs() call must be after above copyin() calls */
+ error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
+ if (error)
+ return (error);
+ args.fh = nfh;
+ error = mountnfs(&args, mp, nam, pth, hst, &vp);
+ return (error);
+}
+
+/*
+ * Common code for mount and mountroot
+ */
+static int
+mountnfs(argp, mp, nam, pth, hst, vpp)
+ register struct nfs_args *argp;
+ register struct mount *mp;
+ struct sockaddr *nam;
+ char *pth, *hst;
+ struct vnode **vpp;
+{
+ register struct nfsmount *nmp;
+ struct nfsnode *np;
+ int error;
+ struct vattr attrs;
+
+ if (mp->mnt_flag & MNT_UPDATE) {
+ nmp = VFSTONFS(mp);
+ /* update paths, file handles, etc, here XXX */
+ FREE(nam, M_SONAME);
+ return (0);
+ } else {
+ nmp = zalloc(nfsmount_zone);
+ bzero((caddr_t)nmp, sizeof (struct nfsmount));
+ TAILQ_INIT(&nmp->nm_uidlruhead);
+ TAILQ_INIT(&nmp->nm_bufq);
+ mp->mnt_data = (qaddr_t)nmp;
+ }
+ vfs_getnewfsid(mp);
+ nmp->nm_mountp = mp;
+ if (argp->flags & NFSMNT_NQNFS)
+ /*
+		 * We have to set mnt_maxsymlinklen to a non-zero value so
+ * that COMPAT_43 routines will know that we are setting
+ * the d_type field in directories (and can zero it for
+ * unsuspecting binaries).
+ */
+ mp->mnt_maxsymlinklen = 1;
+
+ /*
+ * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
+ * high, depending on whether we end up with negative offsets in
+ * the client or server somewhere. 2GB-1 may be safer.
+ *
+ * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
+ * that we can handle until we find out otherwise.
+ * XXX Our "safe" limit on the client is what we can store in our
+ * buffer cache using signed(!) block numbers.
+ */
+ if ((argp->flags & NFSMNT_NFSV3) == 0)
+ nmp->nm_maxfilesize = 0xffffffffLL;
+ else
+ nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
+
+ nmp->nm_timeo = NFS_TIMEO;
+ nmp->nm_retry = NFS_RETRANS;
+ nmp->nm_wsize = NFS_WSIZE;
+ nmp->nm_rsize = NFS_RSIZE;
+ nmp->nm_readdirsize = NFS_READDIRSIZE;
+ nmp->nm_numgrps = NFS_MAXGRPS;
+ nmp->nm_readahead = NFS_DEFRAHEAD;
+ nmp->nm_leaseterm = NQ_DEFLEASE;
+ nmp->nm_deadthresh = NQ_DEADTHRESH;
+ CIRCLEQ_INIT(&nmp->nm_timerhead);
+ nmp->nm_inprog = NULLVP;
+ nmp->nm_fhsize = argp->fhsize;
+ bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
+ bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
+ bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);
+ nmp->nm_nam = nam;
+ /* Set up the sockets and per-host congestion */
+ nmp->nm_sotype = argp->sotype;
+ nmp->nm_soproto = argp->proto;
+
+ nfs_decode_args(nmp, argp);
+
+ /*
+ * For Connection based sockets (TCP,...) defer the connect until
+ * the first request, in case the server is not responding.
+ */
+ if (nmp->nm_sotype == SOCK_DGRAM &&
+ (error = nfs_connect(nmp, (struct nfsreq *)0)))
+ goto bad;
+
+ /*
+ * This is silly, but it has to be set so that vinifod() works.
+ * We do not want to do an nfs_statfs() here since we can get
+ * stuck on a dead server and we are holding a lock on the mount
+ * point.
+ */
+ mp->mnt_stat.f_iosize = nfs_iosize(nmp);
+ /*
+ * A reference count is needed on the nfsnode representing the
+ * remote root. If this object is not persistent, then backward
+ * traversals of the mount point (i.e. "..") will not work if
+ * the nfsnode gets flushed out of the cache. Ufs does not have
+ * this problem, because one can identify root inodes by their
+ * number == ROOTINO (2).
+ */
+ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ if (error)
+ goto bad;
+ *vpp = NFSTOV(np);
+
+ /*
+ * Get file attributes for the mountpoint. This has the side
+ * effect of filling in (*vpp)->v_type with the correct value.
+ */
+ VOP_GETATTR(*vpp, &attrs, curproc->p_ucred, curproc);
+
+ /*
+ * Lose the lock but keep the ref.
+ */
+ VOP_UNLOCK(*vpp, 0, curproc);
+
+ return (0);
+bad:
+ nfs_disconnect(nmp);
+ zfree(nfsmount_zone, nmp);
+ FREE(nam, M_SONAME);
+ return (error);
+}
+
+/*
+ * unmount system call
+ */
+static int
+nfs_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ register struct nfsmount *nmp;
+ struct nfsnode *np;
+ struct vnode *vp;
+ int error, flags = 0;
+
+ if (mntflags & MNT_FORCE)
+ flags |= FORCECLOSE;
+ nmp = VFSTONFS(mp);
+ /*
+	 * Goes something like this:
+ * - Check for activity on the root vnode (other than ourselves).
+ * - Call vflush() to clear out vnodes for this file system,
+ * except for the root vnode.
+ * - Decrement reference on the vnode representing remote root.
+ * - Close the socket
+ * - Free up the data structures
+ */
+ /*
+ * We need to decrement the ref. count on the nfsnode representing
+ * the remote root. See comment in mountnfs(). The VFS unmount()
+ * has done vput on this vnode, otherwise we would get deadlock!
+ */
+ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ if (error)
+ return(error);
+ vp = NFSTOV(np);
+ if (vp->v_usecount > 2) {
+ vput(vp);
+ return (EBUSY);
+ }
+
+ /*
+ * Must handshake with nqnfs_clientd() if it is active.
+ */
+ nmp->nm_state |= NFSSTA_DISMINPROG;
+ while (nmp->nm_inprog != NULLVP)
+ (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0);
+ error = vflush(mp, vp, flags);
+ if (error) {
+ vput(vp);
+ nmp->nm_state &= ~NFSSTA_DISMINPROG;
+ return (error);
+ }
+
+ /*
+ * We are now committed to the unmount.
+ * For NQNFS, let the server daemon free the nfsmount structure.
+ */
+ if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB))
+ nmp->nm_state |= NFSSTA_DISMNT;
+
+ /*
+ * There are two reference counts and one lock to get rid of here.
+ */
+ vput(vp);
+ vrele(vp);
+ vgone(vp);
+ nfs_disconnect(nmp);
+ FREE(nmp->nm_nam, M_SONAME);
+
+ if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0)
+ zfree(nfsmount_zone, nmp);
+ return (0);
+}
+
+/*
+ * Return root of a filesystem
+ */
+static int
+nfs_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+ struct nfsmount *nmp;
+ struct nfsnode *np;
+ int error;
+
+ nmp = VFSTONFS(mp);
+ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ if (error)
+ return (error);
+ vp = NFSTOV(np);
+ if (vp->v_type == VNON)
+ vp->v_type = VDIR;
+ vp->v_flag = VROOT;
+ *vpp = vp;
+ return (0);
+}
+
+extern int syncprt;
+
+/*
+ * Flush out the buffer cache
+ */
+/* ARGSUSED */
+static int
+nfs_sync(mp, waitfor, cred, p)
+ struct mount *mp;
+ int waitfor;
+ struct ucred *cred;
+ struct proc *p;
+{
+ register struct vnode *vp;
+ int error, allerror = 0;
+
+ /*
+ * Force stale buffer cache information to be flushed.
+ */
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first;
+ vp != NULL;
+ vp = vp->v_mntvnodes.le_next) {
+ /*
+ * If the vnode that we are about to sync is no longer
+ * associated with this mount point, start over.
+ */
+ if (vp->v_mount != mp)
+ goto loop;
+ if (VOP_ISLOCKED(vp, NULL) || TAILQ_EMPTY(&vp->v_dirtyblkhd) ||
+ waitfor == MNT_LAZY)
+ continue;
+ if (vget(vp, LK_EXCLUSIVE, p))
+ goto loop;
+ error = VOP_FSYNC(vp, cred, waitfor, p);
+ if (error)
+ allerror = error;
+ vput(vp);
+ }
+ return (allerror);
+}
+
diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c
new file mode 100644
index 0000000..4097d96
--- /dev/null
+++ b/sys/nfs/nfs_vnops.c
@@ -0,0 +1,3354 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
+ * $FreeBSD$
+ */
+
+
+/*
+ * vnode op calls for Sun NFS version 2 and 3
+ */
+
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/namei.h>
+#include <sys/socket.h>
+#include <sys/vnode.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/lockf.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/conf.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+
+#include <miscfs/fifofs/fifo.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+
+/* Defs */
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Ifdef for the FreeBSD-current merged buffer cache. It is unfortunate that
+ * these calls are not made in getblk() and brelse(), which would make them
+ * unnecessary here.
+ */
+#ifndef B_VMIO
+#define vfs_busy_pages(bp, f)
+#endif
+
+static int nfsspec_read __P((struct vop_read_args *));
+static int nfsspec_write __P((struct vop_write_args *));
+static int nfsfifo_read __P((struct vop_read_args *));
+static int nfsfifo_write __P((struct vop_write_args *));
+static int nfsspec_close __P((struct vop_close_args *));
+static int nfsfifo_close __P((struct vop_close_args *));
+#define nfs_poll vop_nopoll
+static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int));
+static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *));
+static int nfs_lookup __P((struct vop_lookup_args *));
+static int nfs_create __P((struct vop_create_args *));
+static int nfs_mknod __P((struct vop_mknod_args *));
+static int nfs_open __P((struct vop_open_args *));
+static int nfs_close __P((struct vop_close_args *));
+static int nfs_access __P((struct vop_access_args *));
+static int nfs_getattr __P((struct vop_getattr_args *));
+static int nfs_setattr __P((struct vop_setattr_args *));
+static int nfs_read __P((struct vop_read_args *));
+static int nfs_mmap __P((struct vop_mmap_args *));
+static int nfs_fsync __P((struct vop_fsync_args *));
+static int nfs_remove __P((struct vop_remove_args *));
+static int nfs_link __P((struct vop_link_args *));
+static int nfs_rename __P((struct vop_rename_args *));
+static int nfs_mkdir __P((struct vop_mkdir_args *));
+static int nfs_rmdir __P((struct vop_rmdir_args *));
+static int nfs_symlink __P((struct vop_symlink_args *));
+static int nfs_readdir __P((struct vop_readdir_args *));
+static int nfs_bmap __P((struct vop_bmap_args *));
+static int nfs_strategy __P((struct vop_strategy_args *));
+static int nfs_lookitup __P((struct vnode *, const char *, int,
+ struct ucred *, struct proc *, struct nfsnode **));
+static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *));
+static int nfsspec_access __P((struct vop_access_args *));
+static int nfs_readlink __P((struct vop_readlink_args *));
+static int nfs_print __P((struct vop_print_args *));
+static int nfs_advlock __P((struct vop_advlock_args *));
+static int nfs_bwrite __P((struct vop_bwrite_args *));
+/*
+ * Global vfs data structures for nfs
+ */
+vop_t **nfsv2_vnodeop_p;
+static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_access_desc, (vop_t *) nfs_access },
+ { &vop_advlock_desc, (vop_t *) nfs_advlock },
+ { &vop_bmap_desc, (vop_t *) nfs_bmap },
+ { &vop_bwrite_desc, (vop_t *) nfs_bwrite },
+ { &vop_close_desc, (vop_t *) nfs_close },
+ { &vop_create_desc, (vop_t *) nfs_create },
+ { &vop_fsync_desc, (vop_t *) nfs_fsync },
+ { &vop_getattr_desc, (vop_t *) nfs_getattr },
+ { &vop_getpages_desc, (vop_t *) nfs_getpages },
+ { &vop_putpages_desc, (vop_t *) nfs_putpages },
+ { &vop_inactive_desc, (vop_t *) nfs_inactive },
+ { &vop_lease_desc, (vop_t *) vop_null },
+ { &vop_link_desc, (vop_t *) nfs_link },
+ { &vop_lock_desc, (vop_t *) vop_sharedlock },
+ { &vop_lookup_desc, (vop_t *) nfs_lookup },
+ { &vop_mkdir_desc, (vop_t *) nfs_mkdir },
+ { &vop_mknod_desc, (vop_t *) nfs_mknod },
+ { &vop_mmap_desc, (vop_t *) nfs_mmap },
+ { &vop_open_desc, (vop_t *) nfs_open },
+ { &vop_poll_desc, (vop_t *) nfs_poll },
+ { &vop_print_desc, (vop_t *) nfs_print },
+ { &vop_read_desc, (vop_t *) nfs_read },
+ { &vop_readdir_desc, (vop_t *) nfs_readdir },
+ { &vop_readlink_desc, (vop_t *) nfs_readlink },
+ { &vop_reclaim_desc, (vop_t *) nfs_reclaim },
+ { &vop_remove_desc, (vop_t *) nfs_remove },
+ { &vop_rename_desc, (vop_t *) nfs_rename },
+ { &vop_rmdir_desc, (vop_t *) nfs_rmdir },
+ { &vop_setattr_desc, (vop_t *) nfs_setattr },
+ { &vop_strategy_desc, (vop_t *) nfs_strategy },
+ { &vop_symlink_desc, (vop_t *) nfs_symlink },
+ { &vop_write_desc, (vop_t *) nfs_write },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
+ { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
+VNODEOP_SET(nfsv2_vnodeop_opv_desc);
+
+/*
+ * Special device vnode ops
+ */
+vop_t **spec_nfsv2nodeop_p;
+static struct vnodeopv_entry_desc nfsv2_specop_entries[] = {
+ { &vop_default_desc, (vop_t *) spec_vnoperate },
+ { &vop_access_desc, (vop_t *) nfsspec_access },
+ { &vop_close_desc, (vop_t *) nfsspec_close },
+ { &vop_fsync_desc, (vop_t *) nfs_fsync },
+ { &vop_getattr_desc, (vop_t *) nfs_getattr },
+ { &vop_inactive_desc, (vop_t *) nfs_inactive },
+ { &vop_lock_desc, (vop_t *) vop_sharedlock },
+ { &vop_print_desc, (vop_t *) nfs_print },
+ { &vop_read_desc, (vop_t *) nfsspec_read },
+ { &vop_reclaim_desc, (vop_t *) nfs_reclaim },
+ { &vop_setattr_desc, (vop_t *) nfs_setattr },
+ { &vop_write_desc, (vop_t *) nfsspec_write },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
+ { &spec_nfsv2nodeop_p, nfsv2_specop_entries };
+VNODEOP_SET(spec_nfsv2nodeop_opv_desc);
+
+vop_t **fifo_nfsv2nodeop_p;
+static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = {
+ { &vop_default_desc, (vop_t *) fifo_vnoperate },
+ { &vop_access_desc, (vop_t *) nfsspec_access },
+ { &vop_close_desc, (vop_t *) nfsfifo_close },
+ { &vop_fsync_desc, (vop_t *) nfs_fsync },
+ { &vop_getattr_desc, (vop_t *) nfs_getattr },
+ { &vop_inactive_desc, (vop_t *) nfs_inactive },
+ { &vop_lock_desc, (vop_t *) vop_sharedlock },
+ { &vop_print_desc, (vop_t *) nfs_print },
+ { &vop_read_desc, (vop_t *) nfsfifo_read },
+ { &vop_reclaim_desc, (vop_t *) nfs_reclaim },
+ { &vop_setattr_desc, (vop_t *) nfs_setattr },
+ { &vop_write_desc, (vop_t *) nfsfifo_write },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
+ { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries };
+VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
+
+static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
+ struct componentname *cnp,
+ struct vattr *vap));
+static int nfs_removerpc __P((struct vnode *dvp, const char *name,
+ int namelen,
+ struct ucred *cred, struct proc *proc));
+static int nfs_renamerpc __P((struct vnode *fdvp, const char *fnameptr,
+ int fnamelen, struct vnode *tdvp,
+ const char *tnameptr, int tnamelen,
+ struct ucred *cred, struct proc *proc));
+static int nfs_renameit __P((struct vnode *sdvp,
+ struct componentname *scnp,
+ struct sillyrename *sp));
+
+/*
+ * Global variables
+ */
+extern u_int32_t nfs_true, nfs_false;
+extern u_int32_t nfs_xdrneg1;
+extern struct nfsstats nfsstats;
+extern nfstype nfsv3_type[9];
+struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
+int nfs_numasync = 0;
+#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1))
+
+SYSCTL_DECL(_vfs_nfs);
+
+static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
+ &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
+
+#if 0
+SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
+ &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
+
+SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
+ &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
+#endif
+
+#define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \
+ | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \
+ | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
+static int
+nfs3_access_otw(struct vnode *vp,
+ int wmode,
+ struct proc *p,
+ struct ucred *cred)
+{
+ const int v3 = 1;
+ u_int32_t *tl;
+ int error = 0, attrflag;
+
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ caddr_t bpos, dpos, cp2;
+ register int32_t t1, t2;
+ register caddr_t cp;
+ u_int32_t rmode;
+ struct nfsnode *np = VTONFS(vp);
+
+ nfsstats.rpccnt[NFSPROC_ACCESS]++;
+ nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
+ nfsm_fhtom(vp, v3);
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(wmode);
+ nfsm_request(vp, NFSPROC_ACCESS, p, cred);
+ nfsm_postop_attr(vp, attrflag);
+ if (!error) {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ rmode = fxdr_unsigned(u_int32_t, *tl);
+ np->n_mode = rmode;
+ np->n_modeuid = cred->cr_uid;
+ np->n_modestamp = time_second;
+ }
+ nfsm_reqdone;
+ return error;
+}
+
+/*
+ * nfs access vnode op.
+ * For nfs version 2, just return ok. File accesses may fail later.
+ * For nfs version 3, use the access rpc to check accessibility. If file modes
+ * are changed on the server, accesses might still fail later.
+ */
+static int
+nfs_access(ap)
+ struct vop_access_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ int error = 0;
+ u_int32_t mode, wmode;
+ int v3 = NFS_ISV3(vp);
+ struct nfsnode *np = VTONFS(vp);
+
+ /*
+ * Disallow write attempts on filesystems mounted read-only;
+ * unless the file is a socket, fifo, or a block or character
+ * device resident on the filesystem.
+ */
+ if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+ switch (vp->v_type) {
+ case VREG:
+ case VDIR:
+ case VLNK:
+ return (EROFS);
+ default:
+ break;
+ }
+ }
+ /*
+ * For nfs v3, check to see if we have done this recently, and if
+ * so return our cached result instead of making an ACCESS call.
+ * If not, do an access rpc, otherwise you are stuck emulating
+ * ufs_access() locally using the vattr. This may not be correct,
+ * since the server may apply other access criteria such as
+ * client uid-->server uid mapping that we do not know about.
+ */
+ if (v3) {
+ if (ap->a_mode & VREAD)
+ mode = NFSV3ACCESS_READ;
+ else
+ mode = 0;
+ if (vp->v_type != VDIR) {
+ if (ap->a_mode & VWRITE)
+ mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
+ if (ap->a_mode & VEXEC)
+ mode |= NFSV3ACCESS_EXECUTE;
+ } else {
+ if (ap->a_mode & VWRITE)
+ mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
+ NFSV3ACCESS_DELETE);
+ if (ap->a_mode & VEXEC)
+ mode |= NFSV3ACCESS_LOOKUP;
+ }
+ /* XXX safety belt, only make blanket request if caching */
+ if (nfsaccess_cache_timeout > 0) {
+ wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
+ NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
+ NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
+ } else {
+ wmode = mode;
+ }
+
+ /*
+ * Does our cached result allow us to give a definite yes to
+ * this request?
+ */
+ if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
+ (ap->a_cred->cr_uid == np->n_modeuid) &&
+ ((np->n_mode & mode) == mode)) {
+ nfsstats.accesscache_hits++;
+ } else {
+ /*
+ * Either a no, or a don't know. Go to the wire.
+ */
+ nfsstats.accesscache_misses++;
+ error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred);
+ if (!error) {
+ if ((np->n_mode & mode) != mode) {
+ error = EACCES;
+ }
+ }
+ }
+ return (error);
+ } else {
+ if ((error = nfsspec_access(ap)) != 0)
+ return (error);
+
+ /*
+ * Attempt to prevent a mapped root from accessing a file
+ * which it shouldn't. We try to read a byte from the file
+ * if the user is root and the file is not zero length.
+ * After calling nfsspec_access, we should have the correct
+ * file size cached.
+ */
+ if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
+ && VTONFS(vp)->n_size > 0) {
+ struct iovec aiov;
+ struct uio auio;
+ char buf[1];
+
+ aiov.iov_base = buf;
+ aiov.iov_len = 1;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = 0;
+ auio.uio_resid = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_procp = ap->a_p;
+
+ if (vp->v_type == VREG)
+ error = nfs_readrpc(vp, &auio, ap->a_cred);
+ else if (vp->v_type == VDIR) {
+ char* bp;
+ bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
+ aiov.iov_base = bp;
+ aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
+ error = nfs_readdirrpc(vp, &auio, ap->a_cred);
+ free(bp, M_TEMP);
+ } else if (vp->v_type == VLNK)
+ error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
+ else
+ error = EACCES;
+ }
+ return (error);
+ }
+}
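
The v3 cache test above reduces to three conditions: the cached ACCESS reply is still within nfsaccess_cache_timeout, it was obtained for the same credential uid, and it already grants every bit being requested. Below is a minimal user-space sketch of that test; the struct and names are illustrative stand-ins, not the real nfsnode fields.

#include <time.h>

/* Illustrative stand-ins for the fields nfs_access() consults. */
struct access_cache {
    unsigned granted;   /* NFSV3ACCESS_* bits the server allowed */
    unsigned uid;       /* credential uid the reply applies to */
    time_t stamp;       /* when the ACCESS reply was received */
};

/*
 * Return 1 when the cached reply can answer a request for `wanted`
 * bits on behalf of `uid`; return 0 when a fresh ACCESS RPC is needed.
 */
static int
access_cache_hit(const struct access_cache *c, unsigned wanted,
    unsigned uid, time_t now, int timeout)
{
    return (now < c->stamp + timeout &&      /* still fresh */
        uid == c->uid &&                     /* same credentials */
        (c->granted & wanted) == wanted);    /* all bits already granted */
}
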
+
+/*
+ * nfs open vnode op
+ * Check to see if the type is ok
+ * and that deletion is not in progress.
+ * For paged in text files, you will need to flush the page cache
+ * if consistency is lost.
+ */
+/* ARGSUSED */
+static int
+nfs_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ struct vattr vattr;
+ int error;
+
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+#ifdef DIAGNOSTIC
+ printf("open eacces vtyp=%d\n",vp->v_type);
+#endif
+ return (EACCES);
+ }
+ /*
+ * Get a valid lease. If cached data is stale, flush it.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (NQNFS_CKINVALID(vp, np, ND_READ)) {
+ do {
+ error = nqnfs_getlease(vp, ND_READ, ap->a_cred,
+ ap->a_p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+ ap->a_p, 1)) == EINTR)
+ return (error);
+ np->n_brev = np->n_lrev;
+ }
+ }
+ } else {
+ if (np->n_flag & NMODIFIED) {
+ if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+ ap->a_p, 1)) == EINTR)
+ return (error);
+ np->n_attrstamp = 0;
+ if (vp->v_type == VDIR)
+ np->n_direofoffset = 0;
+ error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+ if (error)
+ return (error);
+ np->n_mtime = vattr.va_mtime.tv_sec;
+ } else {
+ error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+ if (error)
+ return (error);
+ if (np->n_mtime != vattr.va_mtime.tv_sec) {
+ if (vp->v_type == VDIR)
+ np->n_direofoffset = 0;
+ if ((error = nfs_vinvalbuf(vp, V_SAVE,
+ ap->a_cred, ap->a_p, 1)) == EINTR)
+ return (error);
+ np->n_mtime = vattr.va_mtime.tv_sec;
+ }
+ }
+ }
+ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ np->n_attrstamp = 0; /* For Open/Close consistency */
+ return (0);
+}
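
For the non-NQNFS case, the open path above is the usual close-to-open consistency check: if the file was modified locally, flush dirty data first; otherwise compare the server's modify time with the one remembered from the previous open and throw away cached data when it has changed. A simplified sketch of that decision follows, with hypothetical field names rather than the real nfsnode layout.

#include <time.h>

/* Hypothetical per-file client state. */
struct open_state {
    int locally_modified;    /* plays the role of the NMODIFIED flag */
    time_t cached_mtime;     /* server mtime seen at the previous open */
};

enum open_action { FLUSH_DIRTY, INVALIDATE_CLEAN, KEEP_CACHE };

/* Decide what to do with cached data when the file is opened again. */
static enum open_action
open_revalidate(struct open_state *st, time_t server_mtime)
{
    if (st->locally_modified) {
        st->cached_mtime = server_mtime;
        return FLUSH_DIRTY;        /* push dirty buffers, then revalidate */
    }
    if (st->cached_mtime != server_mtime) {
        st->cached_mtime = server_mtime;
        return INVALIDATE_CLEAN;   /* file changed under us; drop the cache */
    }
    return KEEP_CACHE;             /* cached data is still usable */
}
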
+
+/*
+ * nfs close vnode op
+ * What an NFS client should do upon close after writing is a debatable issue.
+ * Most NFS clients push delayed writes to the server upon close, basically for
+ * two reasons:
+ * 1 - So that any write errors may be reported back to the client process
+ * doing the close system call. By far the two most likely errors are
+ * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
+ * 2 - To put a worst case upper bound on cache inconsistency between
+ * multiple clients for the file.
+ * There is also a consistency problem for Version 2 of the protocol w.r.t.
+ * not being able to tell if other clients are writing a file concurrently,
+ * since there is no way of knowing if the changed modify time in the reply
+ * is only due to the write for this client.
+ * (NFS Version 3 provides weak cache consistency data in the reply that
+ * should be sufficient to detect and handle this case.)
+ *
+ * The current code does the following:
+ * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
+ * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
+ * or commit them (this satisfies 1 and 2 except for the
+ * case where the server crashes after this close but
+ * before the commit RPC, which is felt to be "good
+ * enough"). Changing the last argument to nfs_flush() to
+ * a 1 would force a commit operation, if it is felt a
+ * commit is necessary now.
+ * for NQNFS - do nothing now, since 2 is dealt with via leases and
+ * 1 should be dealt with via an fsync() system call for
+ * cases where write errors are important.
+ */
+/* ARGSUSED */
+static int
+nfs_close(ap)
+ struct vop_close_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ int error = 0;
+
+ if (vp->v_type == VREG) {
+ if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
+ (np->n_flag & NMODIFIED)) {
+ if (NFS_ISV3(vp)) {
+ error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0);
+ np->n_flag &= ~NMODIFIED;
+ } else
+ error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+ np->n_attrstamp = 0;
+ }
+ if (np->n_flag & NWRITEERR) {
+ np->n_flag &= ~NWRITEERR;
+ error = np->n_error;
+ }
+ }
+ return (error);
+}
+
+/*
+ * nfs getattr call from vfs.
+ */
+static int
+nfs_getattr(ap)
+ struct vop_getattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register caddr_t cp;
+ register u_int32_t *tl;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ int v3 = NFS_ISV3(vp);
+
+ /*
+ * Update local times for special files.
+ */
+ if (np->n_flag & (NACC | NUPD))
+ np->n_flag |= NCHG;
+ /*
+ * First look in the cache.
+ */
+ if (nfs_getattrcache(vp, ap->a_vap) == 0)
+ return (0);
+
+ if (v3 && nfsaccess_cache_timeout > 0) {
+ nfsstats.accesscache_misses++;
+ nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred);
+ if (nfs_getattrcache(vp, ap->a_vap) == 0)
+ return (0);
+ }
+
+ nfsstats.rpccnt[NFSPROC_GETATTR]++;
+ nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
+ nfsm_fhtom(vp, v3);
+ nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
+ if (!error) {
+ nfsm_loadattr(vp, ap->a_vap);
+ }
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs setattr call.
+ */
+static int
+nfs_setattr(ap)
+ struct vop_setattr_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register struct vattr *vap = ap->a_vap;
+ int error = 0;
+ u_quad_t tsize;
+
+#ifndef nolint
+ tsize = (u_quad_t)0;
+#endif
+
+ /*
+ * Setting of flags is not supported.
+ */
+ if (vap->va_flags != VNOVAL)
+ return (EOPNOTSUPP);
+
+ /*
+ * Disallow write attempts if the filesystem is mounted read-only.
+ */
+ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
+ vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
+ vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
+ (vp->v_mount->mnt_flag & MNT_RDONLY))
+ return (EROFS);
+ if (vap->va_size != VNOVAL) {
+ switch (vp->v_type) {
+ case VDIR:
+ return (EISDIR);
+ case VCHR:
+ case VBLK:
+ case VSOCK:
+ case VFIFO:
+ if (vap->va_mtime.tv_sec == VNOVAL &&
+ vap->va_atime.tv_sec == VNOVAL &&
+ vap->va_mode == (mode_t)VNOVAL &&
+ vap->va_uid == (uid_t)VNOVAL &&
+ vap->va_gid == (gid_t)VNOVAL)
+ return (0);
+ vap->va_size = VNOVAL;
+ break;
+ default:
+ /*
+ * Disallow write attempts if the filesystem is
+ * mounted read-only.
+ */
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ return (EROFS);
+ vnode_pager_setsize(vp, vap->va_size);
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size == 0)
+ error = nfs_vinvalbuf(vp, 0,
+ ap->a_cred, ap->a_p, 1);
+ else
+ error = nfs_vinvalbuf(vp, V_SAVE,
+ ap->a_cred, ap->a_p, 1);
+ if (error) {
+ vnode_pager_setsize(vp, np->n_size);
+ return (error);
+ }
+ }
+ tsize = np->n_size;
+ np->n_size = np->n_vattr.va_size = vap->va_size;
+ }
+ } else if ((vap->va_mtime.tv_sec != VNOVAL ||
+ vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
+ vp->v_type == VREG &&
+ (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+ ap->a_p, 1)) == EINTR)
+ return (error);
+ error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
+ if (error && vap->va_size != VNOVAL) {
+ np->n_size = np->n_vattr.va_size = tsize;
+ vnode_pager_setsize(vp, np->n_size);
+ }
+ return (error);
+}
+
+/*
+ * Do an nfs setattr rpc.
+ */
+static int
+nfs_setattrrpc(vp, vap, cred, procp)
+ register struct vnode *vp;
+ register struct vattr *vap;
+ struct ucred *cred;
+ struct proc *procp;
+{
+ register struct nfsv2_sattr *sp;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos, cp2;
+ u_int32_t *tl;
+ int error = 0, wccflag = NFSV3_WCCRATTR;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ int v3 = NFS_ISV3(vp);
+
+ nfsstats.rpccnt[NFSPROC_SETATTR]++;
+ nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
+ nfsm_fhtom(vp, v3);
+ if (v3) {
+ nfsm_v3attrbuild(vap, TRUE);
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl = nfs_false;
+ } else {
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
+ if (vap->va_mode == (mode_t)VNOVAL)
+ sp->sa_mode = nfs_xdrneg1;
+ else
+ sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
+ if (vap->va_uid == (uid_t)VNOVAL)
+ sp->sa_uid = nfs_xdrneg1;
+ else
+ sp->sa_uid = txdr_unsigned(vap->va_uid);
+ if (vap->va_gid == (gid_t)VNOVAL)
+ sp->sa_gid = nfs_xdrneg1;
+ else
+ sp->sa_gid = txdr_unsigned(vap->va_gid);
+ sp->sa_size = txdr_unsigned(vap->va_size);
+ txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
+ txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
+ }
+ nfsm_request(vp, NFSPROC_SETATTR, procp, cred);
+ if (v3) {
+ nfsm_wcc_data(vp, wccflag);
+ } else
+ nfsm_loadattr(vp, (struct vattr *)0);
+ nfsm_reqdone;
+ return (error);
+}
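
The NFSv2 branch above relies on a simple wire convention: any attribute the caller left as VNOVAL is transmitted as all-ones (nfs_xdrneg1), which the server reads as "do not change this field". Below is a tiny sketch of that encoding, using made-up names in place of VNOVAL and txdr_unsigned().

#include <stdint.h>
#include <arpa/inet.h>   /* htonl(), standing in for txdr_unsigned() */

#define NOVAL (-1L)      /* plays the role of VNOVAL */

/* Encode one optional attribute for an NFSv2 SETATTR request. */
static uint32_t
v2_sattr_field(long value)
{
    if (value == NOVAL)
        return 0xffffffffu;         /* all-ones: leave this attribute alone */
    return htonl((uint32_t)value);  /* otherwise XDR-encode the new value */
}
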
+
+/*
+ * nfs lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the rpc
+ */
+static int
+nfs_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ struct componentname *cnp = ap->a_cnp;
+ struct vnode *dvp = ap->a_dvp;
+ struct vnode **vpp = ap->a_vpp;
+ int flags = cnp->cn_flags;
+ struct vnode *newvp;
+ u_int32_t *tl;
+ caddr_t cp;
+ int32_t t1, t2;
+ struct nfsmount *nmp;
+ caddr_t bpos, dpos, cp2;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ long len;
+ nfsfh_t *fhp;
+ struct nfsnode *np;
+ int lockparent, wantparent, error = 0, attrflag, fhsize;
+ int v3 = NFS_ISV3(dvp);
+ struct proc *p = cnp->cn_proc;
+
+ *vpp = NULLVP;
+ if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+ return (EROFS);
+ if (dvp->v_type != VDIR)
+ return (ENOTDIR);
+ lockparent = flags & LOCKPARENT;
+ wantparent = flags & (LOCKPARENT|WANTPARENT);
+ nmp = VFSTONFS(dvp->v_mount);
+ np = VTONFS(dvp);
+ if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
+ struct vattr vattr;
+ int vpid;
+
+ if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) != 0) {
+ *vpp = NULLVP;
+ return (error);
+ }
+
+ newvp = *vpp;
+ vpid = newvp->v_id;
+ /*
+ * See the comment starting `Step through' in ufs/ufs_lookup.c
+ * for an explanation of the locking protocol
+ */
+ if (dvp == newvp) {
+ VREF(newvp);
+ error = 0;
+ } else if (flags & ISDOTDOT) {
+ VOP_UNLOCK(dvp, 0, p);
+ error = vget(newvp, LK_EXCLUSIVE, p);
+ if (!error && lockparent && (flags & ISLASTCN))
+ error = vn_lock(dvp, LK_EXCLUSIVE, p);
+ } else {
+ error = vget(newvp, LK_EXCLUSIVE, p);
+ if (!lockparent || error || !(flags & ISLASTCN))
+ VOP_UNLOCK(dvp, 0, p);
+ }
+ if (!error) {
+ if (vpid == newvp->v_id) {
+ if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p)
+ && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+ cache_purge(newvp);
+ }
+ vput(newvp);
+ if (lockparent && dvp != newvp && (flags & ISLASTCN))
+ VOP_UNLOCK(dvp, 0, p);
+ }
+ error = vn_lock(dvp, LK_EXCLUSIVE, p);
+ *vpp = NULLVP;
+ if (error)
+ return (error);
+ }
+ error = 0;
+ newvp = NULLVP;
+ nfsstats.lookupcache_misses++;
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ len = cnp->cn_namelen;
+ nfsm_reqhead(dvp, NFSPROC_LOOKUP,
+ NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
+ nfsm_fhtom(dvp, v3);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+ if (error) {
+ nfsm_postop_attr(dvp, attrflag);
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ nfsm_getfh(fhp, fhsize, v3);
+
+ /*
+ * Handle RENAME case...
+ */
+ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
+ if (NFS_CMPFH(np, fhp, fhsize)) {
+ m_freem(mrep);
+ return (EISDIR);
+ }
+ error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+ if (error) {
+ m_freem(mrep);
+ return (error);
+ }
+ newvp = NFSTOV(np);
+ if (v3) {
+ nfsm_postop_attr(newvp, attrflag);
+ nfsm_postop_attr(dvp, attrflag);
+ } else
+ nfsm_loadattr(newvp, (struct vattr *)0);
+ *vpp = newvp;
+ m_freem(mrep);
+ cnp->cn_flags |= SAVENAME;
+ if (!lockparent)
+ VOP_UNLOCK(dvp, 0, p);
+ return (0);
+ }
+
+ if (flags & ISDOTDOT) {
+ VOP_UNLOCK(dvp, 0, p);
+ error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+ if (error) {
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+ return (error);
+ }
+ newvp = NFSTOV(np);
+ if (lockparent && (flags & ISLASTCN) &&
+ (error = vn_lock(dvp, LK_EXCLUSIVE, p))) {
+ vput(newvp);
+ return (error);
+ }
+ } else if (NFS_CMPFH(np, fhp, fhsize)) {
+ VREF(dvp);
+ newvp = dvp;
+ } else {
+ error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+ if (error) {
+ m_freem(mrep);
+ return (error);
+ }
+ if (!lockparent || !(flags & ISLASTCN))
+ VOP_UNLOCK(dvp, 0, p);
+ newvp = NFSTOV(np);
+ }
+ if (v3) {
+ nfsm_postop_attr(newvp, attrflag);
+ nfsm_postop_attr(dvp, attrflag);
+ } else
+ nfsm_loadattr(newvp, (struct vattr *)0);
+ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ if ((cnp->cn_flags & MAKEENTRY) &&
+ (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
+ np->n_ctime = np->n_vattr.va_ctime.tv_sec;
+ cache_enter(dvp, newvp, cnp);
+ }
+ *vpp = newvp;
+ nfsm_reqdone;
+ if (error) {
+ if (newvp != NULLVP) {
+ vrele(newvp);
+ *vpp = NULLVP;
+ }
+ if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+ (flags & ISLASTCN) && error == ENOENT) {
+ if (!lockparent)
+ VOP_UNLOCK(dvp, 0, p);
+ if (dvp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else
+ error = EJUSTRETURN;
+ }
+ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ }
+ return (error);
+}
+
+/*
+ * nfs read call.
+ * Just call nfs_bioread() to do the work.
+ */
+static int
+nfs_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+
+ if (vp->v_type != VREG)
+ return (EPERM);
+ return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
+}
+
+/*
+ * nfs readlink call
+ */
+static int
+nfs_readlink(ap)
+ struct vop_readlink_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+
+ if (vp->v_type != VLNK)
+ return (EINVAL);
+ return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
+}
+
+/*
+ * Do a readlink rpc.
+ * Called by nfs_doio() from below the buffer cache.
+ */
+int
+nfs_readlinkrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, len, attrflag;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ int v3 = NFS_ISV3(vp);
+
+ nfsstats.rpccnt[NFSPROC_READLINK]++;
+ nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
+ nfsm_fhtom(vp, v3);
+ nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
+ if (v3)
+ nfsm_postop_attr(vp, attrflag);
+ if (!error) {
+ nfsm_strsiz(len, NFS_MAXPATHLEN);
+ if (len == NFS_MAXPATHLEN) {
+ struct nfsnode *np = VTONFS(vp);
+ if (np->n_size && np->n_size < NFS_MAXPATHLEN)
+ len = np->n_size;
+ }
+ nfsm_mtouio(uiop, len);
+ }
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs read rpc call
+ * Ditto above
+ */
+int
+nfs_readrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos, cp2;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ int error = 0, len, retlen, tsiz, eof, attrflag;
+ int v3 = NFS_ISV3(vp);
+
+#ifndef nolint
+ eof = 0;
+#endif
+ nmp = VFSTONFS(vp->v_mount);
+ tsiz = uiop->uio_resid;
+ if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
+ return (EFBIG);
+ while (tsiz > 0) {
+ nfsstats.rpccnt[NFSPROC_READ]++;
+ len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
+ nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
+ nfsm_fhtom(vp, v3);
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED * 3);
+ if (v3) {
+ txdr_hyper(uiop->uio_offset, tl);
+ *(tl + 2) = txdr_unsigned(len);
+ } else {
+ *tl++ = txdr_unsigned(uiop->uio_offset);
+ *tl++ = txdr_unsigned(len);
+ *tl = 0;
+ }
+ nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
+ if (v3) {
+ nfsm_postop_attr(vp, attrflag);
+ if (error) {
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ eof = fxdr_unsigned(int, *(tl + 1));
+ } else
+ nfsm_loadattr(vp, (struct vattr *)0);
+ nfsm_strsiz(retlen, nmp->nm_rsize);
+ nfsm_mtouio(uiop, retlen);
+ m_freem(mrep);
+ tsiz -= retlen;
+ if (v3) {
+ if (eof || retlen == 0)
+ tsiz = 0;
+ } else if (retlen < len)
+ tsiz = 0;
+ }
+nfsmout:
+ return (error);
+}
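
nfs_readrpc() above never asks for more than nm_rsize bytes in a single READ and keeps issuing RPCs until the residual count is exhausted, the server reports EOF (v3), or a short reply comes back (v2). The chunking itself is just this loop; the sketch below is stand-alone and purely illustrative.

#include <stdio.h>

/*
 * Show how a transfer of `resid` bytes starting at `off` would be
 * split into READ RPCs for a mount with the given rsize.
 */
static void
show_read_chunks(long long off, long resid, long rsize)
{
    while (resid > 0) {
        long len = resid > rsize ? rsize : resid;  /* per-RPC length */
        printf("READ offset=%lld count=%ld\n", off, len);
        off += len;
        resid -= len;
    }
}

int
main(void)
{
    show_read_chunks(0, 20000, 8192);  /* 8192 is only an example rsize */
    return 0;
}
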
+
+/*
+ * nfs write call
+ */
+int
+nfs_writerpc(vp, uiop, cred, iomode, must_commit)
+ register struct vnode *vp;
+ register struct uio *uiop;
+ struct ucred *cred;
+ int *iomode, *must_commit;
+{
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2, backup;
+ caddr_t bpos, dpos, cp2;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
+ int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
+
+#ifndef DIAGNOSTIC
+ if (uiop->uio_iovcnt != 1)
+ panic("nfs: writerpc iovcnt > 1");
+#endif
+ *must_commit = 0;
+ tsiz = uiop->uio_resid;
+ if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
+ return (EFBIG);
+ while (tsiz > 0) {
+ nfsstats.rpccnt[NFSPROC_WRITE]++;
+ len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
+ nfsm_reqhead(vp, NFSPROC_WRITE,
+ NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
+ nfsm_fhtom(vp, v3);
+ if (v3) {
+ nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
+ txdr_hyper(uiop->uio_offset, tl);
+ tl += 2;
+ *tl++ = txdr_unsigned(len);
+ *tl++ = txdr_unsigned(*iomode);
+ *tl = txdr_unsigned(len);
+ } else {
+ register u_int32_t x;
+
+ nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+ /* Set both "begin" and "current" to non-garbage. */
+ x = txdr_unsigned((u_int32_t)uiop->uio_offset);
+ *tl++ = x; /* "begin offset" */
+ *tl++ = x; /* "current offset" */
+ x = txdr_unsigned(len);
+ *tl++ = x; /* total to this offset */
+ *tl = x; /* size of this write */
+ }
+ nfsm_uiotom(uiop, len);
+ nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
+ if (v3) {
+ wccflag = NFSV3_WCCCHK;
+ nfsm_wcc_data(vp, wccflag);
+ if (!error) {
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED
+ + NFSX_V3WRITEVERF);
+ rlen = fxdr_unsigned(int, *tl++);
+ if (rlen == 0) {
+ error = NFSERR_IO;
+ m_freem(mrep);
+ break;
+ } else if (rlen < len) {
+ backup = len - rlen;
+ uiop->uio_iov->iov_base -= backup;
+ uiop->uio_iov->iov_len += backup;
+ uiop->uio_offset -= backup;
+ uiop->uio_resid += backup;
+ len = rlen;
+ }
+ commit = fxdr_unsigned(int, *tl++);
+
+ /*
+ * Return the lowest commitment level
+ * obtained by any of the RPCs.
+ */
+ if (committed == NFSV3WRITE_FILESYNC)
+ committed = commit;
+ else if (committed == NFSV3WRITE_DATASYNC &&
+ commit == NFSV3WRITE_UNSTABLE)
+ committed = commit;
+ if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
+ bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
+ NFSX_V3WRITEVERF);
+ nmp->nm_state |= NFSSTA_HASWRITEVERF;
+ } else if (bcmp((caddr_t)tl,
+ (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
+ *must_commit = 1;
+ bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
+ NFSX_V3WRITEVERF);
+ }
+ }
+ } else
+ nfsm_loadattr(vp, (struct vattr *)0);
+ if (wccflag)
+ VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
+ m_freem(mrep);
+ if (error)
+ break;
+ tsiz -= len;
+ }
+nfsmout:
+ if (vp->v_mount->mnt_flag & MNT_ASYNC)
+ committed = NFSV3WRITE_FILESYNC;
+ *iomode = committed;
+ if (error)
+ uiop->uio_resid = tsiz;
+ return (error);
+}
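
Two details of nfs_writerpc() are worth isolating: the commitment level handed back through *iomode is the weakest level any individual WRITE reply reported, and a change in the server's 8-byte write verifier (which happens when the server reboots and loses unstable data) sets *must_commit so the caller re-commits everything. Both rules are sketched below with hypothetical names; the enum values mirror the NFSV3WRITE_* ordering.

#include <string.h>

/* Weakest to strongest, mirroring NFSV3WRITE_UNSTABLE/DATASYNC/FILESYNC. */
enum commit_level { UNSTABLE = 0, DATASYNC = 1, FILESYNC = 2 };

/* Keep the weakest commitment level seen across a series of WRITEs. */
static enum commit_level
merge_commit(enum commit_level so_far, enum commit_level this_reply)
{
    return this_reply < so_far ? this_reply : so_far;
}

/*
 * Compare the verifier in a WRITE reply with the one remembered for the
 * mount.  A mismatch means the server may have discarded uncommitted
 * data, so the caller must COMMIT (and possibly rewrite) it.
 */
static int
verifier_changed(unsigned char saved[8], const unsigned char reply[8])
{
    if (memcmp(saved, reply, 8) != 0) {
        memcpy(saved, reply, 8);   /* remember the new verifier */
        return 1;                  /* caller would set *must_commit */
    }
    return 0;
}
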
+
+/*
+ * nfs mknod rpc
+ * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
+ * mode set to specify the file type and the size field for rdev.
+ */
+static int
+nfs_mknodrpc(dvp, vpp, cnp, vap)
+ register struct vnode *dvp;
+ register struct vnode **vpp;
+ register struct componentname *cnp;
+ register struct vattr *vap;
+{
+ register struct nfsv2_sattr *sp;
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ struct vnode *newvp = (struct vnode *)0;
+ struct nfsnode *np = (struct nfsnode *)0;
+ struct vattr vattr;
+ char *cp2;
+ caddr_t bpos, dpos;
+ int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ u_int32_t rdev;
+ int v3 = NFS_ISV3(dvp);
+
+ if (vap->va_type == VCHR || vap->va_type == VBLK)
+ rdev = txdr_unsigned(vap->va_rdev);
+ else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
+ rdev = nfs_xdrneg1;
+ else {
+ return (EOPNOTSUPP);
+ }
+ if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
+ return (error);
+ }
+ nfsstats.rpccnt[NFSPROC_MKNOD]++;
+ nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
+ + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
+ nfsm_fhtom(dvp, v3);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ if (v3) {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ *tl++ = vtonfsv3_type(vap->va_type);
+ nfsm_v3attrbuild(vap, FALSE);
+ if (vap->va_type == VCHR || vap->va_type == VBLK) {
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(umajor(vap->va_rdev));
+ *tl = txdr_unsigned(uminor(vap->va_rdev));
+ }
+ } else {
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
+ sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
+ sp->sa_uid = nfs_xdrneg1;
+ sp->sa_gid = nfs_xdrneg1;
+ sp->sa_size = rdev;
+ txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
+ txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
+ }
+ nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred);
+ if (!error) {
+ nfsm_mtofh(dvp, newvp, v3, gotvp);
+ if (!gotvp) {
+ if (newvp) {
+ vput(newvp);
+ newvp = (struct vnode *)0;
+ }
+ error = nfs_lookitup(dvp, cnp->cn_nameptr,
+ cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
+ if (!error)
+ newvp = NFSTOV(np);
+ }
+ }
+ if (v3)
+ nfsm_wcc_data(dvp, wccflag);
+ nfsm_reqdone;
+ if (error) {
+ if (newvp)
+ vput(newvp);
+ } else {
+ if (cnp->cn_flags & MAKEENTRY)
+ cache_enter(dvp, newvp, cnp);
+ *vpp = newvp;
+ }
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ if (!wccflag)
+ VTONFS(dvp)->n_attrstamp = 0;
+ return (error);
+}
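
The v2 kludge mentioned in the comment above comes from the protocol itself: NFSv2 has no MKNOD procedure, so the device number travels in the sattr size field of a CREATE request and the node type rides in the IFMT bits of the mode. Here is a small sketch of that encoding; the struct and helper are hypothetical, not the real nfsv2_sattr handling.

#include <stdint.h>
#include <sys/stat.h>

/* Hypothetical view of the two sattr fields the v2 kludge (ab)uses. */
struct v2_mknod_enc {
    uint32_t sa_mode;   /* IFMT bits select the node type */
    uint32_t sa_size;   /* device number, or all-ones for fifo/socket */
};

static struct v2_mknod_enc
encode_v2_mknod(mode_t type, mode_t perm, uint32_t rdev)
{
    struct v2_mknod_enc e;

    e.sa_mode = (uint32_t)((type & S_IFMT) | (perm & 07777));
    if (type == S_IFCHR || type == S_IFBLK)
        e.sa_size = rdev;          /* smuggle the device number in "size" */
    else
        e.sa_size = 0xffffffffu;   /* fifo or socket: no device number */
    return e;
}
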
+
+/*
+ * nfs mknod vop
+ * just call nfs_mknodrpc() to do the work.
+ */
+/* ARGSUSED */
+static int
+nfs_mknod(ap)
+ struct vop_mknod_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ return nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap);
+}
+
+static u_long create_verf;
+/*
+ * nfs file create call
+ */
+static int
+nfs_create(ap)
+ struct vop_create_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ struct nfsnode *np = (struct nfsnode *)0;
+ struct vnode *newvp = (struct vnode *)0;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vattr vattr;
+ int v3 = NFS_ISV3(dvp);
+
+ /*
+ * Oops, not for me..
+ */
+ if (vap->va_type == VSOCK)
+ return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
+
+ if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
+ return (error);
+ }
+ if (vap->va_vaflags & VA_EXCLUSIVE)
+ fmode |= O_EXCL;
+again:
+ nfsstats.rpccnt[NFSPROC_CREATE]++;
+ nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
+ nfsm_fhtom(dvp, v3);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ if (v3) {
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+ if (fmode & O_EXCL) {
+ *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
+ nfsm_build(tl, u_int32_t *, NFSX_V3CREATEVERF);
+#ifdef INET
+ if (!TAILQ_EMPTY(&in_ifaddrhead))
+ *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
+ else
+#endif
+ *tl++ = create_verf;
+ *tl = ++create_verf;
+ } else {
+ *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
+ nfsm_v3attrbuild(vap, FALSE);
+ }
+ } else {
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
+ sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
+ sp->sa_uid = nfs_xdrneg1;
+ sp->sa_gid = nfs_xdrneg1;
+ sp->sa_size = 0;
+ txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
+ txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
+ }
+ nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+ if (!error) {
+ nfsm_mtofh(dvp, newvp, v3, gotvp);
+ if (!gotvp) {
+ if (newvp) {
+ vput(newvp);
+ newvp = (struct vnode *)0;
+ }
+ error = nfs_lookitup(dvp, cnp->cn_nameptr,
+ cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
+ if (!error)
+ newvp = NFSTOV(np);
+ }
+ }
+ if (v3)
+ nfsm_wcc_data(dvp, wccflag);
+ nfsm_reqdone;
+ if (error) {
+ if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
+ fmode &= ~O_EXCL;
+ goto again;
+ }
+ if (newvp)
+ vput(newvp);
+ } else if (v3 && (fmode & O_EXCL))
+ error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
+ if (!error) {
+ if (cnp->cn_flags & MAKEENTRY)
+ cache_enter(dvp, newvp, cnp);
+ *ap->a_vpp = newvp;
+ }
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ if (!wccflag)
+ VTONFS(dvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs file remove call
+ * To try to make nfs semantics closer to ufs semantics, a file that has
+ * other processes using the vnode is renamed instead of removed and then
+ * removed later on the last close.
+ * - If v_usecount > 1
+ * If a rename is not already in the works
+ * call nfs_sillyrename() to set it up
+ * else
+ * do the remove rpc
+ */
+static int
+nfs_remove(ap)
+ struct vop_remove_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode * a_dvp;
+ struct vnode * a_vp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *dvp = ap->a_dvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsnode *np = VTONFS(vp);
+ int error = 0;
+ struct vattr vattr;
+
+#ifndef DIAGNOSTIC
+ if ((cnp->cn_flags & HASBUF) == 0)
+ panic("nfs_remove: no name");
+ if (vp->v_usecount < 1)
+ panic("nfs_remove: bad v_usecount");
+#endif
+ if (vp->v_type == VDIR)
+ error = EPERM;
+ else if (vp->v_usecount == 1 || (np->n_sillyrename &&
+ VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
+ vattr.va_nlink > 1)) {
+ /*
+ * Purge the name cache so that the chance of a lookup for
+ * the name succeeding while the remove is in progress is
+ * minimized. Without node locking it can still happen, such
+ * that an I/O op returns ESTALE, but that is no worse than what
+ * happens when another host removes the file.
+ */
+ cache_purge(vp);
+ /*
+ * throw away biocache buffers, mainly to avoid
+ * unnecessary delayed writes later.
+ */
+ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
+ /* Do the rpc */
+ if (error != EINTR)
+ error = nfs_removerpc(dvp, cnp->cn_nameptr,
+ cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
+ /*
+ * Kludge City: If the first reply to the remove rpc is lost,
+ * the reply to the retransmitted request will be ENOENT
+ * since the file was in fact removed.
+ * Therefore, we cheat and return success.
+ */
+ if (error == ENOENT)
+ error = 0;
+ } else if (!np->n_sillyrename)
+ error = nfs_sillyrename(dvp, vp, cnp);
+ np->n_attrstamp = 0;
+ return (error);
+}
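
The decision made at the top of nfs_remove() can be read off in isolation: do the REMOVE RPC only when no one else holds the vnode, or when a sillyrename already exists and the link count proves another name still reaches the data; otherwise set up a sillyrename (a rename to a temporary hidden name) and defer the real removal to the last close. A compact sketch of just that choice, with illustrative field names:

/* Illustrative view of the state nfs_remove() inspects. */
struct remove_ctx {
    int usecount;          /* active references to the vnode (v_usecount) */
    int has_sillyrename;   /* a deferred rename is already set up */
    int nlink;             /* link count from the latest GETATTR */
};

enum remove_action { REMOVE_NOW, SILLY_RENAME, DO_NOTHING };

static enum remove_action
choose_remove(const struct remove_ctx *c)
{
    if (c->usecount == 1)
        return REMOVE_NOW;      /* nobody else has the file open */
    if (c->has_sillyrename && c->nlink > 1)
        return REMOVE_NOW;      /* another link keeps the data reachable */
    if (!c->has_sillyrename)
        return SILLY_RENAME;    /* defer removal to the last close */
    return DO_NOTHING;          /* a sillyrename is already pending */
}
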
+
+/*
+ * nfs file remove rpc called from nfs_inactive
+ */
+int
+nfs_removeit(sp)
+ register struct sillyrename *sp;
+{
+
+ return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
+ (struct proc *)0));
+}
+
+/*
+ * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
+ */
+static int
+nfs_removerpc(dvp, name, namelen, cred, proc)
+ register struct vnode *dvp;
+ const char *name;
+ int namelen;
+ struct ucred *cred;
+ struct proc *proc;
+{
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, wccflag = NFSV3_WCCRATTR;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ int v3 = NFS_ISV3(dvp);
+
+ nfsstats.rpccnt[NFSPROC_REMOVE]++;
+ nfsm_reqhead(dvp, NFSPROC_REMOVE,
+ NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
+ nfsm_fhtom(dvp, v3);
+ nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_REMOVE, proc, cred);
+ if (v3)
+ nfsm_wcc_data(dvp, wccflag);
+ nfsm_reqdone;
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ if (!wccflag)
+ VTONFS(dvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs file rename call
+ */
+static int
+nfs_rename(ap)
+ struct vop_rename_args /* {
+ struct vnode *a_fdvp;
+ struct vnode *a_fvp;
+ struct componentname *a_fcnp;
+ struct vnode *a_tdvp;
+ struct vnode *a_tvp;
+ struct componentname *a_tcnp;
+ } */ *ap;
+{
+ register struct vnode *fvp = ap->a_fvp;
+ register struct vnode *tvp = ap->a_tvp;
+ register struct vnode *fdvp = ap->a_fdvp;
+ register struct vnode *tdvp = ap->a_tdvp;
+ register struct componentname *tcnp = ap->a_tcnp;
+ register struct componentname *fcnp = ap->a_fcnp;
+ int error;
+
+#ifndef DIAGNOSTIC
+ if ((tcnp->cn_flags & HASBUF) == 0 ||
+ (fcnp->cn_flags & HASBUF) == 0)
+ panic("nfs_rename: no name");
+#endif
+ /* Check for cross-device rename */
+ if ((fvp->v_mount != tdvp->v_mount) ||
+ (tvp && (fvp->v_mount != tvp->v_mount))) {
+ error = EXDEV;
+ goto out;
+ }
+
+ /*
+ * We have to flush B_DELWRI data prior to renaming
+ * the file. If we don't, the delayed-write buffers
+ * can be flushed out later after the file has gone stale
+ * under NFSV3. NFSV2 does not have this problem because
+ * (as far as I can tell) it flushes dirty buffers more
+ * often.
+ */
+
+ VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_proc);
+ if (tvp)
+ VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_proc);
+
+ /*
+ * If the tvp exists and is in use, sillyrename it before doing the
+ * rename of the new file over it.
+ * XXX Can't sillyrename a directory.
+ */
+ if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename &&
+ tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
+ vput(tvp);
+ tvp = NULL;
+ }
+
+ error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
+ tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
+ tcnp->cn_proc);
+
+ if (fvp->v_type == VDIR) {
+ if (tvp != NULL && tvp->v_type == VDIR)
+ cache_purge(tdvp);
+ cache_purge(fdvp);
+ }
+
+out:
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ vrele(fdvp);
+ vrele(fvp);
+ /*
+ * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
+ */
+ if (error == ENOENT)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs file rename rpc called from nfs_remove() above
+ */
+static int
+nfs_renameit(sdvp, scnp, sp)
+ struct vnode *sdvp;
+ struct componentname *scnp;
+ register struct sillyrename *sp;
+{
+ return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
+ sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc));
+}
+
+/*
+ * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
+ */
+static int
+nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc)
+ register struct vnode *fdvp;
+ const char *fnameptr;
+ int fnamelen;
+ register struct vnode *tdvp;
+ const char *tnameptr;
+ int tnamelen;
+ struct ucred *cred;
+ struct proc *proc;
+{
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ int v3 = NFS_ISV3(fdvp);
+
+ nfsstats.rpccnt[NFSPROC_RENAME]++;
+ nfsm_reqhead(fdvp, NFSPROC_RENAME,
+ (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
+ nfsm_rndup(tnamelen));
+ nfsm_fhtom(fdvp, v3);
+ nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
+ nfsm_fhtom(tdvp, v3);
+ nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
+ nfsm_request(fdvp, NFSPROC_RENAME, proc, cred);
+ if (v3) {
+ nfsm_wcc_data(fdvp, fwccflag);
+ nfsm_wcc_data(tdvp, twccflag);
+ }
+ nfsm_reqdone;
+ VTONFS(fdvp)->n_flag |= NMODIFIED;
+ VTONFS(tdvp)->n_flag |= NMODIFIED;
+ if (!fwccflag)
+ VTONFS(fdvp)->n_attrstamp = 0;
+ if (!twccflag)
+ VTONFS(tdvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs hard link create call
+ */
+static int
+nfs_link(ap)
+ struct vop_link_args /* {
+ struct vnode *a_tdvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *tdvp = ap->a_tdvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ int v3;
+
+ if (vp->v_mount != tdvp->v_mount) {
+ return (EXDEV);
+ }
+
+ /*
+ * Push all writes to the server, so that the attribute cache
+ * doesn't get "out of sync" with the server.
+ * XXX There should be a better way!
+ */
+ VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc);
+
+ v3 = NFS_ISV3(vp);
+ nfsstats.rpccnt[NFSPROC_LINK]++;
+ nfsm_reqhead(vp, NFSPROC_LINK,
+ NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
+ nfsm_fhtom(vp, v3);
+ nfsm_fhtom(tdvp, v3);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
+ if (v3) {
+ nfsm_postop_attr(vp, attrflag);
+ nfsm_wcc_data(tdvp, wccflag);
+ }
+ nfsm_reqdone;
+ VTONFS(tdvp)->n_flag |= NMODIFIED;
+ if (!attrflag)
+ VTONFS(vp)->n_attrstamp = 0;
+ if (!wccflag)
+ VTONFS(tdvp)->n_attrstamp = 0;
+ /*
+ * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+ */
+ if (error == EEXIST)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs symbolic link create call
+ */
+static int
+nfs_symlink(ap)
+ struct vop_symlink_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ char *a_target;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vnode *newvp = (struct vnode *)0;
+ int v3 = NFS_ISV3(dvp);
+
+ nfsstats.rpccnt[NFSPROC_SYMLINK]++;
+ slen = strlen(ap->a_target);
+ nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
+ nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
+ nfsm_fhtom(dvp, v3);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ if (v3) {
+ nfsm_v3attrbuild(vap, FALSE);
+ }
+ nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
+ if (!v3) {
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
+ sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
+ sp->sa_uid = nfs_xdrneg1;
+ sp->sa_gid = nfs_xdrneg1;
+ sp->sa_size = nfs_xdrneg1;
+ txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
+ txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
+ }
+
+ /*
+ * Issue the NFS request and get the rpc response.
+ *
+ * Only NFSv3 responses returning an error of 0 actually return
+ * a file handle that can be converted into newvp without having
+ * to do an extra lookup rpc.
+ */
+ nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
+ if (v3) {
+ if (error == 0)
+ nfsm_mtofh(dvp, newvp, v3, gotvp);
+ nfsm_wcc_data(dvp, wccflag);
+ }
+
+ /*
+ * The nfsm_* error exits jump to here; mrep is freed as well.
+ */
+
+ nfsm_reqdone;
+
+ /*
+ * If we get an EEXIST error, silently convert it to no-error
+ * in case of an NFS retry.
+ */
+ if (error == EEXIST)
+ error = 0;
+
+ /*
+ * If we do not have (or no longer have) an error, and we could
+ * not extract the newvp from the response (because the request
+ * was NFSv2 or the error was EEXIST), we have to do a lookup in
+ * order to obtain a newvp to return.
+ */
+ if (error == 0 && newvp == NULL) {
+ struct nfsnode *np = NULL;
+
+ error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
+ cnp->cn_cred, cnp->cn_proc, &np);
+ if (!error)
+ newvp = NFSTOV(np);
+ }
+ if (error) {
+ if (newvp)
+ vput(newvp);
+ } else {
+ *ap->a_vpp = newvp;
+ }
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ if (!wccflag)
+ VTONFS(dvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs make dir call
+ */
+static int
+nfs_mkdir(ap)
+ struct vop_mkdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ register int len;
+ struct nfsnode *np = (struct nfsnode *)0;
+ struct vnode *newvp = (struct vnode *)0;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, wccflag = NFSV3_WCCRATTR;
+ int gotvp = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vattr vattr;
+ int v3 = NFS_ISV3(dvp);
+
+ if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
+ return (error);
+ }
+ len = cnp->cn_namelen;
+ nfsstats.rpccnt[NFSPROC_MKDIR]++;
+ nfsm_reqhead(dvp, NFSPROC_MKDIR,
+ NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
+ nfsm_fhtom(dvp, v3);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ if (v3) {
+ nfsm_v3attrbuild(vap, FALSE);
+ } else {
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
+ sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
+ sp->sa_uid = nfs_xdrneg1;
+ sp->sa_gid = nfs_xdrneg1;
+ sp->sa_size = nfs_xdrneg1;
+ txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
+ txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
+ }
+ nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
+ if (!error)
+ nfsm_mtofh(dvp, newvp, v3, gotvp);
+ if (v3)
+ nfsm_wcc_data(dvp, wccflag);
+ nfsm_reqdone;
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ if (!wccflag)
+ VTONFS(dvp)->n_attrstamp = 0;
+ /*
+ * Kludge: Map EEXIST => 0, assuming it is a reply to a retry,
+ * provided we can succeed in looking up the directory.
+ */
+ if (error == EEXIST || (!error && !gotvp)) {
+ if (newvp) {
+ vrele(newvp);
+ newvp = (struct vnode *)0;
+ }
+ error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
+ cnp->cn_proc, &np);
+ if (!error) {
+ newvp = NFSTOV(np);
+ if (newvp->v_type != VDIR)
+ error = EEXIST;
+ }
+ }
+ if (error) {
+ if (newvp)
+ vrele(newvp);
+ } else
+ *ap->a_vpp = newvp;
+ return (error);
+}
+
+/*
+ * nfs remove directory call
+ */
+static int
+nfs_rmdir(ap)
+ struct vop_rmdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *dvp = ap->a_dvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, wccflag = NFSV3_WCCRATTR;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ int v3 = NFS_ISV3(dvp);
+
+ if (dvp == vp)
+ return (EINVAL);
+ nfsstats.rpccnt[NFSPROC_RMDIR]++;
+ nfsm_reqhead(dvp, NFSPROC_RMDIR,
+ NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
+ nfsm_fhtom(dvp, v3);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
+ if (v3)
+ nfsm_wcc_data(dvp, wccflag);
+ nfsm_reqdone;
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ if (!wccflag)
+ VTONFS(dvp)->n_attrstamp = 0;
+ cache_purge(dvp);
+ cache_purge(vp);
+ /*
+ * Kludge: Map ENOENT => 0, on the assumption that it is the reply to a retried request.
+ */
+ if (error == ENOENT)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs readdir call
+ */
+static int
+nfs_readdir(ap)
+ struct vop_readdir_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register struct uio *uio = ap->a_uio;
+ int tresid, error;
+ struct vattr vattr;
+
+ if (vp->v_type != VDIR)
+ return (EPERM);
+ /*
+ * First, check for hit on the EOF offset cache
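+ * If the directory's EOF offset was recorded on an earlier pass and
+ * nothing has modified it since (NMODIFIED is clear and, for non-NQNFS
+ * mounts, the mtime is unchanged), the read is satisfied as EOF
+ * without doing another RPC.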
+ */
+ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
+ (np->n_flag & NMODIFIED) == 0) {
+ if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
+ if (NQNFS_CKCACHABLE(vp, ND_READ)) {
+ nfsstats.direofcache_hits++;
+ return (0);
+ }
+ } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 &&
+ np->n_mtime == vattr.va_mtime.tv_sec) {
+ nfsstats.direofcache_hits++;
+ return (0);
+ }
+ }
+
+ /*
+ * Call nfs_bioread() to do the real work.
+ */
+ tresid = uio->uio_resid;
+ error = nfs_bioread(vp, uio, 0, ap->a_cred);
+
+ if (!error && uio->uio_resid == tresid)
+ nfsstats.direofcache_misses++;
+ return (error);
+}
+
+/*
+ * Readdir rpc call.
+ * Called from below the buffer cache by nfs_doio().
+ */
+int
+nfs_readdirrpc(vp, uiop, cred)
+ struct vnode *vp;
+ register struct uio *uiop;
+ struct ucred *cred;
+{
+ register int len, left;
+ register struct dirent *dp = NULL;
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ register nfsuint64 *cookiep;
+ caddr_t bpos, dpos, cp2;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ nfsuint64 cookie;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ struct nfsnode *dnp = VTONFS(vp);
+ u_quad_t fileno;
+ int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
+ int attrflag;
+ int v3 = NFS_ISV3(vp);
+
+#ifndef DIAGNOSTIC
+ if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
+ (uiop->uio_resid & (DIRBLKSIZ - 1)))
+ panic("nfs readdirrpc bad uio");
+#endif
+
+ /*
+ * If there is no cookie, assume directory was stale.
+ */
+ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
+ if (cookiep)
+ cookie = *cookiep;
+ else
+ return (NFSERR_BAD_COOKIE);
+ /*
+ * Loop around doing readdir rpc's of size nm_readdirsize
+ * truncated to a multiple of DIRBLKSIZ.
+ * The stopping criterion is EOF or a full buffer.
+ */
+ while (more_dirs && bigenough) {
+ nfsstats.rpccnt[NFSPROC_READDIR]++;
+ nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
+ NFSX_READDIR(v3));
+ nfsm_fhtom(vp, v3);
+ if (v3) {
+ nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
+ *tl++ = cookie.nfsuquad[0];
+ *tl++ = cookie.nfsuquad[1];
+ *tl++ = dnp->n_cookieverf.nfsuquad[0];
+ *tl++ = dnp->n_cookieverf.nfsuquad[1];
+ } else {
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ *tl++ = cookie.nfsuquad[0];
+ }
+ *tl = txdr_unsigned(nmp->nm_readdirsize);
+ nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
+ if (v3) {
+ nfsm_postop_attr(vp, attrflag);
+ if (!error) {
+ nfsm_dissect(tl, u_int32_t *,
+ 2 * NFSX_UNSIGNED);
+ dnp->n_cookieverf.nfsuquad[0] = *tl++;
+ dnp->n_cookieverf.nfsuquad[1] = *tl;
+ } else {
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ }
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ more_dirs = fxdr_unsigned(int, *tl);
+
+ /* loop thru the dir entries, doctoring them to 4bsd form */
+ while (more_dirs && bigenough) {
+ if (v3) {
+ nfsm_dissect(tl, u_int32_t *,
+ 3 * NFSX_UNSIGNED);
+ fileno = fxdr_hyper(tl);
+ len = fxdr_unsigned(int, *(tl + 2));
+ } else {
+ nfsm_dissect(tl, u_int32_t *,
+ 2 * NFSX_UNSIGNED);
+ fileno = fxdr_unsigned(u_quad_t, *tl++);
+ len = fxdr_unsigned(int, *tl);
+ }
+ if (len <= 0 || len > NFS_MAXNAMLEN) {
+ error = EBADRPC;
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ tlen = nfsm_rndup(len);
+ if (tlen == len)
+ tlen += 4; /* To ensure null termination */
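+ /*
+ * Directory entries handed back to readers must not straddle a
+ * DIRBLKSIZ boundary.  If this entry will not fit in what is left
+ * of the current block, pad the previous entry's d_reclen out to
+ * the boundary and start a new block.
+ */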
+ left = DIRBLKSIZ - blksiz;
+ if ((tlen + DIRHDSIZ) > left) {
+ dp->d_reclen += left;
+ uiop->uio_iov->iov_base += left;
+ uiop->uio_iov->iov_len -= left;
+ uiop->uio_offset += left;
+ uiop->uio_resid -= left;
+ blksiz = 0;
+ }
+ if ((tlen + DIRHDSIZ) > uiop->uio_resid)
+ bigenough = 0;
+ if (bigenough) {
+ dp = (struct dirent *)uiop->uio_iov->iov_base;
+ dp->d_fileno = (int)fileno;
+ dp->d_namlen = len;
+ dp->d_reclen = tlen + DIRHDSIZ;
+ dp->d_type = DT_UNKNOWN;
+ blksiz += dp->d_reclen;
+ if (blksiz == DIRBLKSIZ)
+ blksiz = 0;
+ uiop->uio_offset += DIRHDSIZ;
+ uiop->uio_resid -= DIRHDSIZ;
+ uiop->uio_iov->iov_base += DIRHDSIZ;
+ uiop->uio_iov->iov_len -= DIRHDSIZ;
+ nfsm_mtouio(uiop, len);
+ cp = uiop->uio_iov->iov_base;
+ tlen -= len;
+ *cp = '\0'; /* null terminate */
+ uiop->uio_iov->iov_base += tlen;
+ uiop->uio_iov->iov_len -= tlen;
+ uiop->uio_offset += tlen;
+ uiop->uio_resid -= tlen;
+ } else
+ nfsm_adv(nfsm_rndup(len));
+ if (v3) {
+ nfsm_dissect(tl, u_int32_t *,
+ 3 * NFSX_UNSIGNED);
+ } else {
+ nfsm_dissect(tl, u_int32_t *,
+ 2 * NFSX_UNSIGNED);
+ }
+ if (bigenough) {
+ cookie.nfsuquad[0] = *tl++;
+ if (v3)
+ cookie.nfsuquad[1] = *tl++;
+ } else if (v3)
+ tl += 2;
+ else
+ tl++;
+ more_dirs = fxdr_unsigned(int, *tl);
+ }
+ /*
+ * If at end of rpc data, get the eof boolean
+ */
+ if (!more_dirs) {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ more_dirs = (fxdr_unsigned(int, *tl) == 0);
+ }
+ m_freem(mrep);
+ }
+ /*
+ * Fill the last record, if any, out to a multiple of DIRBLKSIZ
+ * by increasing d_reclen for the last record.
+ */
+ if (blksiz > 0) {
+ left = DIRBLKSIZ - blksiz;
+ dp->d_reclen += left;
+ uiop->uio_iov->iov_base += left;
+ uiop->uio_iov->iov_len -= left;
+ uiop->uio_offset += left;
+ uiop->uio_resid -= left;
+ }
+
+ /*
+ * We are now either at the end of the directory or have filled the
+ * block.
+ */
+ if (bigenough)
+ dnp->n_direofoffset = uiop->uio_offset;
+ else {
+ if (uiop->uio_resid > 0)
+ printf("EEK! readdirrpc resid > 0\n");
+ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
+ *cookiep = cookie;
+ }
+nfsmout:
+ return (error);
+}
+
+/*
+ * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
+ */
+int
+nfs_readdirplusrpc(vp, uiop, cred)
+ struct vnode *vp;
+ register struct uio *uiop;
+ struct ucred *cred;
+{
+ register int len, left;
+ register struct dirent *dp;
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ register struct vnode *newvp;
+ register nfsuint64 *cookiep;
+ caddr_t bpos, dpos, cp2, dpossav1, dpossav2;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2;
+ struct nameidata nami, *ndp = &nami;
+ struct componentname *cnp = &ndp->ni_cnd;
+ nfsuint64 cookie;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ struct nfsnode *dnp = VTONFS(vp), *np;
+ nfsfh_t *fhp;
+ u_quad_t fileno;
+ int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
+ int attrflag, fhsize;
+
+#ifndef nolint
+ dp = (struct dirent *)0;
+#endif
+#ifndef DIAGNOSTIC
+ if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
+ (uiop->uio_resid & (DIRBLKSIZ - 1)))
+ panic("nfs readdirplusrpc bad uio");
+#endif
+ ndp->ni_dvp = vp;
+ newvp = NULLVP;
+
+ /*
+ * If there is no cookie, assume directory was stale.
+ */
+ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
+ if (cookiep)
+ cookie = *cookiep;
+ else
+ return (NFSERR_BAD_COOKIE);
+ /*
+ * Loop around doing readdir rpc's of size nm_readdirsize
+ * truncated to a multiple of DIRBLKSIZ.
+ * The stopping criterion is EOF or a full buffer.
+ */
+ while (more_dirs && bigenough) {
+ nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
+ nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
+ NFSX_FH(1) + 6 * NFSX_UNSIGNED);
+ nfsm_fhtom(vp, 1);
+ nfsm_build(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
+ *tl++ = cookie.nfsuquad[0];
+ *tl++ = cookie.nfsuquad[1];
+ *tl++ = dnp->n_cookieverf.nfsuquad[0];
+ *tl++ = dnp->n_cookieverf.nfsuquad[1];
+ *tl++ = txdr_unsigned(nmp->nm_readdirsize);
+ *tl = txdr_unsigned(nmp->nm_rsize);
+ nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred);
+ nfsm_postop_attr(vp, attrflag);
+ if (error) {
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+ dnp->n_cookieverf.nfsuquad[0] = *tl++;
+ dnp->n_cookieverf.nfsuquad[1] = *tl++;
+ more_dirs = fxdr_unsigned(int, *tl);
+
+ /* loop thru the dir entries, doctoring them to 4bsd form */
+ while (more_dirs && bigenough) {
+ nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+ fileno = fxdr_hyper(tl);
+ len = fxdr_unsigned(int, *(tl + 2));
+ if (len <= 0 || len > NFS_MAXNAMLEN) {
+ error = EBADRPC;
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ tlen = nfsm_rndup(len);
+ if (tlen == len)
+ tlen += 4; /* To ensure null termination */
+ left = DIRBLKSIZ - blksiz;
+ if ((tlen + DIRHDSIZ) > left) {
+ dp->d_reclen += left;
+ uiop->uio_iov->iov_base += left;
+ uiop->uio_iov->iov_len -= left;
+ uiop->uio_offset += left;
+ uiop->uio_resid -= left;
+ blksiz = 0;
+ }
+ if ((tlen + DIRHDSIZ) > uiop->uio_resid)
+ bigenough = 0;
+ if (bigenough) {
+ dp = (struct dirent *)uiop->uio_iov->iov_base;
+ dp->d_fileno = (int)fileno;
+ dp->d_namlen = len;
+ dp->d_reclen = tlen + DIRHDSIZ;
+ dp->d_type = DT_UNKNOWN;
+ blksiz += dp->d_reclen;
+ if (blksiz == DIRBLKSIZ)
+ blksiz = 0;
+ uiop->uio_offset += DIRHDSIZ;
+ uiop->uio_resid -= DIRHDSIZ;
+ uiop->uio_iov->iov_base += DIRHDSIZ;
+ uiop->uio_iov->iov_len -= DIRHDSIZ;
+ cnp->cn_nameptr = uiop->uio_iov->iov_base;
+ cnp->cn_namelen = len;
+ nfsm_mtouio(uiop, len);
+ cp = uiop->uio_iov->iov_base;
+ tlen -= len;
+ *cp = '\0';
+ uiop->uio_iov->iov_base += tlen;
+ uiop->uio_iov->iov_len -= tlen;
+ uiop->uio_offset += tlen;
+ uiop->uio_resid -= tlen;
+ } else
+ nfsm_adv(nfsm_rndup(len));
+ nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+ if (bigenough) {
+ cookie.nfsuquad[0] = *tl++;
+ cookie.nfsuquad[1] = *tl++;
+ } else
+ tl += 2;
+
+ /*
+ * Since the attributes are before the file handle
+ * (sigh), we must skip over the attributes and then
+ * come back and get them.
+ */
+ attrflag = fxdr_unsigned(int, *tl);
+ if (attrflag) {
+ dpossav1 = dpos;
+ mdsav1 = md;
+ nfsm_adv(NFSX_V3FATTR);
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ doit = fxdr_unsigned(int, *tl);
+ if (doit) {
+ nfsm_getfh(fhp, fhsize, 1);
+ if (NFS_CMPFH(dnp, fhp, fhsize)) {
+ VREF(vp);
+ newvp = vp;
+ np = dnp;
+ } else {
+ error = nfs_nget(vp->v_mount, fhp,
+ fhsize, &np);
+ if (error)
+ doit = 0;
+ else
+ newvp = NFSTOV(np);
+ }
+ }
+ if (doit && bigenough) {
+ dpossav2 = dpos;
+ dpos = dpossav1;
+ mdsav2 = md;
+ md = mdsav1;
+ nfsm_loadattr(newvp, (struct vattr *)0);
+ dpos = dpossav2;
+ md = mdsav2;
+ dp->d_type =
+ IFTODT(VTTOIF(np->n_vattr.va_type));
+ ndp->ni_vp = newvp;
+ cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
+ }
+ } else {
+ /* Just skip over the file handle */
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ i = fxdr_unsigned(int, *tl);
+ nfsm_adv(nfsm_rndup(i));
+ }
+ if (newvp != NULLVP) {
+ if (newvp == vp)
+ vrele(newvp);
+ else
+ vput(newvp);
+ newvp = NULLVP;
+ }
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ more_dirs = fxdr_unsigned(int, *tl);
+ }
+ /*
+ * If at end of rpc data, get the eof boolean
+ */
+ if (!more_dirs) {
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+ more_dirs = (fxdr_unsigned(int, *tl) == 0);
+ }
+ m_freem(mrep);
+ }
+ /*
+ * Fill the last record, if any, out to a multiple of DIRBLKSIZ
+ * by increasing d_reclen for the last record.
+ */
+ if (blksiz > 0) {
+ left = DIRBLKSIZ - blksiz;
+ dp->d_reclen += left;
+ uiop->uio_iov->iov_base += left;
+ uiop->uio_iov->iov_len -= left;
+ uiop->uio_offset += left;
+ uiop->uio_resid -= left;
+ }
+
+ /*
+ * We are now either at the end of the directory or have filled the
+ * block.
+ */
+ if (bigenough)
+ dnp->n_direofoffset = uiop->uio_offset;
+ else {
+ if (uiop->uio_resid > 0)
+ printf("EEK! readdirplusrpc resid > 0\n");
+ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
+ *cookiep = cookie;
+ }
+nfsmout:
+ if (newvp != NULLVP) {
+ if (newvp == vp)
+ vrele(newvp);
+ else
+ vput(newvp);
+ newvp = NULLVP;
+ }
+ return (error);
+}
+
+/*
+ * Silly rename. To make the stateless NFS filesystem look a little more
+ * like "ufs", a remove of an active vnode is translated into a rename to
+ * a funny-looking filename that is removed later by nfs_inactive() on the
+ * nfsnode. There is the potential for another process on a different
+ * client to create the same funny name between the time nfs_lookitup()
+ * fails and nfs_rename() completes, but...
+ */
+static int
+nfs_sillyrename(dvp, vp, cnp)
+ struct vnode *dvp, *vp;
+ struct componentname *cnp;
+{
+ register struct sillyrename *sp;
+ struct nfsnode *np;
+ int error;
+ short pid;
+
+ cache_purge(dvp);
+ np = VTONFS(vp);
+#ifndef DIAGNOSTIC
+ if (vp->v_type == VDIR)
+ panic("nfs: sillyrename dir");
+#endif
+ MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
+ M_NFSREQ, M_WAITOK);
+ sp->s_cred = crdup(cnp->cn_cred);
+ sp->s_dvp = dvp;
+ VREF(dvp);
+
+ /* Fudge together a funny name */
+ pid = cnp->cn_proc->p_pid;
+ sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
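+ /*
+ * The name for pid 0x1234, for instance, is ".nfsA12344.4"; the loop
+ * below bumps s_name[4] (the 'A') until an unused name is found,
+ * giving up beyond 'z'.
+ */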
+
+ /* Try lookitups until we get one that isn't there */
+ while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
+ cnp->cn_proc, (struct nfsnode **)0) == 0) {
+ sp->s_name[4]++;
+ if (sp->s_name[4] > 'z') {
+ error = EINVAL;
+ goto bad;
+ }
+ }
+ error = nfs_renameit(dvp, cnp, sp);
+ if (error)
+ goto bad;
+ error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
+ cnp->cn_proc, &np);
+ np->n_sillyrename = sp;
+ return (0);
+bad:
+ vrele(sp->s_dvp);
+ crfree(sp->s_cred);
+ free((caddr_t)sp, M_NFSREQ);
+ return (error);
+}
+
+/*
+ * Look up a file name and optionally either update the file handle or
+ * allocate an nfsnode, depending on the value of npp.
+ * npp == NULL --> just do the lookup
+ * *npp == NULL --> allocate a new nfsnode and make sure attributes are
+ * handled too
+ * *npp != NULL --> update the file handle in the vnode
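+ *
+ * For example, the probe loop in nfs_sillyrename() above passes a NULL
+ * npp just to test whether a candidate name already exists, while the
+ * EEXIST kludge in nfs_mkdir() passes &np with *np == NULL so that a
+ * fresh nfsnode is allocated for the directory just looked up.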
+ */
+static int
+nfs_lookitup(dvp, name, len, cred, procp, npp)
+ register struct vnode *dvp;
+ const char *name;
+ int len;
+ struct ucred *cred;
+ struct proc *procp;
+ struct nfsnode **npp;
+{
+ register u_int32_t *tl;
+ register caddr_t cp;
+ register int32_t t1, t2;
+ struct vnode *newvp = (struct vnode *)0;
+ struct nfsnode *np, *dnp = VTONFS(dvp);
+ caddr_t bpos, dpos, cp2;
+ int error = 0, fhlen, attrflag;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ nfsfh_t *nfhp;
+ int v3 = NFS_ISV3(dvp);
+
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ nfsm_reqhead(dvp, NFSPROC_LOOKUP,
+ NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
+ nfsm_fhtom(dvp, v3);
+ nfsm_strtom(name, len, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred);
+ if (npp && !error) {
+ nfsm_getfh(nfhp, fhlen, v3);
+ if (*npp) {
+ np = *npp;
+ if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
+ free((caddr_t)np->n_fhp, M_NFSBIGFH);
+ np->n_fhp = &np->n_fh;
+ } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
+ np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK);
+ bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
+ np->n_fhsize = fhlen;
+ newvp = NFSTOV(np);
+ } else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
+ VREF(dvp);
+ newvp = dvp;
+ } else {
+ error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
+ if (error) {
+ m_freem(mrep);
+ return (error);
+ }
+ newvp = NFSTOV(np);
+ }
+ if (v3) {
+ nfsm_postop_attr(newvp, attrflag);
+ if (!attrflag && *npp == NULL) {
+ m_freem(mrep);
+ if (newvp == dvp)
+ vrele(newvp);
+ else
+ vput(newvp);
+ return (ENOENT);
+ }
+ } else
+ nfsm_loadattr(newvp, (struct vattr *)0);
+ }
+ nfsm_reqdone;
+ if (npp && *npp == NULL) {
+ if (error) {
+ if (newvp) {
+ if (newvp == dvp)
+ vrele(newvp);
+ else
+ vput(newvp);
+ }
+ } else
+ *npp = np;
+ }
+ return (error);
+}
+
+/*
+ * Nfs Version 3 commit rpc
+ */
+int
+nfs_commit(vp, offset, cnt, cred, procp)
+ struct vnode *vp;
+ u_quad_t offset;
+ int cnt;
+ struct ucred *cred;
+ struct proc *procp;
+{
+ register caddr_t cp;
+ register u_int32_t *tl;
+ register int32_t t1, t2;
+ register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ caddr_t bpos, dpos, cp2;
+ int error = 0, wccflag = NFSV3_WCCRATTR;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
+ return (0);
+ nfsstats.rpccnt[NFSPROC_COMMIT]++;
+ nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
+ nfsm_fhtom(vp, 1);
+ nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+ txdr_hyper(offset, tl);
+ tl += 2;
+ *tl = txdr_unsigned(cnt);
+ nfsm_request(vp, NFSPROC_COMMIT, procp, cred);
+ nfsm_wcc_data(vp, wccflag);
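+ /*
+ * Compare the write verifier in the reply with the one remembered
+ * from earlier writes.  A change means the server has rebooted and
+ * may have lost uncommitted data, so return NFSERR_STALEWRITEVERF
+ * so that callers (e.g. nfs_flush()) know the data must be written
+ * again rather than just committed.
+ */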
+ if (!error) {
+ nfsm_dissect(tl, u_int32_t *, NFSX_V3WRITEVERF);
+ if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
+ NFSX_V3WRITEVERF)) {
+ bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
+ NFSX_V3WRITEVERF);
+ error = NFSERR_STALEWRITEVERF;
+ }
+ }
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * Kludge City..
+ * - make nfs_bmap() essentially a no-op that does no translation
+ * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
+ * (Maybe I could use the process's page mapping, but I was concerned that
+ * Kernel Write might not be enabled, and also figured copyout() would do
+ * a lot more work than bcopy(), and it currently happens in the
+ * context of the swapper process (2).)
+ */
+static int
+nfs_bmap(ap)
+ struct vop_bmap_args /* {
+ struct vnode *a_vp;
+ daddr_t a_bn;
+ struct vnode **a_vpp;
+ daddr_t *a_bnp;
+ int *a_runp;
+ int *a_runb;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+
+ if (ap->a_vpp != NULL)
+ *ap->a_vpp = vp;
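+ /*
+ * Logical block numbers are in units of the mount's f_iosize; the
+ * "physical" block number returned is just the same offset expressed
+ * in DEV_BSIZE units, so no real translation takes place.
+ */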
+ if (ap->a_bnp != NULL)
+ *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
+ if (ap->a_runp != NULL)
+ *ap->a_runp = 0;
+ if (ap->a_runb != NULL)
+ *ap->a_runb = 0;
+ return (0);
+}
+
+/*
+ * Strategy routine.
+ * For async requests when nfsiod(s) are running, queue the request by
+ * calling nfs_asyncio(); otherwise just call nfs_doio() to do the
+ * request.
+ */
+static int
+nfs_strategy(ap)
+ struct vop_strategy_args *ap;
+{
+ register struct buf *bp = ap->a_bp;
+ struct ucred *cr;
+ struct proc *p;
+ int error = 0;
+
+ KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
+ KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
+
+ if (bp->b_flags & B_PHYS)
+ panic("nfs physio");
+
+ if (bp->b_flags & B_ASYNC)
+ p = (struct proc *)0;
+ else
+ p = curproc; /* XXX */
+
+ if (bp->b_flags & B_READ)
+ cr = bp->b_rcred;
+ else
+ cr = bp->b_wcred;
+
+ /*
+ * If the op is asynchronous and an i/o daemon is waiting,
+ * queue the request and wake the daemon up;
+ * otherwise just do it ourselves.
+ */
+ if ((bp->b_flags & B_ASYNC) == 0 ||
+ nfs_asyncio(bp, NOCRED, p))
+ error = nfs_doio(bp, cr, p);
+ return (error);
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported.
+ */
+/* ARGSUSED */
+static int
+nfs_mmap(ap)
+ struct vop_mmap_args /* {
+ struct vnode *a_vp;
+ int a_fflags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ return (EINVAL);
+}
+
+/*
+ * fsync vnode op. Just call nfs_flush() with commit == 1.
+ */
+/* ARGSUSED */
+static int
+nfs_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode * a_vp;
+ struct ucred * a_cred;
+ int a_waitfor;
+ struct proc * a_p;
+ } */ *ap;
+{
+
+ return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1));
+}
+
+/*
+ * Flush all the blocks associated with a vnode.
+ * Walk through the buffer pool and push any dirty pages
+ * associated with the vnode.
+ */
+static int
+nfs_flush(vp, cred, waitfor, p, commit)
+ register struct vnode *vp;
+ struct ucred *cred;
+ int waitfor;
+ struct proc *p;
+ int commit;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ register struct buf *bp;
+ register int i;
+ struct buf *nbp;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
+ int passone = 1;
+ u_quad_t off, endoff, toff;
+ struct ucred* wcred = NULL;
+ struct buf **bvec = NULL;
+#ifndef NFS_COMMITBVECSIZ
+#define NFS_COMMITBVECSIZ 20
+#endif
+ struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
+ int bvecsize = 0, bveccount;
+
+ if (nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+ if (!commit)
+ passone = 0;
+ /*
+ * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
+ * server, but has not been committed to stable storage on the server
+ * yet. On the first pass, the byte range is worked out and the commit
+ * rpc is done. On the second pass, nfs_writebp() is called to do the
+ * job.
+ */
+again:
+ off = (u_quad_t)-1;
+ endoff = 0;
+ bvecpos = 0;
+ if (NFS_ISV3(vp) && commit) {
+ s = splbio();
+ /*
+ * Count up how many buffers are waiting for a commit.
+ */
+ bveccount = 0;
+ for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+ if (BUF_REFCNT(bp) == 0 &&
+ (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
+ == (B_DELWRI | B_NEEDCOMMIT))
+ bveccount++;
+ }
+ /*
+ * Allocate space to remember the list of bufs to commit. It is
+ * important to use M_NOWAIT here to avoid a race with nfs_write.
+ * If we can't get memory (for whatever reason), we will end up
+ * committing the buffers one-by-one in the loop below.
+ */
+ if (bveccount > NFS_COMMITBVECSIZ) {
+ if (bvec != NULL && bvec != bvec_on_stack)
+ free(bvec, M_TEMP);
+ bvec = (struct buf **)
+ malloc(bveccount * sizeof(struct buf *),
+ M_TEMP, M_NOWAIT);
+ if (bvec == NULL) {
+ bvec = bvec_on_stack;
+ bvecsize = NFS_COMMITBVECSIZ;
+ } else
+ bvecsize = bveccount;
+ } else {
+ bvec = bvec_on_stack;
+ bvecsize = NFS_COMMITBVECSIZ;
+ }
+ for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+ if (bvecpos >= bvecsize)
+ break;
+ if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
+ (B_DELWRI | B_NEEDCOMMIT) ||
+ BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
+ continue;
+ bremfree(bp);
+ /*
+ * Work out if all buffers are using the same cred
+ * so we can deal with them all with one commit.
+ *
+ * NOTE: we are not clearing B_DONE here, so we have
+ * to do it later on in this routine if we intend to
+ * initiate I/O on the bp.
+ */
+ if (wcred == NULL)
+ wcred = bp->b_wcred;
+ else if (wcred != bp->b_wcred)
+ wcred = NOCRED;
+ bp->b_flags |= B_WRITEINPROG;
+ vfs_busy_pages(bp, 1);
+
+ /*
+ * bp is protected by being locked, but nbp is not
+ * and vfs_busy_pages() may sleep. We have to
+ * recalculate nbp.
+ */
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+
+ /*
+ * A list of these buffers is kept so that the
+ * second loop knows which buffers have actually
+ * been committed. This is necessary, since there
+ * may be a race between the commit rpc and new
+ * uncommitted writes on the file.
+ */
+ bvec[bvecpos++] = bp;
+ toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
+ bp->b_dirtyoff;
+ if (toff < off)
+ off = toff;
+ toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
+ if (toff > endoff)
+ endoff = toff;
+ }
+ splx(s);
+ }
+ if (bvecpos > 0) {
+ /*
+ * Commit data on the server, as required.
+ * If all bufs are using the same wcred, then use that with
+ * one call for all of them, otherwise commit each one
+ * separately.
+ */
+ if (wcred != NOCRED)
+ retv = nfs_commit(vp, off, (int)(endoff - off),
+ wcred, p);
+ else {
+ retv = 0;
+ for (i = 0; i < bvecpos; i++) {
+ off_t off, size;
+ bp = bvec[i];
+ off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
+ bp->b_dirtyoff;
+ size = (u_quad_t)(bp->b_dirtyend
+ - bp->b_dirtyoff);
+ retv = nfs_commit(vp, off, (int)size,
+ bp->b_wcred, p);
+ if (retv) break;
+ }
+ }
+
+ if (retv == NFSERR_STALEWRITEVERF)
+ nfs_clearcommit(vp->v_mount);
+
+ /*
+ * Now, either mark the blocks I/O done or mark the
+ * blocks dirty, depending on whether the commit
+ * succeeded.
+ */
+ for (i = 0; i < bvecpos; i++) {
+ bp = bvec[i];
+ bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG | B_CLUSTEROK);
+ if (retv) {
+ /*
+ * Error, leave B_DELWRI intact
+ */
+ vfs_unbusy_pages(bp);
+ brelse(bp);
+ } else {
+ /*
+ * Success, remove B_DELWRI ( bundirty() ).
+ *
+ * b_dirtyoff/b_dirtyend seem to be NFS
+ * specific. We should probably move that
+ * into bundirty(). XXX
+ */
+ s = splbio();
+ vp->v_numoutput++;
+ bp->b_flags |= B_ASYNC;
+ bundirty(bp);
+ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
+ bp->b_dirtyoff = bp->b_dirtyend = 0;
+ splx(s);
+ biodone(bp);
+ }
+ }
+ }
+
+ /*
+ * Start/do any write(s) that are required.
+ */
+loop:
+ s = splbio();
+ for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
+ if (waitfor != MNT_WAIT || passone)
+ continue;
+ error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL,
+ "nfsfsync", slpflag, slptimeo);
+ splx(s);
+ if (error == 0)
+ panic("nfs_fsync: inconsistent lock");
+ if (error == ENOLCK)
+ goto loop;
+ if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+ error = EINTR;
+ goto done;
+ }
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ goto loop;
+ }
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("nfs_fsync: not dirty");
+ if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
+ BUF_UNLOCK(bp);
+ continue;
+ }
+ bremfree(bp);
+ if (passone || !commit)
+ bp->b_flags |= B_ASYNC;
+ else
+ bp->b_flags |= B_ASYNC | B_WRITEINPROG;
+ splx(s);
+ VOP_BWRITE(bp->b_vp, bp);
+ goto loop;
+ }
+ splx(s);
+ if (passone) {
+ passone = 0;
+ goto again;
+ }
+ if (waitfor == MNT_WAIT) {
+ while (vp->v_numoutput) {
+ vp->v_flag |= VBWAIT;
+ error = tsleep((caddr_t)&vp->v_numoutput,
+ slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
+ if (error) {
+ if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+ error = EINTR;
+ goto done;
+ }
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ }
+ if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) {
+ goto loop;
+ }
+ }
+ if (np->n_flag & NWRITEERR) {
+ error = np->n_error;
+ np->n_flag &= ~NWRITEERR;
+ }
+done:
+ if (bvec != NULL && bvec != bvec_on_stack)
+ free(bvec, M_TEMP);
+ return (error);
+}
+
+/*
+ * NFS advisory byte-level locks.
+ * Currently unsupported.
+ */
+static int
+nfs_advlock(ap)
+ struct vop_advlock_args /* {
+ struct vnode *a_vp;
+ caddr_t a_id;
+ int a_op;
+ struct flock *a_fl;
+ int a_flags;
+ } */ *ap;
+{
+ register struct nfsnode *np = VTONFS(ap->a_vp);
+
+ /*
+ * The following kludge is to allow diskless support to work
+ * until a real NFS lockd is implemented. Basically, just pretend
+ * that this is a local lock.
+ */
+ return (lf_advlock(ap, &(np->n_lockf), np->n_size));
+}
+
+/*
+ * Print out the contents of an nfsnode.
+ */
+static int
+nfs_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+
+ printf("tag VT_NFS, fileid %ld fsid 0x%x",
+ np->n_vattr.va_fileid, np->n_vattr.va_fsid);
+ if (vp->v_type == VFIFO)
+ fifo_printinfo(vp);
+ printf("\n");
+ return (0);
+}
+
+/*
+ * Just call nfs_writebp() with the force argument set to 1.
+ *
+ * NOTE: B_DONE may or may not be set in a_bp on call.
+ */
+static int
+nfs_bwrite(ap)
+ struct vop_bwrite_args /* {
+ struct vnode *a_bp;
+ } */ *ap;
+{
+ return (nfs_writebp(ap->a_bp, 1, curproc));
+}
+
+/*
+ * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless
+ * the force flag is set; it also handles the B_NEEDCOMMIT flag. We set
+ * B_CACHE if this is a VMIO buffer.
+ */
+int
+nfs_writebp(bp, force, procp)
+ register struct buf *bp;
+ int force;
+ struct proc *procp;
+{
+ int s;
+ int oldflags = bp->b_flags;
+#if 0
+ int retv = 1;
+ off_t off;
+#endif
+
+ if (BUF_REFCNT(bp) == 0)
+ panic("bwrite: buffer is not locked???");
+
+ if (bp->b_flags & B_INVAL) {
+ brelse(bp);
+ return(0);
+ }
+
+ bp->b_flags |= B_CACHE;
+
+ /*
+ * Undirty the bp. We will redirty it later if the I/O fails.
+ */
+
+ s = splbio();
+ bundirty(bp);
+ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
+
+ bp->b_vp->v_numoutput++;
+ curproc->p_stats->p_ru.ru_oublock++;
+ splx(s);
+
+ vfs_busy_pages(bp, 1);
+ if (force)
+ bp->b_flags |= B_WRITEINPROG;
+ BUF_KERNPROC(bp);
+ VOP_STRATEGY(bp->b_vp, bp);
+
+ if( (oldflags & B_ASYNC) == 0) {
+ int rtval = biowait(bp);
+
+ if (oldflags & B_DELWRI) {
+ s = splbio();
+ reassignbuf(bp, bp->b_vp);
+ splx(s);
+ }
+
+ brelse(bp);
+ return (rtval);
+ }
+
+ return (0);
+}
+
+/*
+ * nfs special file access vnode op.
+ * Essentially just get vattr and then imitate iaccess() since the device is
+ * local to the client.
+ */
+static int
+nfsspec_access(ap)
+ struct vop_access_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vattr *vap;
+ register gid_t *gp;
+ register struct ucred *cred = ap->a_cred;
+ struct vnode *vp = ap->a_vp;
+ mode_t mode = ap->a_mode;
+ struct vattr vattr;
+ register int i;
+ int error;
+
+ /*
+ * Disallow write attempts on filesystems mounted read-only,
+ * unless the file is a socket, fifo, or a block or character
+ * device resident on the filesystem.
+ */
+ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+ switch (vp->v_type) {
+ case VREG:
+ case VDIR:
+ case VLNK:
+ return (EROFS);
+ default:
+ break;
+ }
+ }
+ /*
+ * If you're the super-user,
+ * you always get access.
+ */
+ if (cred->cr_uid == 0)
+ return (0);
+ vap = &vattr;
+ error = VOP_GETATTR(vp, vap, cred, ap->a_p);
+ if (error)
+ return (error);
+ /*
+ * Access check is based on only one of owner, group, public.
+ * If not owner, then check group. If not a member of the
+ * group, then check public access.
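+ * The VREAD/VWRITE/VEXEC bits in "mode" line up with the owner bits
+ * of va_mode, so each "mode >>= 3" below shifts the check down to the
+ * group and then the "other" permission bits.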
+ */
+ if (cred->cr_uid != vap->va_uid) {
+ mode >>= 3;
+ gp = cred->cr_groups;
+ for (i = 0; i < cred->cr_ngroups; i++, gp++)
+ if (vap->va_gid == *gp)
+ goto found;
+ mode >>= 3;
+found:
+ ;
+ }
+ error = (vap->va_mode & mode) == mode ? 0 : EACCES;
+ return (error);
+}
+
+/*
+ * Read wrapper for special devices.
+ */
+static int
+nfsspec_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct nfsnode *np = VTONFS(ap->a_vp);
+
+ /*
+ * Set access flag.
+ */
+ np->n_flag |= NACC;
+ getnanotime(&np->n_atim);
+ return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for special devices.
+ */
+static int
+nfsspec_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct nfsnode *np = VTONFS(ap->a_vp);
+
+ /*
+ * Set update flag.
+ */
+ np->n_flag |= NUPD;
+ getnanotime(&np->n_mtim);
+ return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for special devices.
+ *
+ * Update the times on the nfsnode then do device close.
+ */
+static int
+nfsspec_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ struct vattr vattr;
+
+ if (np->n_flag & (NACC | NUPD)) {
+ np->n_flag |= NCHG;
+ if (vp->v_usecount == 1 &&
+ (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+ VATTR_NULL(&vattr);
+ if (np->n_flag & NACC)
+ vattr.va_atime = np->n_atim;
+ if (np->n_flag & NUPD)
+ vattr.va_mtime = np->n_mtim;
+ (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+ }
+ }
+ return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
+}
+
+/*
+ * Read wrapper for fifos.
+ */
+static int
+nfsfifo_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct nfsnode *np = VTONFS(ap->a_vp);
+
+ /*
+ * Set access flag.
+ */
+ np->n_flag |= NACC;
+ getnanotime(&np->n_atim);
+ return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for fifos.
+ */
+static int
+nfsfifo_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct nfsnode *np = VTONFS(ap->a_vp);
+
+ /*
+ * Set update flag.
+ */
+ np->n_flag |= NUPD;
+ getnanotime(&np->n_mtim);
+ return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for fifos.
+ *
+ * Update the times on the nfsnode then do fifo close.
+ */
+static int
+nfsfifo_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ struct vattr vattr;
+ struct timespec ts;
+
+ if (np->n_flag & (NACC | NUPD)) {
+ getnanotime(&ts);
+ if (np->n_flag & NACC)
+ np->n_atim = ts;
+ if (np->n_flag & NUPD)
+ np->n_mtim = ts;
+ np->n_flag |= NCHG;
+ if (vp->v_usecount == 1 &&
+ (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+ VATTR_NULL(&vattr);
+ if (np->n_flag & NACC)
+ vattr.va_atime = np->n_atim;
+ if (np->n_flag & NUPD)
+ vattr.va_mtime = np->n_mtim;
+ (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+ }
+ }
+ return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
+}
diff --git a/sys/nfs/nfsdiskless.h b/sys/nfs/nfsdiskless.h
new file mode 100644
index 0000000..487e0bf
--- /dev/null
+++ b/sys/nfs/nfsdiskless.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsdiskless.h 8.2 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_NFSDISKLESS_H_
+#define _NFS_NFSDISKLESS_H_
+
+/*
+ * Structure that must be initialized for a diskless nfs client.
+ * This structure is used by nfs_mountroot() to set up the root vnode,
+ * and to do a partial ifconfig(8) and route(8) so that the critical net
+ * interface can communicate with the server.
+ * The primary bootstrap is expected to fill in the appropriate fields before
+ * starting the kernel. Whether or not the swap area is nfs mounted is
+ * determined by the value in swdevt[0] (equal to NODEV --> swap over nfs).
+ * Currently only works for AF_INET protocols.
+ * NB: All fields are stored in net byte order to avoid hassles with
+ * client/server byte ordering differences.
+ */
+
+/*
+ * I have defined a new structure that can handle an NFS Version 3 file handle,
+ * but the kernel still expects the old Version 2 one to be provided. The
+ * changes required in nfs_vfsops.c for using the new one are documented there in
+ * comments. (I felt that breaking network booting code by changing this
+ * structure would not be prudent at this time, since almost all servers are
+ * still Version 2 anyhow.)
+ */
+struct nfsv3_diskless {
+ struct ifaliasreq myif; /* Default interface */
+ struct sockaddr_in mygateway; /* Default gateway */
+ struct nfs_args swap_args; /* Mount args for swap file */
+ int swap_fhsize; /* Size of file handle */
+ u_char swap_fh[NFSX_V3FHMAX]; /* Swap file's file handle */
+ struct sockaddr_in swap_saddr; /* Address of swap server */
+ char swap_hostnam[MNAMELEN]; /* Host name for mount pt */
+ int swap_nblks; /* Size of server swap file */
+ struct ucred swap_ucred; /* Swap credentials */
+ struct nfs_args root_args; /* Mount args for root fs */
+ int root_fhsize; /* Size of root file handle */
+ u_char root_fh[NFSX_V3FHMAX]; /* File handle of root dir */
+ struct sockaddr_in root_saddr; /* Address of root server */
+ char root_hostnam[MNAMELEN]; /* Host name for mount pt */
+ long root_time; /* Timestamp of root fs */
+ char my_hostnam[MAXHOSTNAMELEN]; /* Client host name */
+};
+
+/*
+ * Old arguments to mount NFS
+ */
+struct onfs_args {
+ struct sockaddr *addr; /* file server address */
+ int addrlen; /* length of address */
+ int sotype; /* Socket type */
+ int proto; /* and Protocol */
+ u_char *fh; /* File handle to be mounted */
+ int fhsize; /* Size, in bytes, of fh */
+ int flags; /* flags */
+ int wsize; /* write size in bytes */
+ int rsize; /* read size in bytes */
+ int readdirsize; /* readdir size in bytes */
+ int timeo; /* initial timeout in .1 secs */
+ int retrans; /* times to retry send */
+ int maxgrouplist; /* Max. size of group list */
+ int readahead; /* # of blocks to readahead */
+ int leaseterm; /* Term (sec) of lease */
+ int deadthresh; /* Retrans threshold */
+ char *hostname; /* server's name */
+};
+
+struct nfs_diskless {
+ struct ifaliasreq myif; /* Default interface */
+ struct sockaddr_in mygateway; /* Default gateway */
+ struct onfs_args swap_args; /* Mount args for swap file */
+ u_char swap_fh[NFSX_V2FH]; /* Swap file's file handle */
+ struct sockaddr_in swap_saddr; /* Address of swap server */
+ char swap_hostnam[MNAMELEN]; /* Host name for mount pt */
+ int swap_nblks; /* Size of server swap file */
+ struct ucred swap_ucred; /* Swap credentials */
+ struct onfs_args root_args; /* Mount args for root fs */
+ u_char root_fh[NFSX_V2FH]; /* File handle of root dir */
+ struct sockaddr_in root_saddr; /* Address of root server */
+ char root_hostnam[MNAMELEN]; /* Host name for mount pt */
+ long root_time; /* Timestamp of root fs */
+ char my_hostnam[MAXHOSTNAMELEN]; /* Client host name */
+};
+
+#endif
diff --git a/sys/nfs/nfsm_subs.h b/sys/nfs/nfsm_subs.h
new file mode 100644
index 0000000..b13c009
--- /dev/null
+++ b/sys/nfs/nfsm_subs.h
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsm_subs.h 8.2 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_NFSM_SUBS_H_
+#define _NFS_NFSM_SUBS_H_
+
+struct ucred;
+struct vnode;
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. Attempting to use them for any
+ * other purpose is dangerous (they make weird assumptions).
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+struct mbuf *nfsm_reqh __P((struct vnode *vp, u_long procid, int hsiz,
+ caddr_t *bposp));
+struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid,
+ int auth_type, int auth_len, char *auth_str,
+ int verf_len, char *verf_str,
+ struct mbuf *mrest, int mrest_len,
+ struct mbuf **mbp, u_int32_t *xidp));
+
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
+#define NFSMINOFF(m) \
+ do { \
+ if (M_HASCL(m)) \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ else if ((m)->m_flags & M_PKTHDR) \
+ (m)->m_data = (m)->m_pktdat; \
+ else \
+ (m)->m_data = (m)->m_dat; \
+ } while (0)
+#define NFSMADV(m, s) \
+ do { \
+ (m)->m_data += (s); \
+ } while (0)
+#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \
+ (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead, and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorky, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
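+ *
+ * A typical client-side RPC stub in nfs_vnops.c (nfs_rmdir(), for example)
+ * uses them roughly in this order:
+ *	nfsm_reqhead(vp, proc, size);		allocate the request mbufs
+ *	nfsm_fhtom(vp, v3);			append the file handle
+ *	nfsm_strtom(name, len, maxlen);		append XDR-encoded arguments
+ *	nfsm_request(vp, proc, p, cred);	do the RPC, get the reply
+ *	nfsm_wcc_data(vp, wccflag);		pull results out of the reply
+ *	nfsm_reqdone;				free the reply, nfsmout: label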
+ */
+
+#define nfsm_build(a,c,s) \
+ do { \
+ if ((s) > M_TRAILINGSPACE(mb)) { \
+ MGET(mb2, M_WAIT, MT_DATA); \
+ if ((s) > MLEN) \
+ panic("build > MLEN"); \
+ mb->m_next = mb2; \
+ mb = mb2; \
+ mb->m_len = 0; \
+ bpos = mtod(mb, caddr_t); \
+ } \
+ (a) = (c)(bpos); \
+ mb->m_len += (s); \
+ bpos += (s); \
+ } while (0)
+
+#define nfsm_dissect(a, c, s) \
+ do { \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ (a) = (c)(dpos); \
+ dpos += (s); \
+ } else if ((t1 = nfsm_disct(&md, &dpos, (s), t1, &cp2)) != 0){ \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } else { \
+ (a) = (c)cp2; \
+ } \
+ } while (0)
+
+#define nfsm_fhtom(v, v3) \
+ do { \
+ if (v3) { \
+ t2 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED; \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl, u_int32_t *, t2); \
+ *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize); \
+ *(tl + ((t2>>2) - 2)) = 0; \
+ bcopy((caddr_t)VTONFS(v)->n_fhp,(caddr_t)tl, \
+ VTONFS(v)->n_fhsize); \
+ } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, \
+ (caddr_t)VTONFS(v)->n_fhp, \
+ VTONFS(v)->n_fhsize)) != 0) { \
+ error = t2; \
+ m_freem(mreq); \
+ goto nfsmout; \
+ } \
+ } else { \
+ nfsm_build(cp, caddr_t, NFSX_V2FH); \
+ bcopy((caddr_t)VTONFS(v)->n_fhp, cp, NFSX_V2FH); \
+ } \
+ } while (0)
+
+#define nfsm_srvfhtom(f, v3) \
+ do { \
+ if (v3) { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FH);\
+ *tl++ = txdr_unsigned(NFSX_V3FH); \
+ bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \
+ } else { \
+ nfsm_build(cp, caddr_t, NFSX_V2FH); \
+ bcopy((caddr_t)(f), cp, NFSX_V2FH); \
+ } \
+ } while (0)
+
+#define nfsm_srvpostop_fh(f) \
+ do { \
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_V3FH); \
+ *tl++ = nfs_true; \
+ *tl++ = txdr_unsigned(NFSX_V3FH); \
+ bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \
+ } while (0)
+
+#define nfsm_mtofh(d, v, v3, f) \
+ do { \
+ struct nfsnode *ttnp; nfsfh_t *ttfhp; int ttfhsize; \
+ if (v3) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ (f) = fxdr_unsigned(int, *tl); \
+ } else \
+ (f) = 1; \
+ if (f) { \
+ nfsm_getfh(ttfhp, ttfhsize, (v3)); \
+ if ((t1 = nfs_nget((d)->v_mount, ttfhp, ttfhsize, \
+ &ttnp)) != 0) { \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = NFSTOV(ttnp); \
+ } \
+ if (v3) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (f) \
+ (f) = fxdr_unsigned(int, *tl); \
+ else if (fxdr_unsigned(int, *tl)) \
+ nfsm_adv(NFSX_V3FATTR); \
+ } \
+ if (f) \
+ nfsm_loadattr((v), (struct vattr *)0); \
+ } while (0)
+
+#define nfsm_getfh(f, s, v3) \
+ do { \
+ if (v3) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(int, *tl)) <= 0 || \
+ (s) > NFSX_V3FHMAX) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } \
+ } else \
+ (s) = NFSX_V2FH; \
+ nfsm_dissect((f), nfsfh_t *, nfsm_rndup(s)); \
+ } while (0)
+
+#define nfsm_loadattr(v, a) \
+ do { \
+ struct vnode *ttvp = (v); \
+ if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, (a))) != 0) { \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = ttvp; \
+ } while (0)
+
+#define nfsm_postop_attr(v, f) \
+ do { \
+ struct vnode *ttvp = (v); \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (((f) = fxdr_unsigned(int, *tl)) != 0) { \
+ if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, \
+ (struct vattr *)0)) != 0) { \
+ error = t1; \
+ (f) = 0; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = ttvp; \
+ } \
+ } while (0)
+
+/* Used as (f) for nfsm_wcc_data() */
+#define NFSV3_WCCRATTR 0
+#define NFSV3_WCCCHK 1
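+/*
+ * With NFSV3_WCCRATTR the flag returned in (f) just reports whether
+ * post-op attributes were present; with NFSV3_WCCCHK it instead reports
+ * whether the pre-op mtime still matches the cached n_mtime, i.e.
+ * whether the client's cached data is still valid.
+ */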
+
+#define nfsm_wcc_data(v, f) \
+ do { \
+ int ttattrf, ttretf = 0; \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED); \
+ if (f) \
+ ttretf = (VTONFS(v)->n_mtime == \
+ fxdr_unsigned(u_int32_t, *(tl + 2))); \
+ } \
+ nfsm_postop_attr((v), ttattrf); \
+ if (f) { \
+ (f) = ttretf; \
+ } else { \
+ (f) = ttattrf; \
+ } \
+ } while (0)
+
+/* If full is true, set all fields, otherwise just set mode and time fields */
+#define nfsm_v3attrbuild(a, full) \
+ do { \
+ if ((a)->va_mode != (mode_t)VNOVAL) { \
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ *tl++ = nfs_true; \
+ *tl = txdr_unsigned((a)->va_mode); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = nfs_false; \
+ } \
+ if ((full) && (a)->va_uid != (uid_t)VNOVAL) { \
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ *tl++ = nfs_true; \
+ *tl = txdr_unsigned((a)->va_uid); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = nfs_false; \
+ } \
+ if ((full) && (a)->va_gid != (gid_t)VNOVAL) { \
+ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ *tl++ = nfs_true; \
+ *tl = txdr_unsigned((a)->va_gid); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = nfs_false; \
+ } \
+ if ((full) && (a)->va_size != VNOVAL) { \
+ nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED); \
+ *tl++ = nfs_true; \
+ txdr_hyper((a)->va_size, tl); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = nfs_false; \
+ } \
+ if ((a)->va_atime.tv_sec != VNOVAL) { \
+ if ((a)->va_atime.tv_sec != time_second) { \
+ nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);\
+ *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);\
+ txdr_nfsv3time(&(a)->va_atime, tl); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \
+ } \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \
+ } \
+ if ((a)->va_mtime.tv_sec != VNOVAL) { \
+ if ((a)->va_mtime.tv_sec != time_second) { \
+ nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);\
+ *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);\
+ txdr_nfsv3time(&(a)->va_mtime, tl); \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \
+ } \
+ } else { \
+ nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \
+ *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \
+ } \
+ } while (0)
+
+
+#define nfsm_strsiz(s,m) \
+ do { \
+ nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(int32_t,*tl)) > (m)) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_srvstrsiz(s,m) \
+ do { \
+ nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(int32_t,*tl)) > (m) || (s) <= 0) { \
+ error = EBADRPC; \
+ nfsm_reply(0); \
+ } \
+ } while (0)
+
+#define nfsm_srvnamesiz(s) \
+ do { \
+ nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(int32_t,*tl)) > NFS_MAXNAMLEN) \
+ error = NFSERR_NAMETOL; \
+ if ((s) <= 0) \
+ error = EBADRPC; \
+ if (error) \
+ nfsm_reply(0); \
+ } while (0)
+
+#define nfsm_mtouio(p,s) \
+ do {\
+ if ((s) > 0 && \
+ (t1 = nfsm_mbuftouio(&md,(p),(s),&dpos)) != 0) { \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_uiotom(p,s) \
+ do { \
+ if ((t1 = nfsm_uiotombuf((p),&mb,(s),&bpos)) != 0) { \
+ error = t1; \
+ m_freem(mreq); \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_reqhead(v,a,s) \
+ do { \
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos); \
+ } while (0)
+
+#define nfsm_reqdone \
+ do { \
+ m_freem(mrep); \
+ nfsmout: \
+ } while (0)
+
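+/* Round a length up to the next multiple of 4 (XDR padding), e.g. 5 -> 8, 8 -> 8. */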
+#define nfsm_rndup(a) (((a)+3)&(~0x3))
+
+#define nfsm_request(v, t, p, c) \
+ do { \
+ if ((error = nfs_request((v), mreq, (t), (p), \
+ (c), &mrep, &md, &dpos)) != 0) { \
+ if (error & NFSERR_RETERR) \
+ error &= ~NFSERR_RETERR; \
+ else \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_strtom(a,s,m) \
+ do {\
+ if ((s) > (m)) { \
+ m_freem(mreq); \
+ error = ENAMETOOLONG; \
+ goto nfsmout; \
+ } \
+ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl,u_int32_t *,t2); \
+ *tl++ = txdr_unsigned(s); \
+ *(tl+((t2>>2)-2)) = 0; \
+ bcopy((const char *)(a), (caddr_t)tl, (s)); \
+ } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, (a), (s))) != 0) { \
+ error = t2; \
+ m_freem(mreq); \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_srvdone \
+ do { \
+ nfsmout: \
+ return (error); \
+ } while (0)
+
+#define nfsm_reply(s) \
+ do { \
+ nfsd->nd_repstat = error; \
+ if (error && !(nfsd->nd_flag & ND_NFSV3)) \
+ (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ if (mrep != NULL) { \
+ m_freem(mrep); \
+ mrep = NULL; \
+ } \
+ mreq = *mrq; \
+ if (error && (!(nfsd->nd_flag & ND_NFSV3) || \
+ error == EBADRPC)) { \
+ error = 0; \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_writereply(s, v3) \
+ do { \
+ nfsd->nd_repstat = error; \
+ if (error && !(v3)) \
+ (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \
+ &mreq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \
+ &mreq, &mb, &bpos); \
+ } while (0)
+
+#define nfsm_adv(s) \
+ do { \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ dpos += (s); \
+ } else if ((t1 = nfs_adv(&md, &dpos, (s), t1)) != 0) { \
+ error = t1; \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ } while (0)
+
+#define nfsm_srvmtofh(f) \
+ do { \
+ int fhlen; \
+ if (nfsd->nd_flag & ND_NFSV3) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ fhlen = fxdr_unsigned(int, *tl); \
+ if (fhlen != 0 && fhlen != NFSX_V3FH) { \
+ error = EBADRPC; \
+ nfsm_reply(0); \
+ } \
+ } else { \
+ fhlen = NFSX_V2FH; \
+ } \
+ if (fhlen != 0) { \
+ nfsm_dissect(tl, u_int32_t *, fhlen); \
+ bcopy((caddr_t)tl, (caddr_t)(f), fhlen); \
+ } else {\
+ bzero((caddr_t)(f), NFSX_V3FH); \
+ } \
+ } while (0)
+
+#define nfsm_clget \
+ do { \
+ if (bp >= be) { \
+ if (mp == mb) \
+ mp->m_len += bp-bpos; \
+ MGET(mp, M_WAIT, MT_DATA); \
+ MCLGET(mp, M_WAIT); \
+ mp->m_len = NFSMSIZ(mp); \
+ mp2->m_next = mp; \
+ mp2 = mp; \
+ bp = mtod(mp, caddr_t); \
+ be = bp+mp->m_len; \
+ } \
+ tl = (u_int32_t *)bp; \
+ } while (0)
+
+#define nfsm_srvfillattr(a, f) \
+ do { \
+ nfsm_srvfattr(nfsd, (a), (f)); \
+ } while (0)
+
+#define nfsm_srvwcc_data(br, b, ar, a) \
+ do { \
+ nfsm_srvwcc(nfsd, (br), (b), (ar), (a), &mb, &bpos); \
+ } while (0)
+
+#define nfsm_srvpostop_attr(r, a) \
+ do { \
+ nfsm_srvpostopattr(nfsd, (r), (a), &mb, &bpos); \
+ } while (0)
+
+#define nfsm_srvsattr(a) \
+ do { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ (a)->va_mode = nfstov_mode(*tl); \
+ } \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ (a)->va_uid = fxdr_unsigned(uid_t, *tl); \
+ } \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ (a)->va_gid = fxdr_unsigned(gid_t, *tl); \
+ } \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ if (*tl == nfs_true) { \
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ (a)->va_size = fxdr_hyper(tl); \
+ } \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ switch (fxdr_unsigned(int, *tl)) { \
+ case NFSV3SATTRTIME_TOCLIENT: \
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ fxdr_nfsv3time(tl, &(a)->va_atime); \
+ break; \
+ case NFSV3SATTRTIME_TOSERVER: \
+ getnanotime(&(a)->va_atime); \
+ break; \
+ } \
+ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \
+ switch (fxdr_unsigned(int, *tl)) { \
+ case NFSV3SATTRTIME_TOCLIENT: \
+ nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \
+ fxdr_nfsv3time(tl, &(a)->va_mtime); \
+ break; \
+ case NFSV3SATTRTIME_TOSERVER: \
+ getnanotime(&(a)->va_mtime); \
+ break; \
+ } \
+ } while (0)
+
+#endif
diff --git a/sys/nfs/nfsmount.h b/sys/nfs/nfsmount.h
new file mode 100644
index 0000000..77ac7e3
--- /dev/null
+++ b/sys/nfs/nfsmount.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsmount.h 8.3 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_NFSMOUNT_H_
+#define _NFS_NFSMOUNT_H_
+
+/*
+ * Mount structure.
+ * One allocated on every NFS mount.
+ * Holds NFS specific information for mount.
+ */
+struct nfsmount {
+ int nm_flag; /* Flags for soft/hard... */
+ int nm_state; /* Internal state flags */
+ struct mount *nm_mountp; /* Vfs structure for this filesystem */
+ int nm_numgrps; /* Max. size of groupslist */
+ u_char nm_fh[NFSX_V3FHMAX]; /* File handle of root dir */
+ int nm_fhsize; /* Size of root file handle */
+ struct socket *nm_so; /* Rpc socket */
+ int nm_sotype; /* Type of socket */
+ int nm_soproto; /* and protocol */
+ int nm_soflags; /* pr_flags for socket protocol */
+ struct sockaddr *nm_nam; /* Addr of server */
+ int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */
+ int nm_retry; /* Max retries */
+ int nm_srtt[4]; /* Timers for rpcs */
+ int nm_sdrtt[4];
+ int nm_sent; /* Request send count */
+ int nm_cwnd; /* Request send window */
+ int nm_timeouts; /* Request timeouts */
+ int nm_deadthresh; /* Threshold of timeouts-->dead server*/
+ int nm_rsize; /* Max size of read rpc */
+ int nm_wsize; /* Max size of write rpc */
+ int nm_readdirsize; /* Size of a readdir rpc */
+ int nm_readahead; /* Num. of blocks to readahead */
+ int nm_leaseterm; /* Term (sec) for NQNFS lease */
+ int nm_acdirmin; /* Directory attr cache min lifetime */
+ int nm_acdirmax; /* Directory attr cache max lifetime */
+ int nm_acregmin; /* Reg file attr cache min lifetime */
+ int nm_acregmax; /* Reg file attr cache max lifetime */
+ CIRCLEQ_HEAD(, nfsnode) nm_timerhead; /* Head of lease timer queue */
+ struct vnode *nm_inprog; /* Vnode in prog by nqnfs_clientd() */
+ uid_t nm_authuid; /* Uid for authenticator */
+ int nm_authtype; /* Authenticator type */
+ int nm_authlen; /* and length */
+ char *nm_authstr; /* Authenticator string */
+ char *nm_verfstr; /* and the verifier */
+ int nm_verflen;
+ u_char nm_verf[NFSX_V3WRITEVERF]; /* V3 write verifier */
+ NFSKERBKEY_T nm_key; /* and the session key */
+ int nm_numuids; /* Number of nfsuid mappings */
+ TAILQ_HEAD(, nfsuid) nm_uidlruhead; /* Lists of nfsuid mappings */
+ LIST_HEAD(, nfsuid) nm_uidhashtbl[NFS_MUIDHASHSIZ];
+ TAILQ_HEAD(, buf) nm_bufq; /* async io buffer queue */
+ short nm_bufqlen; /* number of buffers in queue */
+ short nm_bufqwant; /* process wants to add to the queue */
+ int nm_bufqiods; /* number of iods processing queue */
+ u_int64_t nm_maxfilesize; /* maximum file size */
+};
+
+#if defined(KERNEL) || defined(_KERNEL)
+/*
+ * Convert mount ptr to nfsmount ptr.
+ */
+#define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data))
+
+#endif /* KERNEL */
+
+#endif
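
VFSTONFS() is the usual mnt_data back-pointer: the NFS-specific nfsmount hangs off the generic struct mount, and any code holding a mount (or a vnode's v_mount) casts its way back to the per-mount NFS state. A toy stand-alone sketch of the idiom, with struct mount reduced to a stub rather than the real kernel definition:

/*
 * Sketch of the VFSTONFS() back-pointer idiom with stubbed-out types.
 * Only mnt_data and a couple of nfsmount fields are modelled.
 */
#include <stdio.h>

struct mount {
	void	*mnt_data;		/* filesystem-private data */
};

struct nfsmount {
	int		 nm_flag;	/* soft/hard, etc. */
	struct mount	*nm_mountp;	/* back pointer to generic mount */
	int		 nm_rsize;	/* max read RPC size */
};

#define VFSTONFS(mp)	((struct nfsmount *)((mp)->mnt_data))

int
main(void)
{
	struct mount mnt;
	struct nfsmount nmp = { .nm_flag = 0, .nm_mountp = &mnt, .nm_rsize = 8192 };

	mnt.mnt_data = &nmp;		/* wired up at mount time */
	printf("rsize = %d\n", VFSTONFS(&mnt)->nm_rsize);
	return (0);
}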
diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h
new file mode 100644
index 0000000..e654bc0
--- /dev/null
+++ b/sys/nfs/nfsnode.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsnode.h 8.9 (Berkeley) 5/14/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_NFSNODE_H_
+#define _NFS_NFSNODE_H_
+
+#if !defined(_NFS_NFS_H_) && !defined(KERNEL)
+#include <nfs/nfs.h>
+#endif
+
+/*
+ * Silly rename structure that hangs off the nfsnode until the name
+ * can be removed by nfs_inactive()
+ */
+struct sillyrename {
+ struct ucred *s_cred;
+ struct vnode *s_dvp;
+ long s_namlen;
+ char s_name[20];
+};
+
+/*
+ * This structure is used to save the logical directory offset to
+ * NFS cookie mappings.
+ * The mappings are stored in a list headed
+ * by n_cookies, as required.
+ * There is one mapping for each NFS_DIRBLKSIZ bytes of directory information
+ * stored in increasing logical offset byte order.
+ */
+#define NFSNUMCOOKIES 31
+
+struct nfsdmap {
+ LIST_ENTRY(nfsdmap) ndm_list;
+ int ndm_eocookie;
+ nfsuint64 ndm_cookies[NFSNUMCOOKIES];
+};
+
+/*
+ * The nfsnode is the nfs equivalent to ufs's inode. Any similarity
+ * is purely coincidental.
+ * There is a unique nfsnode allocated for each active file,
+ * each current directory, each mounted-on file, text file, and the root.
+ * An nfsnode is 'named' by its file handle. (nget/nfs_node.c)
+ * If this structure exceeds 256 bytes (it is currently 256 using 4.4BSD-Lite
+ * type definitions), file handles of > 32 bytes should probably be split out
+ * into a separate MALLOC()'d data structure. (Reduce the size of nfsfh_t by
+ * changing the definition in nfsproto.h of NFS_SMALLFH.)
+ * NB: Hopefully the current order of the fields is such that everything will
+ * be well aligned and, therefore, tightly packed.
+ */
+struct nfsnode {
+ LIST_ENTRY(nfsnode) n_hash; /* Hash chain */
+ CIRCLEQ_ENTRY(nfsnode) n_timer; /* Nqnfs timer chain */
+ u_quad_t n_size; /* Current size of file */
+ u_quad_t n_brev; /* Modify rev when cached */
+ u_quad_t n_lrev; /* Modify rev for lease */
+ struct vattr n_vattr; /* Vnode attribute cache */
+ time_t n_attrstamp; /* Attr. cache timestamp */
+ u_int32_t n_mode; /* ACCESS mode cache */
+ uid_t n_modeuid; /* credentials having mode */
+ time_t n_modestamp; /* mode cache timestamp */
+ time_t n_mtime; /* Prev modify time. */
+ time_t n_ctime; /* Prev create time. */
+ time_t n_expiry; /* Lease expiry time */
+ nfsfh_t *n_fhp; /* NFS File Handle */
+ struct vnode *n_vnode; /* associated vnode */
+ struct lockf *n_lockf; /* Locking record of file */
+ int n_error; /* Save write error value */
+ union {
+ struct timespec nf_atim; /* Special file times */
+ nfsuint64 nd_cookieverf; /* Cookie verifier (dir only) */
+ } n_un1;
+ union {
+ struct timespec nf_mtim;
+ off_t nd_direof; /* Dir. EOF offset cache */
+ } n_un2;
+ union {
+ struct sillyrename *nf_silly; /* Ptr to silly rename struct */
+ LIST_HEAD(, nfsdmap) nd_cook; /* cookies */
+ } n_un3;
+ short n_fhsize; /* size in bytes, of fh */
+ short n_flag; /* Flag for locking.. */
+ nfsfh_t n_fh; /* Small File Handle */
+ struct lock n_rslock;
+};
+
+#define n_atim n_un1.nf_atim
+#define n_mtim n_un2.nf_mtim
+#define n_sillyrename n_un3.nf_silly
+#define n_cookieverf n_un1.nd_cookieverf
+#define n_direofoffset n_un2.nd_direof
+#define n_cookies n_un3.nd_cook
+
+/*
+ * Flags for n_flag
+ */
+#define NFLUSHWANT 0x0001 /* Want wakeup from a flush in prog. */
+#define NFLUSHINPROG 0x0002 /* Avoid multiple calls to vinvalbuf() */
+#define NMODIFIED 0x0004 /* Might have a modified buffer in bio */
+#define NWRITEERR 0x0008 /* Flag write errors so close will know */
+#define NQNFSNONCACHE 0x0020 /* Non-cachable lease */
+#define NQNFSWRITE 0x0040 /* Write lease */
+#define NQNFSEVICTED 0x0080 /* Has been evicted */
+#define NACC 0x0100 /* Special file accessed */
+#define NUPD 0x0200 /* Special file updated */
+#define NCHG 0x0400 /* Special file times changed */
+#define NLOCKED 0x0800 /* node is locked */
+#define NWANTED 0x0100 /* someone wants to lock */
+
+/*
+ * Convert between nfsnode pointers and vnode pointers
+ */
+#define VTONFS(vp) ((struct nfsnode *)(vp)->v_data)
+#define NFSTOV(np) ((struct vnode *)(np)->n_vnode)
+
+/*
+ * Queue head for nfsiod's
+ */
+extern TAILQ_HEAD(nfs_bufq, buf) nfs_bufq;
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
+
+#if defined(KERNEL) || defined(_KERNEL)
+
+/*
+ * nfs_rslock - Attempt to obtain lock on nfsnode
+ *
+ * Attempt to obtain a lock on the passed nfsnode, returning ENOLCK
+ * if the lock could not be obtained due to our having to sleep. This
+ * function is generally used to lock around code that modifies an
+ * NFS file's size. In order to avoid deadlocks the lock
+ * should not be obtained while other locks are being held.
+ */
+
+static __inline
+int
+nfs_rslock(struct nfsnode *np, struct proc *p)
+{
+ return(lockmgr(&np->n_rslock, LK_EXCLUSIVE | LK_CANRECURSE | LK_SLEEPFAIL, NULL, p));
+}
+
+static __inline
+void
+nfs_rsunlock(struct nfsnode *np, struct proc *p)
+{
+ (void)lockmgr(&np->n_rslock, LK_RELEASE, NULL, p);
+}
+
+extern vop_t **fifo_nfsv2nodeop_p;
+extern vop_t **nfsv2_vnodeop_p;
+extern vop_t **spec_nfsv2nodeop_p;
+
+/*
+ * Prototypes for NFS vnode operations
+ */
+int nfs_getpages __P((struct vop_getpages_args *));
+int nfs_putpages __P((struct vop_putpages_args *));
+int nfs_write __P((struct vop_write_args *));
+int nqnfs_vop_lease_check __P((struct vop_lease_args *));
+int nfs_inactive __P((struct vop_inactive_args *));
+int nfs_reclaim __P((struct vop_reclaim_args *));
+
+/* other stuff */
+int nfs_removeit __P((struct sillyrename *));
+int nfs_nget __P((struct mount *,nfsfh_t *,int,struct nfsnode **));
+nfsuint64 *nfs_getcookie __P((struct nfsnode *, off_t, int));
+void nfs_invaldir __P((struct vnode *));
+
+#define nqnfs_lease_updatetime nfs_lease_updatetime
+
+#endif /* KERNEL */
+
+#endif
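
The nfsdmap comment above describes one cookie per NFS_DIRBLKSIZ bytes of directory data, grouped NFSNUMCOOKIES to a list element hanging off n_cookies. The sketch below shows only that index arithmetic in user space; the NFS_DIRBLKSIZ value is a placeholder (the real constant lives in nfs/nfs.h), and the in-kernel nfs_getcookie() adds further handling, for instance the start of the directory needs no cookie at all.

/*
 * Sketch of the logical-offset -> cookie-slot arithmetic behind struct
 * nfsdmap: one cookie per NFS_DIRBLKSIZ bytes of directory data, grouped
 * NFSNUMCOOKIES to a list element.
 */
#include <stdio.h>

#define NFS_DIRBLKSIZ	1024		/* placeholder; see nfs/nfs.h */
#define NFSNUMCOOKIES	31

int
main(void)
{
	long long off = 70 * 1024;	/* logical directory offset */
	long long blk, elem, slot;

	blk = off / NFS_DIRBLKSIZ;	/* which directory block */
	elem = blk / NFSNUMCOOKIES;	/* which nfsdmap on the n_cookies list */
	slot = blk % NFSNUMCOOKIES;	/* index into ndm_cookies[] */

	printf("offset %lld -> list element %lld, cookie slot %lld\n",
	    off, elem, slot);
	return (0);
}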
diff --git a/sys/nfs/nfsproto.h b/sys/nfs/nfsproto.h
new file mode 100644
index 0000000..cbb2832
--- /dev/null
+++ b/sys/nfs/nfsproto.h
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsproto.h 8.2 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+#ifndef _NFS_NFSPROTO_H_
+#define _NFS_NFSPROTO_H_
+
+/*
+ * nfs definitions as per the Version 2 and 3 specs
+ */
+
+/*
+ * Constants as defined in the Sun NFS Version 2 and 3 specs.
+ * "NFS: Network File System Protocol Specification" RFC1094
+ * and in the "NFS: Network File System Version 3 Protocol
+ * Specification"
+ */
+
+#define NFS_PORT 2049
+#define NFS_PROG 100003
+#define NFS_VER2 2
+#define NFS_VER3 3
+#define NFS_V2MAXDATA 8192
+#define NFS_MAXDGRAMDATA 16384
+#define NFS_MAXDATA 32768
+#define NFS_MAXPATHLEN 1024
+#define NFS_MAXNAMLEN 255
+#define NFS_MAXPKTHDR 404
+#define NFS_MAXPACKET (NFS_MAXPKTHDR + NFS_MAXDATA)
+#define NFS_MINPACKET 20
+#define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */
+
+/* Stat numbers for rpc returns (version 2 and 3) */
+#define NFS_OK 0
+#define NFSERR_PERM 1
+#define NFSERR_NOENT 2
+#define NFSERR_IO 5
+#define NFSERR_NXIO 6
+#define NFSERR_ACCES 13
+#define NFSERR_EXIST 17
+#define NFSERR_XDEV 18 /* Version 3 only */
+#define NFSERR_NODEV 19
+#define NFSERR_NOTDIR 20
+#define NFSERR_ISDIR 21
+#define NFSERR_INVAL 22 /* Version 3 only */
+#define NFSERR_FBIG 27
+#define NFSERR_NOSPC 28
+#define NFSERR_ROFS 30
+#define NFSERR_MLINK 31 /* Version 3 only */
+#define NFSERR_NAMETOL 63
+#define NFSERR_NOTEMPTY 66
+#define NFSERR_DQUOT 69
+#define NFSERR_STALE 70
+#define NFSERR_REMOTE 71 /* Version 3 only */
+#define NFSERR_WFLUSH 99 /* Version 2 only */
+#define NFSERR_BADHANDLE 10001 /* The rest Version 3 only */
+#define NFSERR_NOT_SYNC 10002
+#define NFSERR_BAD_COOKIE 10003
+#define NFSERR_NOTSUPP 10004
+#define NFSERR_TOOSMALL 10005
+#define NFSERR_SERVERFAULT 10006
+#define NFSERR_BADTYPE 10007
+#define NFSERR_JUKEBOX 10008
+#define NFSERR_TRYLATER NFSERR_JUKEBOX
+#define NFSERR_STALEWRITEVERF 30001 /* Fake return for nfs_commit() */
+
+#define NFSERR_RETVOID 0x20000000 /* Return void, not error */
+#define NFSERR_AUTHERR 0x40000000 /* Mark an authentication error */
+#define NFSERR_RETERR 0x80000000 /* Mark an error return for V3 */
+
+/* Sizes in bytes of various nfs rpc components */
+#define NFSX_UNSIGNED 4
+
+/* specific to NFS Version 2 */
+#define NFSX_V2FH 32
+#define NFSX_V2FATTR 68
+#define NFSX_V2SATTR 32
+#define NFSX_V2COOKIE 4
+#define NFSX_V2STATFS 20
+
+/* specific to NFS Version 3 */
+#define NFSX_V3FH (sizeof (fhandle_t)) /* size this server uses */
+#define NFSX_V3FHMAX 64 /* max. allowed by protocol */
+#define NFSX_V3FATTR 84
+#define NFSX_V3SATTR 60 /* max. all fields filled in */
+#define NFSX_V3SRVSATTR (sizeof (struct nfsv3_sattr))
+#define NFSX_V3POSTOPATTR (NFSX_V3FATTR + NFSX_UNSIGNED)
+#define NFSX_V3WCCDATA (NFSX_V3POSTOPATTR + 8 * NFSX_UNSIGNED)
+#define NFSX_V3COOKIEVERF 8
+#define NFSX_V3WRITEVERF 8
+#define NFSX_V3CREATEVERF 8
+#define NFSX_V3STATFS 52
+#define NFSX_V3FSINFO 48
+#define NFSX_V3PATHCONF 24
+
+/* variants for both versions */
+#define NFSX_FH(v3) ((v3) ? (NFSX_V3FHMAX + NFSX_UNSIGNED) : \
+ NFSX_V2FH)
+#define NFSX_SRVFH(v3) ((v3) ? NFSX_V3FH : NFSX_V2FH)
+#define NFSX_FATTR(v3) ((v3) ? NFSX_V3FATTR : NFSX_V2FATTR)
+#define NFSX_PREOPATTR(v3) ((v3) ? (7 * NFSX_UNSIGNED) : 0)
+#define NFSX_POSTOPATTR(v3) ((v3) ? (NFSX_V3FATTR + NFSX_UNSIGNED) : 0)
+#define NFSX_POSTOPORFATTR(v3) ((v3) ? (NFSX_V3FATTR + NFSX_UNSIGNED) : \
+ NFSX_V2FATTR)
+#define NFSX_WCCDATA(v3) ((v3) ? NFSX_V3WCCDATA : 0)
+#define NFSX_WCCORFATTR(v3) ((v3) ? NFSX_V3WCCDATA : NFSX_V2FATTR)
+#define NFSX_SATTR(v3) ((v3) ? NFSX_V3SATTR : NFSX_V2SATTR)
+#define NFSX_COOKIEVERF(v3) ((v3) ? NFSX_V3COOKIEVERF : 0)
+#define NFSX_WRITEVERF(v3) ((v3) ? NFSX_V3WRITEVERF : 0)
+#define NFSX_READDIR(v3) ((v3) ? (5 * NFSX_UNSIGNED) : \
+ (2 * NFSX_UNSIGNED))
+#define NFSX_STATFS(v3) ((v3) ? NFSX_V3STATFS : NFSX_V2STATFS)
+
+/* nfs rpc procedure numbers (before version mapping) */
+#define NFSPROC_NULL 0
+#define NFSPROC_GETATTR 1
+#define NFSPROC_SETATTR 2
+#define NFSPROC_LOOKUP 3
+#define NFSPROC_ACCESS 4
+#define NFSPROC_READLINK 5
+#define NFSPROC_READ 6
+#define NFSPROC_WRITE 7
+#define NFSPROC_CREATE 8
+#define NFSPROC_MKDIR 9
+#define NFSPROC_SYMLINK 10
+#define NFSPROC_MKNOD 11
+#define NFSPROC_REMOVE 12
+#define NFSPROC_RMDIR 13
+#define NFSPROC_RENAME 14
+#define NFSPROC_LINK 15
+#define NFSPROC_READDIR 16
+#define NFSPROC_READDIRPLUS 17
+#define NFSPROC_FSSTAT 18
+#define NFSPROC_FSINFO 19
+#define NFSPROC_PATHCONF 20
+#define NFSPROC_COMMIT 21
+
+/* And leasing (nqnfs) procedure numbers (must be last) */
+#define NQNFSPROC_GETLEASE 22
+#define NQNFSPROC_VACATED 23
+#define NQNFSPROC_EVICTED 24
+
+#define NFSPROC_NOOP 25
+#define NFS_NPROCS 26
+
+/* Actual Version 2 procedure numbers */
+#define NFSV2PROC_NULL 0
+#define NFSV2PROC_GETATTR 1
+#define NFSV2PROC_SETATTR 2
+#define NFSV2PROC_NOOP 3
+#define NFSV2PROC_ROOT NFSV2PROC_NOOP /* Obsolete */
+#define NFSV2PROC_LOOKUP 4
+#define NFSV2PROC_READLINK 5
+#define NFSV2PROC_READ 6
+#define NFSV2PROC_WRITECACHE NFSV2PROC_NOOP /* Obsolete */
+#define NFSV2PROC_WRITE 8
+#define NFSV2PROC_CREATE 9
+#define NFSV2PROC_REMOVE 10
+#define NFSV2PROC_RENAME 11
+#define NFSV2PROC_LINK 12
+#define NFSV2PROC_SYMLINK 13
+#define NFSV2PROC_MKDIR 14
+#define NFSV2PROC_RMDIR 15
+#define NFSV2PROC_READDIR 16
+#define NFSV2PROC_STATFS 17
+
+/*
+ * Constants used by the Version 3 protocol for various RPCs
+ */
+#define NFSV3SATTRTIME_DONTCHANGE 0
+#define NFSV3SATTRTIME_TOSERVER 1
+#define NFSV3SATTRTIME_TOCLIENT 2
+
+#define NFSV3ACCESS_READ 0x01
+#define NFSV3ACCESS_LOOKUP 0x02
+#define NFSV3ACCESS_MODIFY 0x04
+#define NFSV3ACCESS_EXTEND 0x08
+#define NFSV3ACCESS_DELETE 0x10
+#define NFSV3ACCESS_EXECUTE 0x20
+
+#define NFSV3WRITE_UNSTABLE 0
+#define NFSV3WRITE_DATASYNC 1
+#define NFSV3WRITE_FILESYNC 2
+
+#define NFSV3CREATE_UNCHECKED 0
+#define NFSV3CREATE_GUARDED 1
+#define NFSV3CREATE_EXCLUSIVE 2
+
+#define NFSV3FSINFO_LINK 0x01
+#define NFSV3FSINFO_SYMLINK 0x02
+#define NFSV3FSINFO_HOMOGENEOUS 0x08
+#define NFSV3FSINFO_CANSETTIME 0x10
+
+/* Conversion macros */
+#define vtonfsv2_mode(t,m) \
+ txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \
+ MAKEIMODE((t), (m)))
+#define vtonfsv3_mode(m) txdr_unsigned((m) & ALLPERMS)
+#define nfstov_mode(a) (fxdr_unsigned(u_int32_t, (a)) & ALLPERMS)
+#define vtonfsv2_type(a) txdr_unsigned(nfsv2_type[((int32_t)(a))])
+#define vtonfsv3_type(a) txdr_unsigned(nfsv3_type[((int32_t)(a))])
+#define nfsv2tov_type(a) nv2tov_type[fxdr_unsigned(u_int32_t,(a))&0x7]
+#define nfsv3tov_type(a) nv3tov_type[fxdr_unsigned(u_int32_t,(a))&0x7]
+
+/* File types */
+typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5,
+ NFSOCK=6, NFFIFO=7 } nfstype;
+
+/* Structs for common parts of the rpc's */
+/*
+ * File Handle (32 bytes for version 2), variable up to 64 for version 3.
+ * File Handles of up to NFS_SMALLFH in size are stored directly in the
+ * nfs node, whereas larger ones are malloc'd. (This never happens when
+ * NFS_SMALLFH is set to 64.)
+ * NFS_SMALLFH should be in the range of 32 to 64 and be divisible by 4.
+ */
+#ifndef NFS_SMALLFH
+#define NFS_SMALLFH 64
+#endif
+union nfsfh {
+ fhandle_t fh_generic;
+ u_char fh_bytes[NFS_SMALLFH];
+};
+typedef union nfsfh nfsfh_t;
+
+struct nfsv2_time {
+ u_int32_t nfsv2_sec;
+ u_int32_t nfsv2_usec;
+};
+typedef struct nfsv2_time nfstime2;
+
+struct nfsv3_time {
+ u_int32_t nfsv3_sec;
+ u_int32_t nfsv3_nsec;
+};
+typedef struct nfsv3_time nfstime3;
+
+/*
+ * Quads are defined as arrays of 2 longs to ensure dense packing for the
+ * protocol and to facilitate xdr conversion.
+ */
+struct nfs_uquad {
+ u_int32_t nfsuquad[2];
+};
+typedef struct nfs_uquad nfsuint64;
+
+/*
+ * Used to convert between two u_longs and a u_quad_t.
+ */
+union nfs_quadconvert {
+ u_int32_t lval[2];
+ u_quad_t qval;
+};
+typedef union nfs_quadconvert nfsquad_t;
+
+/*
+ * NFS Version 3 special file number.
+ */
+struct nfsv3_spec {
+ u_int32_t specdata1;
+ u_int32_t specdata2;
+};
+typedef struct nfsv3_spec nfsv3spec;
+
+/*
+ * File attributes and settable attributes. These structures cover both
+ * NFS version 2 and the version 3 protocol. Note that the union is only
+ * used so that one pointer can refer to both variants. These structures
+ * go out on the wire and must be densely packed, so no quad data types
+ * are used. (all fields are longs or u_longs or structures of same)
+ * NB: You can't do sizeof(struct nfs_fattr), you must use the
+ * NFSX_FATTR(v3) macro.
+ */
+struct nfs_fattr {
+ u_int32_t fa_type;
+ u_int32_t fa_mode;
+ u_int32_t fa_nlink;
+ u_int32_t fa_uid;
+ u_int32_t fa_gid;
+ union {
+ struct {
+ u_int32_t nfsv2fa_size;
+ u_int32_t nfsv2fa_blocksize;
+ u_int32_t nfsv2fa_rdev;
+ u_int32_t nfsv2fa_blocks;
+ u_int32_t nfsv2fa_fsid;
+ u_int32_t nfsv2fa_fileid;
+ nfstime2 nfsv2fa_atime;
+ nfstime2 nfsv2fa_mtime;
+ nfstime2 nfsv2fa_ctime;
+ } fa_nfsv2;
+ struct {
+ nfsuint64 nfsv3fa_size;
+ nfsuint64 nfsv3fa_used;
+ nfsv3spec nfsv3fa_rdev;
+ nfsuint64 nfsv3fa_fsid;
+ nfsuint64 nfsv3fa_fileid;
+ nfstime3 nfsv3fa_atime;
+ nfstime3 nfsv3fa_mtime;
+ nfstime3 nfsv3fa_ctime;
+ } fa_nfsv3;
+ } fa_un;
+};
+
+/* and some ugly defines for accessing union components */
+#define fa2_size fa_un.fa_nfsv2.nfsv2fa_size
+#define fa2_blocksize fa_un.fa_nfsv2.nfsv2fa_blocksize
+#define fa2_rdev fa_un.fa_nfsv2.nfsv2fa_rdev
+#define fa2_blocks fa_un.fa_nfsv2.nfsv2fa_blocks
+#define fa2_fsid fa_un.fa_nfsv2.nfsv2fa_fsid
+#define fa2_fileid fa_un.fa_nfsv2.nfsv2fa_fileid
+#define fa2_atime fa_un.fa_nfsv2.nfsv2fa_atime
+#define fa2_mtime fa_un.fa_nfsv2.nfsv2fa_mtime
+#define fa2_ctime fa_un.fa_nfsv2.nfsv2fa_ctime
+#define fa3_size fa_un.fa_nfsv3.nfsv3fa_size
+#define fa3_used fa_un.fa_nfsv3.nfsv3fa_used
+#define fa3_rdev fa_un.fa_nfsv3.nfsv3fa_rdev
+#define fa3_fsid fa_un.fa_nfsv3.nfsv3fa_fsid
+#define fa3_fileid fa_un.fa_nfsv3.nfsv3fa_fileid
+#define fa3_atime fa_un.fa_nfsv3.nfsv3fa_atime
+#define fa3_mtime fa_un.fa_nfsv3.nfsv3fa_mtime
+#define fa3_ctime fa_un.fa_nfsv3.nfsv3fa_ctime
+
+struct nfsv2_sattr {
+ u_int32_t sa_mode;
+ u_int32_t sa_uid;
+ u_int32_t sa_gid;
+ u_int32_t sa_size;
+ nfstime2 sa_atime;
+ nfstime2 sa_mtime;
+};
+
+/*
+ * NFS Version 3 sattr structure for the new node creation case.
+ */
+struct nfsv3_sattr {
+ u_int32_t sa_modetrue;
+ u_int32_t sa_mode;
+ u_int32_t sa_uidfalse;
+ u_int32_t sa_gidfalse;
+ u_int32_t sa_sizefalse;
+ u_int32_t sa_atimetype;
+ nfstime3 sa_atime;
+ u_int32_t sa_mtimetype;
+ nfstime3 sa_mtime;
+};
+
+struct nfs_statfs {
+ union {
+ struct {
+ u_int32_t nfsv2sf_tsize;
+ u_int32_t nfsv2sf_bsize;
+ u_int32_t nfsv2sf_blocks;
+ u_int32_t nfsv2sf_bfree;
+ u_int32_t nfsv2sf_bavail;
+ } sf_nfsv2;
+ struct {
+ nfsuint64 nfsv3sf_tbytes;
+ nfsuint64 nfsv3sf_fbytes;
+ nfsuint64 nfsv3sf_abytes;
+ nfsuint64 nfsv3sf_tfiles;
+ nfsuint64 nfsv3sf_ffiles;
+ nfsuint64 nfsv3sf_afiles;
+ u_int32_t nfsv3sf_invarsec;
+ } sf_nfsv3;
+ } sf_un;
+};
+
+#define sf_tsize sf_un.sf_nfsv2.nfsv2sf_tsize
+#define sf_bsize sf_un.sf_nfsv2.nfsv2sf_bsize
+#define sf_blocks sf_un.sf_nfsv2.nfsv2sf_blocks
+#define sf_bfree sf_un.sf_nfsv2.nfsv2sf_bfree
+#define sf_bavail sf_un.sf_nfsv2.nfsv2sf_bavail
+#define sf_tbytes sf_un.sf_nfsv3.nfsv3sf_tbytes
+#define sf_fbytes sf_un.sf_nfsv3.nfsv3sf_fbytes
+#define sf_abytes sf_un.sf_nfsv3.nfsv3sf_abytes
+#define sf_tfiles sf_un.sf_nfsv3.nfsv3sf_tfiles
+#define sf_ffiles sf_un.sf_nfsv3.nfsv3sf_ffiles
+#define sf_afiles sf_un.sf_nfsv3.nfsv3sf_afiles
+#define sf_invarsec sf_un.sf_nfsv3.nfsv3sf_invarsec
+
+struct nfsv3_fsinfo {
+ u_int32_t fs_rtmax;
+ u_int32_t fs_rtpref;
+ u_int32_t fs_rtmult;
+ u_int32_t fs_wtmax;
+ u_int32_t fs_wtpref;
+ u_int32_t fs_wtmult;
+ u_int32_t fs_dtpref;
+ nfsuint64 fs_maxfilesize;
+ nfstime3 fs_timedelta;
+ u_int32_t fs_properties;
+};
+
+struct nfsv3_pathconf {
+ u_int32_t pc_linkmax;
+ u_int32_t pc_namemax;
+ u_int32_t pc_notrunc;
+ u_int32_t pc_chownrestricted;
+ u_int32_t pc_caseinsensitive;
+ u_int32_t pc_casepreserving;
+};
+
+#endif
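
struct nfsv3_sattr flattens the NFSv3 sattr3 discriminated unions into a fixed layout (NFSX_V3SRVSATTR is simply its sizeof), so the time bodies are always present, which fits the TOCLIENT arm of the union where explicit times follow the discriminant. Below is a stand-alone sketch of filling one in, with the struct and constants copied from this header and txdr_unsigned() taken from xdr_subs.h (it is htonl()); the mode and timestamps are example values only.

/*
 * Sketch: populate an nfsv3_sattr for "set mode, leave uid/gid/size
 * alone, set both times to explicit client-supplied values".
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <arpa/inet.h>

#define txdr_unsigned(v)	(htonl((int32_t)(v)))

#define NFSV3SATTRTIME_TOCLIENT	2

struct nfsv3_time {
	uint32_t nfsv3_sec;
	uint32_t nfsv3_nsec;
};
typedef struct nfsv3_time nfstime3;

struct nfsv3_sattr {
	uint32_t sa_modetrue;
	uint32_t sa_mode;
	uint32_t sa_uidfalse;
	uint32_t sa_gidfalse;
	uint32_t sa_sizefalse;
	uint32_t sa_atimetype;
	nfstime3 sa_atime;
	uint32_t sa_mtimetype;
	nfstime3 sa_mtime;
};

int
main(void)
{
	struct nfsv3_sattr sa;
	time_t now = time(NULL);

	sa.sa_modetrue = txdr_unsigned(1);		/* TRUE: mode follows */
	sa.sa_mode = txdr_unsigned(0755);
	sa.sa_uidfalse = txdr_unsigned(0);		/* FALSE: leave uid alone */
	sa.sa_gidfalse = txdr_unsigned(0);		/* FALSE: leave gid alone */
	sa.sa_sizefalse = txdr_unsigned(0);		/* FALSE: leave size alone */
	sa.sa_atimetype = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
	sa.sa_atime.nfsv3_sec = txdr_unsigned(now);	/* explicit client time */
	sa.sa_atime.nfsv3_nsec = txdr_unsigned(0);
	sa.sa_mtimetype = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
	sa.sa_mtime.nfsv3_sec = txdr_unsigned(now);
	sa.sa_mtime.nfsv3_nsec = txdr_unsigned(0);

	printf("NFSX_V3SRVSATTR == sizeof(struct nfsv3_sattr) == %zu bytes\n",
	    sizeof(sa));
	return (0);
}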
diff --git a/sys/nfs/nfsrtt.h b/sys/nfs/nfsrtt.h
new file mode 100644
index 0000000..4473086
--- /dev/null
+++ b/sys/nfs/nfsrtt.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsrtt.h 8.2 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_NFSRTT_H_
+#define _NFS_NFSRTT_H_
+
+/*
+ * Definitions for performance monitor.
+ * The client and server logging are turned on by setting the global
+ * constant "nfsrtton" to 1.
+ */
+#define NFSRTTLOGSIZ 128
+
+/*
+ * Circular log of client side rpc activity. Each log entry is for one
+ * rpc filled in upon completion. (i.e., in order of completion)
+ * The "pos" is the table index for the "next" entry, therefore the
+ * list goes from nfsrtt.rttl[pos] --> nfsrtt.rttl[pos - 1] in
+ * chronological order of completion.
+ */
+struct nfsrtt {
+ int pos; /* Position in array for next entry */
+ struct rttl {
+ u_int32_t proc; /* NFS procedure number */
+ int rtt; /* Measured round trip time */
+ int rto; /* Round Trip Timeout */
+ int sent; /* # rpcs in progress */
+ int cwnd; /* Send window */
+ int srtt; /* Ave Round Trip Time */
+ int sdrtt; /* Ave mean deviation of RTT */
+ fsid_t fsid; /* Fsid for mount point */
+ struct timeval tstamp; /* Timestamp of log entry */
+ } rttl[NFSRTTLOGSIZ];
+};
+
+/*
+ * And definitions for server side performance monitor.
+ * The log organization is the same as above except it is filled in at the
+ * time the server sends the rpc reply.
+ */
+
+/*
+ * Bits for the flags field.
+ */
+#define DRT_NQNFS 0x01 /* Rpc used Nqnfs protocol */
+#define DRT_TCP 0x02 /* Client used TCP transport */
+#define DRT_CACHEREPLY 0x04 /* Reply was from recent request cache */
+#define DRT_CACHEDROP 0x08 /* Rpc request dropped, due to recent reply */
+#define DRT_NFSV3 0x10 /* Rpc used NFS Version 3 */
+
+/*
+ * Server log structure
+ * NB: ipadr == INADDR_ANY indicates a client using a non IP protocol.
+ * (ISO perhaps?)
+ */
+struct nfsdrt {
+ int pos; /* Position of next log entry */
+ struct drt {
+ int flag; /* Bits as defined above */
+ u_int32_t proc; /* NFS procedure number */
+ u_int32_t ipadr; /* IP address of client */
+ int resptime; /* Response time (usec) */
+ struct timeval tstamp; /* Timestamp of log entry */
+ } drt[NFSRTTLOGSIZ];
+};
+
+#endif
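
Per the comment above, nfsrtt.pos names the slot that will be filled next, so reading the log chronologically means starting at rttl[pos] (oldest) and wrapping around to rttl[pos - 1] (newest). A small stand-alone sketch of that traversal; the structures are trimmed copies rather than the kernel definitions, and a zero proc field is taken to mean "slot never written".

/*
 * Sketch: walk a circular log like struct nfsrtt in chronological order.
 */
#include <stdio.h>

#define NFSRTTLOGSIZ	128

struct rttl_entry {
	unsigned int proc;	/* NFS procedure number */
	int rtt;		/* measured round trip time */
};

struct nfsrtt_log {
	int pos;				/* next slot to fill */
	struct rttl_entry rttl[NFSRTTLOGSIZ];
};

static void
dump_chronological(const struct nfsrtt_log *log)
{
	int i, slot;

	for (i = 0; i < NFSRTTLOGSIZ; i++) {
		slot = (log->pos + i) % NFSRTTLOGSIZ;	/* oldest first */
		if (log->rttl[slot].proc != 0)
			printf("proc %u rtt %d\n",
			    log->rttl[slot].proc, log->rttl[slot].rtt);
	}
}

int
main(void)
{
	static struct nfsrtt_log log;	/* zero-filled: all slots unused */

	/* Pretend two RPCs completed: GETATTR (1) then READ (6). */
	log.rttl[log.pos].proc = 1; log.rttl[log.pos].rtt = 3;
	log.pos = (log.pos + 1) % NFSRTTLOGSIZ;
	log.rttl[log.pos].proc = 6; log.rttl[log.pos].rtt = 7;
	log.pos = (log.pos + 1) % NFSRTTLOGSIZ;

	dump_chronological(&log);
	return (0);
}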
diff --git a/sys/nfs/nfsrvcache.h b/sys/nfs/nfsrvcache.h
new file mode 100644
index 0000000..eec5850
--- /dev/null
+++ b/sys/nfs/nfsrvcache.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsrvcache.h 8.3 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_NFSRVCACHE_H_
+#define _NFS_NFSRVCACHE_H_
+
+#include <sys/queue.h>
+
+/*
+ * Definitions for the server recent request cache
+ */
+
+#define NFSRVCACHESIZ 64
+
+struct nfsrvcache {
+ TAILQ_ENTRY(nfsrvcache) rc_lru; /* LRU chain */
+ LIST_ENTRY(nfsrvcache) rc_hash; /* Hash chain */
+ u_int32_t rc_xid; /* rpc id number */
+ union {
+ struct mbuf *ru_repmb; /* Reply mbuf list OR */
+ int ru_repstat; /* Reply status */
+ } rc_un;
+ union nethostaddr rc_haddr; /* Host address */
+ u_int32_t rc_proc; /* rpc proc number */
+ u_char rc_state; /* Current state of request */
+ u_char rc_flag; /* Flag bits */
+};
+
+#define rc_reply rc_un.ru_repmb
+#define rc_status rc_un.ru_repstat
+#define rc_inetaddr rc_haddr.had_inetaddr
+#define rc_nam rc_haddr.had_nam
+
+/* Cache entry states */
+#define RC_UNUSED 0
+#define RC_INPROG 1
+#define RC_DONE 2
+
+/* Return values */
+#define RC_DROPIT 0
+#define RC_REPLY 1
+#define RC_DOIT 2
+#define RC_CHECKIT 3
+
+/* Flag bits */
+#define RC_LOCKED 0x01
+#define RC_WANTED 0x02
+#define RC_REPSTATUS 0x04
+#define RC_REPMBUF 0x08
+#define RC_NQNFS 0x10
+#define RC_INETADDR 0x20
+#define RC_NAM 0x40
+
+#endif
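
The states (RC_UNUSED/RC_INPROG/RC_DONE) and return values (RC_DROPIT/RC_REPLY/RC_DOIT) describe the server's recent-request cache: a retransmission of an RPC still in progress is dropped, a retransmission of a completed RPC whose reply was cached gets that reply resent, and anything else is executed. The fragment below is only a sketch of that state machine under those assumptions; the real policy, hashing and LRU handling live in nfs_srvcache.c.

/*
 * Sketch of the duplicate-request decision implied by the nfsrvcache
 * states and return codes.  A single entry is modelled; xid matching
 * stands in for the full xid/proc/address comparison.
 */
#include <stdio.h>

#define RC_UNUSED	0
#define RC_INPROG	1
#define RC_DONE		2

#define RC_DROPIT	0
#define RC_REPLY	1
#define RC_DOIT		2

struct cache_entry {
	unsigned int xid;	/* rpc transaction id */
	unsigned char state;	/* RC_UNUSED/RC_INPROG/RC_DONE */
	int have_reply;		/* cached reply available (RC_REPSTATUS/RC_REPMBUF) */
};

static int
check_request(struct cache_entry *rp, unsigned int xid)
{
	if (rp->state == RC_UNUSED || rp->xid != xid) {
		/* New request: claim the slot and execute it. */
		rp->xid = xid;
		rp->state = RC_INPROG;
		rp->have_reply = 0;
		return (RC_DOIT);
	}
	if (rp->state == RC_INPROG)
		return (RC_DROPIT);	/* retransmit of an in-progress rpc */
	if (rp->state == RC_DONE && rp->have_reply)
		return (RC_REPLY);	/* resend the cached reply */
	return (RC_DOIT);		/* done but nothing cached: redo it */
}

int
main(void)
{
	struct cache_entry e = { 0, RC_UNUSED, 0 };

	printf("first xid 7 -> %d\n", check_request(&e, 7));	/* RC_DOIT */
	printf("retry xid 7 -> %d\n", check_request(&e, 7));	/* RC_DROPIT */
	e.state = RC_DONE; e.have_reply = 1;
	printf("retry xid 7 -> %d\n", check_request(&e, 7));	/* RC_REPLY */
	return (0);
}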
diff --git a/sys/nfs/nfsv2.h b/sys/nfs/nfsv2.h
new file mode 100644
index 0000000..6f8f85e
--- /dev/null
+++ b/sys/nfs/nfsv2.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsv2.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#include <nfs/nfsproto.h>
diff --git a/sys/nfs/nqnfs.h b/sys/nfs/nqnfs.h
new file mode 100644
index 0000000..2ba78ae
--- /dev/null
+++ b/sys/nfs/nqnfs.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nqnfs.h 8.3 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_NQNFS_H_
+#define _NFS_NQNFS_H_
+
+/*
+ * Definitions for NQNFS (Not Quite NFS) cache consistency protocol.
+ */
+
+/* Tunable constants */
+#define NQ_CLOCKSKEW 3 /* Clock skew factor (sec) */
+#define NQ_WRITESLACK 5 /* Delay for write cache flushing */
+#define NQ_MAXLEASE 60 /* Max lease duration (sec) */
+#define NQ_MINLEASE 5 /* Min lease duration (sec) */
+#define NQ_DEFLEASE 30 /* Default lease duration (sec) */
+#define NQ_RENEWAL 3 /* Time before expiry (sec) to renew */
+#define NQ_TRYLATERDEL 15 /* Initial try later delay (sec) */
+#define NQ_MAXNUMLEASE 2048 /* Upper bound on number of server leases */
+#define NQ_DEADTHRESH NQ_NEVERDEAD /* Default nm_deadthresh */
+#define NQ_NEVERDEAD 9 /* Greater than max. nm_timeouts */
+#define NQLCHSZ 256 /* Server hash table size */
+
+#define NQNFS_PROG 300105 /* As assigned by Sun */
+#define NQNFS_VER3 3
+#define NQNFS_EVICTSIZ 156 /* Size of eviction request in bytes */
+
+#if defined(KERNEL) || defined(_KERNEL)
+/*
+ * Definitions used for saving the "last lease expires" time in Non-volatile
+ * RAM on the server. The default definitions below assume that NOVRAM is not
+ * available.
+ */
+#ifdef HASNVRAM
+# undef HASNVRAM
+#endif
+#define NQSTORENOVRAM(t)
+#define NQLOADNOVRAM(t)
+
+/*
+ * Defn and structs used on the server to maintain state for current leases.
+ * The list of host(s) that hold the lease are kept as nqhost structures.
+ * The first one lives in nqlease and any others are held in a linked
+ * list of nqm structures hanging off of nqlease.
+ *
+ * Each nqlease structure is chained into two lists. The first is a list
+ * ordered by increasing expiry time for nqsrv_timer() and the second is a chain
+ * hashed on lc_fh.
+ */
+#define LC_MOREHOSTSIZ 10
+
+struct nqhost {
+ union {
+ struct {
+ u_int16_t udp_flag;
+ u_int16_t udp_port;
+ union nethostaddr udp_haddr;
+ } un_udp;
+ struct {
+ u_int16_t connless_flag;
+ u_int16_t connless_spare;
+ union nethostaddr connless_haddr;
+ } un_connless;
+ struct {
+ u_int16_t conn_flag;
+ u_int16_t conn_spare;
+ struct nfssvc_sock *conn_slp;
+ } un_conn;
+ } lph_un;
+};
+#define lph_flag lph_un.un_udp.udp_flag
+#define lph_port lph_un.un_udp.udp_port
+#define lph_haddr lph_un.un_udp.udp_haddr
+#define lph_inetaddr lph_un.un_udp.udp_haddr.had_inetaddr
+#define lph_claddr lph_un.un_connless.connless_haddr
+#define lph_nam lph_un.un_connless.connless_haddr.had_nam
+#define lph_slp lph_un.un_conn.conn_slp
+
+struct nqlease {
+ LIST_ENTRY(nqlease) lc_hash; /* Fhandle hash list */
+ CIRCLEQ_ENTRY(nqlease) lc_timer; /* Timer queue list */
+ time_t lc_expiry; /* Expiry time (sec) */
+ struct nqhost lc_host; /* Host that got lease */
+ struct nqm *lc_morehosts; /* Other hosts that share read lease */
+ fsid_t lc_fsid; /* Fhandle */
+ char lc_fiddata[MAXFIDSZ];
+ struct vnode *lc_vp; /* Soft reference to associated vnode */
+};
+#define lc_flag lc_host.lph_un.un_udp.udp_flag
+
+/* lc_flag bits */
+#define LC_VALID 0x0001 /* Host address valid */
+#define LC_WRITE 0x0002 /* Write cache */
+#define LC_NONCACHABLE 0x0004 /* Non-cachable lease */
+#define LC_LOCKED 0x0008 /* Locked */
+#define LC_WANTED 0x0010 /* Lock wanted */
+#define LC_EXPIREDWANTED 0x0020 /* Want lease when expired */
+#define LC_UDP 0x0040 /* Host address for udp socket */
+#define LC_CLTP 0x0080 /* Host address for other connectionless */
+#define LC_LOCAL 0x0100 /* Host is server */
+#define LC_VACATED 0x0200 /* Host has vacated lease */
+#define LC_WRITTEN 0x0400 /* Recently wrote to the leased file */
+#define LC_SREF 0x0800 /* Holds a nfssvc_sock reference */
+
+struct nqm {
+ struct nqm *lpm_next;
+ struct nqhost lpm_hosts[LC_MOREHOSTSIZ];
+};
+
+/*
+ * Special value for slp for local server calls.
+ */
+#define NQLOCALSLP ((struct nfssvc_sock *) -1)
+
+/*
+ * Server side macros.
+ */
+#define nqsrv_getl(v, l) \
+ (void) nqsrv_getlease((v), &nfsd->nd_duration, \
+ ((nfsd->nd_flag & ND_LEASE) ? (nfsd->nd_flag & ND_LEASE) : \
+ ((l) | ND_CHECK)), \
+ slp, procp, nfsd->nd_nam, &cache, &frev, cred)
+
+/*
+ * Client side macros that check for a valid lease.
+ */
+#define NQNFS_CKINVALID(v, n, f) \
+ ((time_second > (n)->n_expiry && \
+ VFSTONFS((v)->v_mount)->nm_timeouts < VFSTONFS((v)->v_mount)->nm_deadthresh) \
+ || ((f) == ND_WRITE && ((n)->n_flag & NQNFSWRITE) == 0))
+
+#define NQNFS_CKCACHABLE(v, f) \
+ ((time_second <= VTONFS(v)->n_expiry || \
+ VFSTONFS((v)->v_mount)->nm_timeouts >= VFSTONFS((v)->v_mount)->nm_deadthresh) \
+ && (VTONFS(v)->n_flag & NQNFSNONCACHE) == 0 && \
+ ((f) == ND_READ || (VTONFS(v)->n_flag & NQNFSWRITE)))
+
+#define NQNFS_NEEDLEASE(v, p) \
+ (time_second > VTONFS(v)->n_expiry ? \
+ ((VTONFS(v)->n_flag & NQNFSEVICTED) ? 0 : nqnfs_piggy[p]) : \
+ (((time_second + NQ_RENEWAL) > VTONFS(v)->n_expiry && \
+ nqnfs_piggy[p]) ? \
+ ((VTONFS(v)->n_flag & NQNFSWRITE) ? \
+ ND_WRITE : nqnfs_piggy[p]) : 0))
+
+/*
+ * List head for timer queue.
+ */
+extern CIRCLEQ_HEAD(nqtimerhead, nqlease) nqtimerhead;
+
+/*
+ * List head for the file handle hash table.
+ */
+#define NQFHHASH(f) \
+ (&nqfhhashtbl[(*((u_int32_t *)(f))) & nqfhhash])
+extern LIST_HEAD(nqfhhashhead, nqlease) *nqfhhashtbl;
+extern u_long nqfhhash;
+
+/*
+ * Nqnfs return status numbers.
+ */
+#define NQNFS_EXPIRED 500
+#define NQNFS_TRYLATER 501
+
+void nqnfs_lease_check __P((struct vnode *, struct proc *, struct ucred *, int));
+void nqnfs_lease_updatetime __P((int));
+int nqsrv_getlease __P((struct vnode *, u_int32_t *, int,
+ struct nfssvc_sock *, struct proc *,
+ struct sockaddr *, int *, u_quad_t *,
+ struct ucred *));
+int nqnfs_getlease __P((struct vnode *,int,struct ucred *,struct proc *));
+int nqnfs_callback __P((struct nfsmount *,struct mbuf *,struct mbuf *,caddr_t));
+int nqnfs_clientd __P((struct nfsmount *,struct ucred *,struct nfsd_cargs *,int,caddr_t,struct proc *));
+struct nfsnode;
+void nqnfs_clientlease __P((struct nfsmount *, struct nfsnode *, int, int, time_t, u_quad_t));
+void nqnfs_serverd __P((void));
+int nqnfsrv_getlease __P((struct nfsrv_descript *, struct nfssvc_sock *, struct proc *, struct mbuf **));
+int nqnfsrv_vacated __P((struct nfsrv_descript *, struct nfssvc_sock *, struct proc *, struct mbuf **));
+#endif
+
+#endif
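
The client-side macros boil down to clock arithmetic: a lease is invalid once time_second passes n_expiry (unless so many timeouts have accumulated that the server is presumed dead), and NQNFS_NEEDLEASE starts piggybacking a renewal once expiry is less than NQ_RENEWAL seconds away. A stand-alone sketch of just that arithmetic, with the wider policy (write leases, eviction, the nqnfs_piggy table) left out:

/*
 * Sketch of the lease timing checks behind NQNFS_CKINVALID and
 * NQNFS_NEEDLEASE.  Variable names mirror the macros above.
 */
#include <stdio.h>
#include <time.h>

#define NQ_RENEWAL	3	/* seconds before expiry to renew */

int
main(void)
{
	time_t time_second = time(NULL);	/* kernel's cheap wall clock */
	time_t n_expiry = time_second + 2;	/* lease expires in 2 seconds */
	int nm_timeouts = 0, nm_deadthresh = 9;	/* NQ_NEVERDEAD default */

	int invalid = (time_second > n_expiry && nm_timeouts < nm_deadthresh);
	int renew_soon = (time_second + NQ_RENEWAL > n_expiry);

	printf("lease invalid: %d, renew on next rpc: %d\n", invalid, renew_soon);
	return (0);
}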
diff --git a/sys/nfs/rpcv2.h b/sys/nfs/rpcv2.h
new file mode 100644
index 0000000..525df09
--- /dev/null
+++ b/sys/nfs/rpcv2.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)rpcv2.h 8.2 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_RPCV2_H_
+#define _NFS_RPCV2_H_
+
+/*
+ * Definitions for Sun RPC Version 2, from
+ * "RPC: Remote Procedure Call Protocol Specification" RFC1057
+ */
+
+/* Version # */
+#define RPC_VER2 2
+
+/* Authentication */
+#define RPCAUTH_NULL 0
+#define RPCAUTH_UNIX 1
+#define RPCAUTH_SHORT 2
+#define RPCAUTH_KERB4 4
+#define RPCAUTH_NQNFS 300000
+#define RPCAUTH_MAXSIZ 400
+#define RPCVERF_MAXSIZ 12 /* For Kerb, can actually be 400 */
+#define RPCAUTH_UNIXGIDS 16
+
+/*
+ * Constants associated with authentication flavours.
+ */
+#define RPCAKN_FULLNAME 0
+#define RPCAKN_NICKNAME 1
+
+/* Rpc Constants */
+#define RPC_CALL 0
+#define RPC_REPLY 1
+#define RPC_MSGACCEPTED 0
+#define RPC_MSGDENIED 1
+#define RPC_PROGUNAVAIL 1
+#define RPC_PROGMISMATCH 2
+#define RPC_PROCUNAVAIL 3
+#define RPC_GARBAGE 4 /* I like this one */
+#define RPC_MISMATCH 0
+#define RPC_AUTHERR 1
+
+/* Authentication failures */
+#define AUTH_BADCRED 1
+#define AUTH_REJECTCRED 2
+#define AUTH_BADVERF 3
+#define AUTH_REJECTVERF 4
+#define AUTH_TOOWEAK 5 /* Give em wheaties */
+
+/* Sizes of rpc header parts */
+#define RPC_SIZ 24
+#define RPC_REPLYSIZ 28
+
+/* RPC Prog definitions */
+#define RPCPROG_MNT 100005
+#define RPCMNT_VER1 1
+#define RPCMNT_VER3 3
+#define RPCMNT_MOUNT 1
+#define RPCMNT_DUMP 2
+#define RPCMNT_UMOUNT 3
+#define RPCMNT_UMNTALL 4
+#define RPCMNT_EXPORT 5
+#define RPCMNT_NAMELEN 255
+#define RPCMNT_PATHLEN 1024
+#define RPCPROG_NFS 100003
+
+/*
+ * Structures used for RPCAUTH_KERB4.
+ */
+struct nfsrpc_fullverf {
+ u_int32_t t1;
+ u_int32_t t2;
+ u_int32_t w2;
+};
+
+struct nfsrpc_fullblock {
+ u_int32_t t1;
+ u_int32_t t2;
+ u_int32_t w1;
+ u_int32_t w2;
+};
+
+struct nfsrpc_nickverf {
+ u_int32_t kind;
+ struct nfsrpc_fullverf verf;
+};
+
+/*
+ * and their sizes in bytes.. If sizeof (struct nfsrpc_xx) != these
+ * constants, well then things will break in mount_nfs and nfsd.
+ */
+#define RPCX_FULLVERF 12
+#define RPCX_FULLBLOCK 16
+#define RPCX_NICKVERF 16
+
+#ifdef NFSKERB
+XXX
+#else
+typedef u_char NFSKERBKEY_T[2];
+typedef u_char NFSKERBKEYSCHED_T[2];
+#endif
+#define NFS_KERBSRV "rcmd" /* Kerberos Service for NFS */
+#define NFS_KERBTTL (30 * 60) /* Credential ttl (sec) */
+#define NFS_KERBCLOCKSKEW (5 * 60) /* Clock skew (sec) */
+#define NFS_KERBW1(t) (*((u_long *)(&((t).dat[((t).length + 3) & ~0x3]))))
+#endif
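
RPC_SIZ (24 bytes) corresponds to the six fixed 32-bit words that open every version 2 call message in RFC 1057: xid, direction, RPC version, program, program version and procedure; the credential and verifier blocks (bounded by RPCAUTH_MAXSIZ and RPCVERF_MAXSIZ) follow. A stand-alone sketch that lays those six words out in network byte order; the xid and procedure values are arbitrary examples.

/*
 * Sketch: the fixed front of an RPC version 2 call header.  Constants
 * mirror rpcv2.h; the NULL procedure of the NFS program is used here.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

#define RPC_VER2	2
#define RPC_CALL	0
#define RPCPROG_NFS	100003
#define RPC_SIZ		24

int
main(void)
{
	uint32_t hdr[6];
	unsigned char wire[RPC_SIZ];

	hdr[0] = htonl(0x12345678);	/* xid (example value) */
	hdr[1] = htonl(RPC_CALL);	/* direction: call */
	hdr[2] = htonl(RPC_VER2);	/* RPC protocol version 2 */
	hdr[3] = htonl(RPCPROG_NFS);	/* program number */
	hdr[4] = htonl(3);		/* program version (NFS_VER3) */
	hdr[5] = htonl(0);		/* procedure (NULL proc) */

	memcpy(wire, hdr, sizeof(hdr));
	printf("fixed call header: %zu bytes (RPC_SIZ = %d)\n",
	    sizeof(hdr), RPC_SIZ);
	return (0);
}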
diff --git a/sys/nfs/xdr_subs.h b/sys/nfs/xdr_subs.h
new file mode 100644
index 0000000..82ee38a
--- /dev/null
+++ b/sys/nfs/xdr_subs.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)xdr_subs.h 8.3 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+
+#ifndef _NFS_XDR_SUBS_H_
+#define _NFS_XDR_SUBS_H_
+
+/*
+ * Macros used for conversion to/from xdr representation by nfs...
+ * These use the MACHINE DEPENDENT routines ntohl, htonl
+ * As defined by "XDR: External Data Representation Standard" RFC1014
+ *
+ * To simplify the implementation, we use ntohl/htonl even on big-endian
+ * machines, and count on them being `#define'd away. Some of these
+ * might be slightly more efficient as quad_t copies on a big-endian,
+ * but we cannot count on their alignment anyway.
+ */
+
+#define fxdr_unsigned(t, v) ((t)ntohl((int32_t)(v)))
+#define txdr_unsigned(v) (htonl((int32_t)(v)))
+
+#define fxdr_nfsv2time(f, t) \
+do { \
+ (t)->tv_sec = ntohl(((struct nfsv2_time *)(f))->nfsv2_sec); \
+ if (((struct nfsv2_time *)(f))->nfsv2_usec != 0xffffffff) \
+ (t)->tv_nsec = 1000 * ntohl(((struct nfsv2_time *)(f))->nfsv2_usec); \
+ else \
+ (t)->tv_nsec = 0; \
+} while (0)
+#define txdr_nfsv2time(f, t) \
+do { \
+ ((struct nfsv2_time *)(t))->nfsv2_sec = htonl((f)->tv_sec); \
+ if ((f)->tv_nsec != -1) \
+ ((struct nfsv2_time *)(t))->nfsv2_usec = htonl((f)->tv_nsec / 1000); \
+ else \
+ ((struct nfsv2_time *)(t))->nfsv2_usec = 0xffffffff; \
+} while (0)
+
+#define fxdr_nfsv3time(f, t) \
+do { \
+ (t)->tv_sec = ntohl(((struct nfsv3_time *)(f))->nfsv3_sec); \
+ (t)->tv_nsec = ntohl(((struct nfsv3_time *)(f))->nfsv3_nsec); \
+} while (0)
+#define txdr_nfsv3time(f, t) \
+do { \
+ ((struct nfsv3_time *)(t))->nfsv3_sec = htonl((f)->tv_sec); \
+ ((struct nfsv3_time *)(t))->nfsv3_nsec = htonl((f)->tv_nsec); \
+} while (0)
+
+#define fxdr_hyper(f) \
+ ((((u_quad_t)ntohl(((u_int32_t *)(f))[0])) << 32) | \
+ (u_quad_t)(ntohl(((u_int32_t *)(f))[1])))
+#define txdr_hyper(f, t) \
+do { \
+ ((u_int32_t *)(t))[0] = htonl((u_int32_t)((f) >> 32)); \
+ ((u_int32_t *)(t))[1] = htonl((u_int32_t)((f) & 0xffffffff)); \
+} while (0)
+
+#endif
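
txdr_hyper()/fxdr_hyper() split a 64-bit quantity into two 32-bit big-endian words and put it back together, which is how NFSv3 file sizes, offsets and fsids travel on the wire. Below is a stand-alone round-trip of the two macros, copied verbatim from above with u_quad_t spelled uint64_t so the sketch builds outside the kernel.

/*
 * Sketch: round-trip a 64-bit value through txdr_hyper()/fxdr_hyper().
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

#define fxdr_hyper(f) \
	((((uint64_t)ntohl(((uint32_t *)(f))[0])) << 32) | \
	 (uint64_t)(ntohl(((uint32_t *)(f))[1])))
#define txdr_hyper(f, t) \
do { \
	((uint32_t *)(t))[0] = htonl((uint32_t)((f) >> 32)); \
	((uint32_t *)(t))[1] = htonl((uint32_t)((f) & 0xffffffff)); \
} while (0)

int
main(void)
{
	uint64_t size = 0x0000000123456789ULL;	/* e.g. an NFSv3 file size */
	uint32_t wire[2];			/* two big-endian words */

	txdr_hyper(size, wire);
	printf("decoded: 0x%llx\n", (unsigned long long)fxdr_hyper(wire));
	return (0);
}

The word-at-a-time form also explains the alignment caveat in the header comment: the macros never assume the source or destination is 64-bit aligned, only 32-bit.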