Diffstat (limited to 'sys/nfs')
-rw-r--r-- | sys/nfs/bootp_subr.c   | 1257
-rw-r--r-- | sys/nfs/krpc.h         |   31
-rw-r--r-- | sys/nfs/krpc_subr.c    |  481
-rw-r--r-- | sys/nfs/nfs.h          |  739
-rw-r--r-- | sys/nfs/nfs_bio.c      | 1555
-rw-r--r-- | sys/nfs/nfs_common.c   | 2272
-rw-r--r-- | sys/nfs/nfs_common.h   |  564
-rw-r--r-- | sys/nfs/nfs_node.c     |  390
-rw-r--r-- | sys/nfs/nfs_nqlease.c  | 1307
-rw-r--r-- | sys/nfs/nfs_serv.c     | 4068
-rw-r--r-- | sys/nfs/nfs_socket.c   | 2284
-rw-r--r-- | sys/nfs/nfs_srvcache.c |  356
-rw-r--r-- | sys/nfs/nfs_subs.c     | 2272
-rw-r--r-- | sys/nfs/nfs_syscalls.c | 1199
-rw-r--r-- | sys/nfs/nfs_vfsops.c   | 1078
-rw-r--r-- | sys/nfs/nfs_vnops.c    | 3354
-rw-r--r-- | sys/nfs/nfsdiskless.h  |  124
-rw-r--r-- | sys/nfs/nfsm_subs.h    |  564
-rw-r--r-- | sys/nfs/nfsmount.h     |  106
-rw-r--r-- | sys/nfs/nfsnode.h      |  210
-rw-r--r-- | sys/nfs/nfsproto.h     |  439
-rw-r--r-- | sys/nfs/nfsrtt.h       |  104
-rw-r--r-- | sys/nfs/nfsrvcache.h   |   91
-rw-r--r-- | sys/nfs/nfsv2.h        |   40
-rw-r--r-- | sys/nfs/nqnfs.h        |  218
-rw-r--r-- | sys/nfs/rpcv2.h        |  143
-rw-r--r-- | sys/nfs/xdr_subs.h     |   95
27 files changed, 25341 insertions, 0 deletions
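Not part of the commit itself: below is a minimal userspace sketch of how bootpc_init() in the new bootp_subr.c walks the RFC 1048 vendor area of a BOOTP reply, that is, the 99.130.83.99 magic cookie followed by {code, len, value} fields and terminated by code 255. The option numbers shown (1 = subnet mask, 3 = routers, 12 = hostname, 17 = root path) are the ones the patch handles; the parse_vend()/main() wrappers and the sample hostname "nfsbox" are illustrative assumptions, not code from the commit.

/*
 * Illustrative sketch only; mirrors the vendor-area loop in bootpc_init().
 */
#include <stdio.h>

static void
parse_vend(const unsigned char *vend, size_t size)
{
	size_t j;

	if (size < 4 || vend[0] != 99 || vend[1] != 130 ||
	    vend[2] != 83 || vend[3] != 99)
		return;			/* no RFC 1048 magic cookie */
	j = 4;
	while (j < size) {
		unsigned char code = vend[j];
		unsigned char len;

		if (code == 255)	/* end option */
			break;
		if (code == 0) {	/* pad option has no length byte */
			j++;
			continue;
		}
		if (j + 1 >= size)
			break;
		len = vend[j + 1];
		j += 2;
		if (j + len > size) {
			printf("Truncated field\n");
			break;
		}
		printf("option %u, %u bytes\n", code, len);
		j += len;
	}
}

int
main(void)
{
	/* cookie, option 12 (hostname) "nfsbox", end marker */
	unsigned char vend[] = { 99, 130, 83, 99,
	    12, 6, 'n', 'f', 's', 'b', 'o', 'x', 255 };

	parse_vend(vend, sizeof(vend));
	return (0);
}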
diff --git a/sys/nfs/bootp_subr.c b/sys/nfs/bootp_subr.c new file mode 100644 index 0000000..3295248 --- /dev/null +++ b/sys/nfs/bootp_subr.c @@ -0,0 +1,1257 @@ +/* $FreeBSD$ */ + +/* + * Copyright (c) 1995 Gordon Ross, Adam Glass + * Copyright (c) 1992 Regents of the University of California. + * All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * based on: + * nfs/krpc_subr.c + * $NetBSD: krpc_subr.c,v 1.10 1995/08/08 20:43:43 gwr Exp $ + */ + +#include "opt_bootp.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/sockio.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/uio.h> + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <net/if_types.h> +#include <net/if_dl.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsdiskless.h> +#include <nfs/krpc.h> +#include <nfs/xdr_subs.h> + + +#define BOOTP_MIN_LEN 300 /* Minimum size of bootp udp packet */ + +/* + * What is the longest we will wait before re-sending a request? + * Note this is also the frequency of "RPC timeout" messages. + * The re-send loop count sup linearly to this maximum, so the + * first complaint will happen after (1+2+3+4+5)=15 seconds. 
+ */ +#define MAX_RESEND_DELAY 5 /* seconds */ + +/* Definitions from RFC951 */ +struct bootp_packet { + u_int8_t op; + u_int8_t htype; + u_int8_t hlen; + u_int8_t hops; + u_int32_t xid; + u_int16_t secs; + u_int16_t flags; + struct in_addr ciaddr; + struct in_addr yiaddr; + struct in_addr siaddr; + struct in_addr giaddr; + unsigned char chaddr[16]; + char sname[64]; + char file[128]; + unsigned char vend[256]; +}; + +#define IPPORT_BOOTPC 68 +#define IPPORT_BOOTPS 67 + +extern int nfs_diskless_valid; +extern struct nfsv3_diskless nfsv3_diskless; + +/* mountd RPC */ +static int md_mount __P((struct sockaddr_in *mdsin, char *path, + u_char *fhp, int *fhsizep, struct nfs_args *args,struct proc *procp)); +static int md_lookup_swap __P((struct sockaddr_in *mdsin,char *path, + u_char *fhp, int *fhsizep, + struct nfs_args *args, + struct proc *procp)); +static int setfs __P((struct sockaddr_in *addr, char *path, char *p)); +static int getdec __P((char **ptr)); +static char *substr __P((char *a,char *b)); +static void mountopts __P((struct nfs_args *args, char *p)); +static int xdr_opaque_decode __P((struct mbuf **ptr,u_char *buf, + int len)); +static int xdr_int_decode __P((struct mbuf **ptr,int *iptr)); +static void printip __P((char *prefix,struct in_addr addr)); + +#ifdef BOOTP_DEBUG +void bootpboot_p_sa(struct sockaddr *sa,struct sockaddr *ma); +void bootpboot_p_ma(struct sockaddr *ma); +void bootpboot_p_rtentry(struct rtentry *rt); +void bootpboot_p_tree(struct radix_node *rn); +void bootpboot_p_rtlist(void); +void bootpboot_p_iflist(void); +#endif + +static int bootpc_call(struct bootp_packet *call, + struct bootp_packet *reply, + struct proc *procp); + +static int bootpc_fakeup_interface(struct ifreq *ireq, + struct socket *so, + struct proc *procp); + +static int +bootpc_adjust_interface(struct ifreq *ireq,struct socket *so, + struct sockaddr_in *myaddr, + struct sockaddr_in *netmask, + struct sockaddr_in *gw, + struct proc *procp); + +void bootpc_init(void); + +#ifdef BOOTP_DEBUG +void bootpboot_p_sa(sa,ma) + struct sockaddr *sa; + struct sockaddr *ma; +{ + if (!sa) { + printf("(sockaddr *) <null>"); + return; + } + switch (sa->sa_family) { + case AF_INET: + { + struct sockaddr_in *sin = (struct sockaddr_in *) sa; + printf("inet %x",ntohl(sin->sin_addr.s_addr)); + if (ma) { + struct sockaddr_in *sin = (struct sockaddr_in *) ma; + printf(" mask %x",ntohl(sin->sin_addr.s_addr)); + } + } + break; + case AF_LINK: + { + struct sockaddr_dl *sli = (struct sockaddr_dl *) sa; + int i; + printf("link %.*s ",sli->sdl_nlen,sli->sdl_data); + for (i=0;i<sli->sdl_alen;i++) { + if (i>0) + printf(":"); + printf("%x",(unsigned char) sli->sdl_data[i+sli->sdl_nlen]); + } + } + break; + default: + printf("af%d",sa->sa_family); + } +} + +void bootpboot_p_ma(ma) + struct sockaddr *ma; +{ + if (!ma) { + printf("<null>"); + return; + } + printf("%x",*(int*)ma); +} + +void bootpboot_p_rtentry(rt) + struct rtentry *rt; +{ + bootpboot_p_sa(rt_key(rt),rt_mask(rt)); + printf(" "); + bootpboot_p_ma(rt->rt_genmask); + printf(" "); + bootpboot_p_sa(rt->rt_gateway,NULL); + printf(" "); + printf("flags %x",(unsigned short) rt->rt_flags); + printf(" %d",rt->rt_rmx.rmx_expire); + printf(" %s%d\n",rt->rt_ifp->if_name,rt->rt_ifp->if_unit); +} +void bootpboot_p_tree(rn) + struct radix_node *rn; +{ + while (rn) { + if (rn->rn_b < 0) { + if (rn->rn_flags & RNF_ROOT) { + } else { + bootpboot_p_rtentry((struct rtentry *) rn); + } + rn = rn->rn_dupedkey; + } else { + bootpboot_p_tree(rn->rn_l); + bootpboot_p_tree(rn->rn_r); + 
return; + } + + } +} + +void bootpboot_p_rtlist(void) +{ + printf("Routing table:\n"); + bootpboot_p_tree(rt_tables[AF_INET]->rnh_treetop); +} + +void bootpboot_p_iflist(void) +{ + struct ifnet *ifp; + struct ifaddr *ifa; + printf("Interface list:\n"); + for (ifp = TAILQ_FIRST(&ifnet); ifp != 0; ifp = TAILQ_NEXT(ifp,if_link)) + { + for (ifa = TAILQ_FIRST(&ifp->if_addrhead) ;ifa; + ifa=TAILQ_NEXT(ifa,ifa_link)) + if (ifa->ifa_addr->sa_family == AF_INET ) { + printf("%s%d flags %x, addr %x, bcast %x, net %x\n", + ifp->if_name,ifp->if_unit, + (unsigned short) ifp->if_flags, + ntohl(((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr), + ntohl(((struct sockaddr_in *) ifa->ifa_dstaddr)->sin_addr.s_addr), + ntohl(((struct sockaddr_in *) ifa->ifa_netmask)->sin_addr.s_addr) + ); + } + } +} +#endif + +static int +bootpc_call(call,reply,procp) + struct bootp_packet *call; + struct bootp_packet *reply; /* output */ + struct proc *procp; +{ + struct socket *so; + struct sockaddr_in *sin, sa; + struct uio auio; + struct sockopt sopt; + struct iovec aio; + struct timeval tv; + int error, on, len, rcvflg, secs, timo; + + /* + * Create socket and set its recieve timeout. + */ + if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0,procp))) + goto out; + + tv.tv_sec = 1; + tv.tv_usec = 0; + bzero(&sopt, sizeof sopt); + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_RCVTIMEO; + sopt.sopt_val = &tv; + sopt.sopt_valsize = sizeof tv; + + if ((error = sosetopt(so, &sopt)) != 0) + goto out; + + /* + * Enable broadcast. + */ + on = 1; + sopt.sopt_val = &on; + sopt.sopt_valsize = sizeof on; + sopt.sopt_name = SO_BROADCAST; + if ((error = sosetopt(so, &sopt)) != 0) + goto out; + + /* + * Bind the local endpoint to a bootp client port. + */ + sin = &sa; + bzero(sin, sizeof *sin); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + sin->sin_port = htons(IPPORT_BOOTPC); + error = sobind(so, (struct sockaddr *)sin, procp); + if (error) { + printf("bind failed\n"); + goto out; + } + + /* + * Setup socket address for the server. + */ + sin = &sa; + bzero(sin, sizeof *sin); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_BROADCAST; + sin->sin_port = htons(IPPORT_BOOTPS); + + /* + * Send it, repeatedly, until a reply is received, + * but delay each re-send by an increasing amount. + * If the delay hits the maximum, start complaining. + */ + timo = 0; + for (;;) { + /* Send BOOTP request (or re-send). */ + + aio.iov_base = (caddr_t) call; + aio.iov_len = sizeof(*call); + + auio.uio_iov = &aio; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_offset = 0; + auio.uio_resid = sizeof(*call); + auio.uio_procp = procp; + + error = sosend(so, (struct sockaddr *)sin, &auio, NULL, + NULL, 0, procp); + if (error) { + printf("bootpc_call: sosend: %d state %08x\n", error, (int)so->so_state); + goto out; + } + + /* Determine new timeout. */ + if (timo < MAX_RESEND_DELAY) + timo++; + else + printf("BOOTP timeout for server 0x%lx\n", + (u_long)ntohl(sin->sin_addr.s_addr)); + + /* + * Wait for up to timo seconds for a reply. + * The socket receive timeout was set to 1 second. 
+ */ + secs = timo; + while (secs > 0) { + aio.iov_base = (caddr_t) reply; + aio.iov_len = sizeof(*reply); + + auio.uio_iov = &aio; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_offset = 0; + auio.uio_resid = sizeof(*reply); + auio.uio_procp = procp; + + rcvflg = 0; + error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg); + if (error == EWOULDBLOCK) { + secs--; + call->secs=htons(ntohs(call->secs)+1); + continue; + } + if (error) + goto out; + len = sizeof(*reply) - auio.uio_resid; + + /* Do we have the required number of bytes ? */ + if (len < BOOTP_MIN_LEN) + continue; + + /* Is it the right reply? */ + if (reply->op != 2) + continue; + + if (reply->xid != call->xid) + continue; + + if (reply->hlen != call->hlen) + continue; + + if (bcmp(reply->chaddr,call->chaddr,call->hlen)) + continue; + + goto gotreply; /* break two levels */ + + } /* while secs */ + } /* forever send/receive */ + + error = ETIMEDOUT; + goto out; + + gotreply: + out: + soclose(so); + return error; +} + +static int +bootpc_fakeup_interface(struct ifreq *ireq,struct socket *so, + struct proc *procp) +{ + struct sockaddr_in *sin; + int error; + struct sockaddr_in dst; + struct sockaddr_in gw; + struct sockaddr_in mask; + + /* + * Bring up the interface. + * + * Get the old interface flags and or IFF_UP into them; if + * IFF_UP set blindly, interface selection can be clobbered. + */ + error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, procp); + if (error) + panic("bootpc_fakeup_interface: GIFFLAGS, error=%d", error); + ireq->ifr_flags |= IFF_UP; + error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, procp); + if (error) + panic("bootpc_fakeup_interface: SIFFLAGS, error=%d", error); + + /* + * Do enough of ifconfig(8) so that the chosen interface + * can talk to the servers. 
(just set the address) + */ + + /* addr is 0.0.0.0 */ + + sin = (struct sockaddr_in *)&ireq->ifr_addr; + bzero((caddr_t)sin, sizeof(*sin)); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + error = ifioctl(so, SIOCSIFADDR, (caddr_t)ireq, procp); + if (error) + panic("bootpc_fakeup_interface: set if addr, error=%d", error); + + /* netmask is 0.0.0.0 */ + + sin = (struct sockaddr_in *)&ireq->ifr_addr; + bzero((caddr_t)sin, sizeof(*sin)); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, procp); + if (error) + panic("bootpc_fakeup_interface: set if net addr, error=%d", error); + + /* Broadcast is 255.255.255.255 */ + + sin = (struct sockaddr_in *)&ireq->ifr_addr; + bzero((caddr_t)sin, sizeof(*sin)); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_BROADCAST; + error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, procp); + if (error) + panic("bootpc_fakeup_interface: set if broadcast addr, error=%d", error); + + /* Add default route to 0.0.0.0 so we can send data */ + + bzero((caddr_t) &dst, sizeof(dst)); + dst.sin_len=sizeof(dst); + dst.sin_family=AF_INET; + dst.sin_addr.s_addr = htonl(0); + + bzero((caddr_t) &gw, sizeof(gw)); + gw.sin_len=sizeof(gw); + gw.sin_family=AF_INET; + gw.sin_addr.s_addr = htonl(0x0); + + bzero((caddr_t) &mask, sizeof(mask)); + mask.sin_len=sizeof(mask); + mask.sin_family=AF_INET; + mask.sin_addr.s_addr = htonl(0); + + error = rtrequest(RTM_ADD, + (struct sockaddr *) &dst, + (struct sockaddr *) &gw, + (struct sockaddr *) &mask, + RTF_UP | RTF_STATIC + , NULL); + if (error) + printf("bootpc_fakeup_interface: add default route, error=%d\n", error); + return error; +} + +static int +bootpc_adjust_interface(struct ifreq *ireq,struct socket *so, + struct sockaddr_in *myaddr, + struct sockaddr_in *netmask, + struct sockaddr_in *gw, + struct proc *procp) +{ + int error; + struct sockaddr_in oldgw; + struct sockaddr_in olddst; + struct sockaddr_in oldmask; + struct sockaddr_in *sin; + + /* Remove old default route to 0.0.0.0 */ + + bzero((caddr_t) &olddst, sizeof(olddst)); + olddst.sin_len=sizeof(olddst); + olddst.sin_family=AF_INET; + olddst.sin_addr.s_addr = INADDR_ANY; + + bzero((caddr_t) &oldgw, sizeof(oldgw)); + oldgw.sin_len=sizeof(oldgw); + oldgw.sin_family=AF_INET; + oldgw.sin_addr.s_addr = INADDR_ANY; + + bzero((caddr_t) &oldmask, sizeof(oldmask)); + oldmask.sin_len=sizeof(oldmask); + oldmask.sin_family=AF_INET; + oldmask.sin_addr.s_addr = INADDR_ANY; + + error = rtrequest(RTM_DELETE, + (struct sockaddr *) &olddst, + (struct sockaddr *) &oldgw, + (struct sockaddr *) &oldmask, + (RTF_UP | RTF_STATIC), NULL); + if (error) { + printf("nfs_boot: del default route, error=%d\n", error); + return error; + } + + /* + * Do enough of ifconfig(8) so that the chosen interface + * can talk to the servers. 
(just set the address) + */ + bcopy(netmask,&ireq->ifr_addr,sizeof(*netmask)); + error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, procp); + if (error) + panic("nfs_boot: set if netmask, error=%d", error); + + /* Broadcast is with host part of IP address all 1's */ + + sin = (struct sockaddr_in *)&ireq->ifr_addr; + bzero((caddr_t)sin, sizeof(*sin)); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = myaddr->sin_addr.s_addr | ~ netmask->sin_addr.s_addr; + error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, procp); + if (error) + panic("bootpc_call: set if broadcast addr, error=%d", error); + + bcopy(myaddr,&ireq->ifr_addr,sizeof(*myaddr)); + error = ifioctl(so, SIOCSIFADDR, (caddr_t)ireq, procp); + if (error) + panic("nfs_boot: set if addr, error=%d", error); + + /* Add new default route */ + + error = rtrequest(RTM_ADD, + (struct sockaddr *) &olddst, + (struct sockaddr *) gw, + (struct sockaddr *) &oldmask, + (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL); + if (error) { + printf("nfs_boot: add net route, error=%d\n", error); + return error; + } + + return 0; +} + +static int setfs(addr, path, p) + struct sockaddr_in *addr; + char *path; + char *p; +{ + unsigned ip = 0; + int val; + + if (((val = getdec(&p)) < 0) || (val > 255)) return(0); + ip = val << 24; + if (*p != '.') return(0); + p++; + if (((val = getdec(&p)) < 0) || (val > 255)) return(0); + ip |= (val << 16); + if (*p != '.') return(0); + p++; + if (((val = getdec(&p)) < 0) || (val > 255)) return(0); + ip |= (val << 8); + if (*p != '.') return(0); + p++; + if (((val = getdec(&p)) < 0) || (val > 255)) return(0); + ip |= val; + if (*p != ':') return(0); + p++; + + addr->sin_addr.s_addr = htonl(ip); + addr->sin_len = sizeof(struct sockaddr_in); + addr->sin_family = AF_INET; + + strncpy(path,p,MNAMELEN-1); + return(1); +} + +static int getdec(ptr) + char **ptr; +{ + char *p = *ptr; + int ret=0; + if ((*p < '0') || (*p > '9')) return(-1); + while ((*p >= '0') && (*p <= '9')) { + ret = ret*10 + (*p - '0'); + p++; + } + *ptr = p; + return(ret); +} + +static char *substr(a,b) + char *a,*b; +{ + char *loc1; + char *loc2; + + while (*a != '\0') { + loc1 = a; + loc2 = b; + while (*loc1 == *loc2++) { + if (*loc1 == '\0') return (0); + loc1++; + if (*loc2 == '\0') return (loc1); + } + a++; + } + return (0); +} + +static void mountopts(args,p) + struct nfs_args *args; + char *p; +{ + char *tmp; + + args->flags = NFSMNT_RSIZE | NFSMNT_WSIZE | NFSMNT_RESVPORT; + args->sotype = SOCK_DGRAM; + if ((tmp = (char *)substr(p,"rsize="))) + args->rsize=getdec(&tmp); + if ((tmp = (char *)substr(p,"wsize="))) + args->wsize=getdec(&tmp); + if ((tmp = (char *)substr(p,"intr"))) + args->flags |= NFSMNT_INT; + if ((tmp = (char *)substr(p,"soft"))) + args->flags |= NFSMNT_SOFT; + if ((tmp = (char *)substr(p,"noconn"))) + args->flags |= NFSMNT_NOCONN; + if ((tmp = (char *)substr(p, "tcp"))) + args->sotype = SOCK_STREAM; +} + +static int xdr_opaque_decode(mptr,buf,len) + struct mbuf **mptr; + u_char *buf; + int len; +{ + struct mbuf *m; + int alignedlen; + + m = *mptr; + alignedlen = ( len + 3 ) & ~3; + + if (m->m_len < alignedlen) { + m = m_pullup(m,alignedlen); + if (m == NULL) { + *mptr = NULL; + return EBADRPC; + } + } + bcopy(mtod(m,u_char *),buf,len); + m_adj(m,alignedlen); + *mptr = m; + return 0; +} + +static int xdr_int_decode(mptr,iptr) + struct mbuf **mptr; + int *iptr; +{ + u_int32_t i; + if (xdr_opaque_decode(mptr,(u_char *) &i,sizeof(u_int32_t))) + return EBADRPC; + *iptr = fxdr_unsigned(u_int32_t,i); + return 0; +} + 
+static void printip(char *prefix,struct in_addr addr) +{ + unsigned int ip; + + ip = ntohl(addr.s_addr); + + printf("%s is %d.%d.%d.%d\n",prefix, + ip >> 24, (ip >> 16) & 255 ,(ip >> 8) & 255 ,ip & 255 ); +} + +void +bootpc_init(void) +{ + struct bootp_packet call; + struct bootp_packet reply; + static u_int32_t xid = ~0xFF; + + struct ifreq ireq; + struct ifnet *ifp; + struct socket *so; + int error; + int code,ncode,len; + int j; + char *p; + unsigned int ip; + + struct sockaddr_in myaddr; + struct sockaddr_in netmask; + struct sockaddr_in gw; + int gotgw=0; + int gotnetmask=0; + int gotrootpath=0; + int gotswappath=0; + char lookup_path[24]; + +#define EALEN 6 + struct ifaddr *ifa; + struct sockaddr_dl *sdl = NULL; + char *delim; + + struct nfsv3_diskless *nd = &nfsv3_diskless; + struct proc *procp = curproc; + + /* + * If already filled in, don't touch it here + */ + if (nfs_diskless_valid) + return; + + /* + * Wait until arp entries can be handled. + */ + while (time_second == 0) + tsleep(&time_second, PZERO+8, "arpkludge", 10); + + /* + * Find a network interface. + */ +#ifdef BOOTP_WIRED_TO + printf("bootpc_init: wired to interface '%s'\n", + __XSTRING(BOOTP_WIRED_TO)); +#endif + bzero(&ireq, sizeof(ireq)); + for (ifp = TAILQ_FIRST(&ifnet); ifp != 0; ifp = TAILQ_NEXT(ifp,if_link)) + { + snprintf(ireq.ifr_name, sizeof(ireq.ifr_name), + "%s%d", ifp->if_name, ifp->if_unit); +#ifdef BOOTP_WIRED_TO + if (strcmp(ireq.ifr_name, __XSTRING(BOOTP_WIRED_TO)) == 0) + break; +#else + if ((ifp->if_flags & + (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) + break; +#endif + } + if (ifp == NULL) + panic("bootpc_init: no suitable interface"); + strcpy(nd->myif.ifra_name,ireq.ifr_name); + printf("bootpc_init: using network interface '%s'\n", + ireq.ifr_name); + + if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0,procp)) != 0) + panic("nfs_boot: socreate, error=%d", error); + + bootpc_fakeup_interface(&ireq,so,procp); + + printf("Bootpc testing starting\n"); + + /* Get HW address */ + + for (ifa = TAILQ_FIRST(&ifp->if_addrhead) ;ifa; + ifa=TAILQ_NEXT(ifa,ifa_link)) + if (ifa->ifa_addr->sa_family == AF_LINK && + (sdl = ((struct sockaddr_dl *) ifa->ifa_addr)) && + sdl->sdl_type == IFT_ETHER) + break; + + if (!sdl) + panic("bootpc: Unable to find HW address"); + if (sdl->sdl_alen != EALEN ) + panic("bootpc: HW address len is %d, expected value is %d", + sdl->sdl_alen,EALEN); + + printf("bootpc hw address is "); + delim=""; + for (j=0;j<sdl->sdl_alen;j++) { + printf("%s%x",delim,((unsigned char *)LLADDR(sdl))[j]); + delim=":"; + } + printf("\n"); + +#if 0 + bootpboot_p_iflist(); + bootpboot_p_rtlist(); +#endif + + bzero((caddr_t) &call, sizeof(call)); + + /* bootpc part */ + call.op = 1; /* BOOTREQUEST */ + call.htype= 1; /* 10mb ethernet */ + call.hlen=sdl->sdl_alen; /* Hardware address length */ + call.hops=0; + xid++; + call.xid = txdr_unsigned(xid); + bcopy(LLADDR(sdl),&call.chaddr,sdl->sdl_alen); + + call.vend[0]=99; + call.vend[1]=130; + call.vend[2]=83; + call.vend[3]=99; + call.vend[4]=255; + + call.secs = 0; + call.flags = htons(0x8000); /* We need an broadcast answer */ + + error = bootpc_call(&call,&reply,procp); + + if (error) { +#ifdef BOOTP_NFSROOT + panic("BOOTP call failed"); +#endif + return; + } + + bzero(&myaddr,sizeof(myaddr)); + bzero(&netmask,sizeof(netmask)); + bzero(&gw,sizeof(gw)); + + myaddr.sin_len = sizeof(myaddr); + myaddr.sin_family = AF_INET; + + netmask.sin_len = sizeof(netmask); + netmask.sin_family = AF_INET; + + gw.sin_len = sizeof(gw); + gw.sin_family= AF_INET; + + 
nd->root_args.version = NFS_ARGSVERSION; + nd->root_args.rsize = 8192; + nd->root_args.wsize = 8192; + nd->root_args.sotype = SOCK_DGRAM; + nd->root_args.flags = (NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_RESVPORT); + + nd->swap_saddr.sin_len = sizeof(gw); + nd->swap_saddr.sin_family = AF_INET; + + nd->swap_args.version = NFS_ARGSVERSION; + nd->swap_args.rsize = 8192; + nd->swap_args.wsize = 8192; + nd->swap_args.sotype = SOCK_DGRAM; + nd->swap_args.flags = (NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_RESVPORT); + + myaddr.sin_addr = reply.yiaddr; + + ip = ntohl(myaddr.sin_addr.s_addr); + snprintf(lookup_path, sizeof(lookup_path), "swap.%d.%d.%d.%d", + ip >> 24, (ip >> 16) & 255 ,(ip >> 8) & 255 ,ip & 255 ); + + printip("My ip address",myaddr.sin_addr); + + printip("Server ip address",reply.siaddr); + + gw.sin_addr = reply.giaddr; + printip("Gateway ip address",reply.giaddr); + + if (reply.sname[0]) + printf("Server name is %s\n",reply.sname); + if (reply.file[0]) + printf("boot file is %s\n",reply.file); + if (reply.vend[0]==99 && reply.vend[1]==130 && + reply.vend[2]==83 && reply.vend[3]==99) { + j=4; + ncode = reply.vend[j]; + while (j<sizeof(reply.vend)) { + code = reply.vend[j] = ncode; + if (code==255) + break; + if (code==0) { + j++; + continue; + } + len = reply.vend[j+1]; + j+=2; + if (len+j>=sizeof(reply.vend)) { + printf("Truncated field"); + break; + } + ncode = reply.vend[j+len]; + reply.vend[j+len]='\0'; + p = &reply.vend[j]; + switch (code) { + case 1: + if (len!=4) + panic("bootpc: subnet mask len is %d",len); + bcopy(&reply.vend[j],&netmask.sin_addr,4); + gotnetmask=1; + printip("Subnet mask",netmask.sin_addr); + break; + case 6: /* Domain Name servers. Unused */ + case 16: /* Swap server IP address. unused */ + case 2: + /* Time offset */ + break; + case 3: + /* Routers */ + if (len % 4) + panic("bootpc: Router Len is %d",len); + if (len > 0) { + bcopy(&reply.vend[j],&gw.sin_addr,4); + printip("Router",gw.sin_addr); + gotgw=1; + } + break; + case 17: + if (setfs(&nd->root_saddr, nd->root_hostnam, p)) { + printf("rootfs is %s\n",p); + gotrootpath=1; + } else + panic("Failed to set rootfs to %s",p); + break; + case 12: + if (len>=MAXHOSTNAMELEN) + panic("bootpc: hostname >=%d bytes",MAXHOSTNAMELEN); + strncpy(nd->my_hostnam,&reply.vend[j],len); + nd->my_hostnam[len]=0; + strncpy(hostname,&reply.vend[j],len); + hostname[len]=0; + printf("Hostname is %s\n",hostname); + break; + case 128: + if (setfs(&nd->swap_saddr, nd->swap_hostnam, p)) { + gotswappath=1; + printf("swapfs is %s\n",p); + } else + panic("Failed to set swapfs to %s",p); + break; + case 129: + { + int swaplen; + if (len!=4) + panic("bootpc: Expected 4 bytes for swaplen, not %d bytes",len); + bcopy(&reply.vend[j],&swaplen,4); + nd->swap_nblks = ntohl(swaplen); + printf("bootpc: Swap size is %d KB\n",nd->swap_nblks); + } + break; + case 130: /* root mount options */ + mountopts(&nd->root_args,p); + break; + case 131: /* swap mount options */ + mountopts(&nd->swap_args,p); + break; + default: + printf("Ignoring field type %d\n",code); + } + j+=len; + } + } + + if (!gotswappath) + nd->swap_nblks = 0; +#ifdef BOOTP_NFSROOT + if (!gotrootpath) + panic("bootpc: No root path offered"); +#endif + + if (!gotnetmask) { + if (IN_CLASSA(ntohl(myaddr.sin_addr.s_addr))) + netmask.sin_addr.s_addr = htonl(IN_CLASSA_NET); + else if (IN_CLASSB(ntohl(myaddr.sin_addr.s_addr))) + netmask.sin_addr.s_addr = htonl(IN_CLASSB_NET); + else + netmask.sin_addr.s_addr = htonl(IN_CLASSC_NET); + } + if (!gotgw) { + /* Use proxyarp */ + gw.sin_addr.s_addr = 
myaddr.sin_addr.s_addr; + } + +#if 0 + bootpboot_p_iflist(); + bootpboot_p_rtlist(); +#endif + error = bootpc_adjust_interface(&ireq,so, + &myaddr,&netmask,&gw,procp); + + soclose(so); + +#if 0 + bootpboot_p_iflist(); + bootpboot_p_rtlist(); +#endif + + if (gotrootpath) { + + error = md_mount(&nd->root_saddr, nd->root_hostnam, + nd->root_fh, &nd->root_fhsize, + &nd->root_args,procp); + if (error) + panic("nfs_boot: mountd root, error=%d", error); + + if (gotswappath) { + + error = md_mount(&nd->swap_saddr, + nd->swap_hostnam, + nd->swap_fh, &nd->swap_fhsize,&nd->swap_args,procp); + if (error) + panic("nfs_boot: mountd swap, error=%d", error); + + error = md_lookup_swap(&nd->swap_saddr,lookup_path,nd->swap_fh, + &nd->swap_fhsize, &nd->swap_args,procp); + if (error) + panic("nfs_boot: lookup swap, error=%d", error); + } + nfs_diskless_valid = 3; + } + + + bcopy(&myaddr,&nd->myif.ifra_addr,sizeof(myaddr)); + bcopy(&myaddr,&nd->myif.ifra_broadaddr,sizeof(myaddr)); + ((struct sockaddr_in *) &nd->myif.ifra_broadaddr)->sin_addr.s_addr = + myaddr.sin_addr.s_addr | ~ netmask.sin_addr.s_addr; + bcopy(&netmask,&nd->myif.ifra_mask,sizeof(netmask)); + +#if 0 + bootpboot_p_iflist(); + bootpboot_p_rtlist(); +#endif + return; +} + +/* + * RPC: mountd/mount + * Given a server pathname, get an NFS file handle. + * Also, sets sin->sin_port to the NFS service port. + */ +static int +md_mount(mdsin, path, fhp, fhsizep, args, procp) + struct sockaddr_in *mdsin; /* mountd server address */ + char *path; + u_char *fhp; + int *fhsizep; + struct nfs_args *args; + struct proc *procp; +{ + struct mbuf *m; + int error; + int authunixok; + int authcount; + int authver; + +#ifdef BOOTP_NFSV3 + /* First try NFS v3 */ + /* Get port number for MOUNTD. */ + error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER3, + &mdsin->sin_port, procp); + if (!error) { + m = xdr_string_encode(path, strlen(path)); + + /* Do RPC to mountd. */ + error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER3, + RPCMNT_MOUNT, &m, NULL, curproc); + } + if (!error) { + args->flags |= NFSMNT_NFSV3; + } else { +#endif + /* Fallback to NFS v2 */ + + /* Get port number for MOUNTD. */ + error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER1, + &mdsin->sin_port, procp); + if (error) return error; + + m = xdr_string_encode(path, strlen(path)); + + /* Do RPC to mountd. */ + error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER1, + RPCMNT_MOUNT, &m, NULL, curproc); + if (error) + return error; /* message already freed */ + +#ifdef BOOTP_NFSV3 + } +#endif + + if (xdr_int_decode(&m,&error) || error) + goto bad; + + if (args->flags & NFSMNT_NFSV3) { + if (xdr_int_decode(&m,fhsizep) || + *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0 ) + goto bad; + } else + *fhsizep = NFSX_V2FH; + + if (xdr_opaque_decode(&m,fhp,*fhsizep)) + goto bad; + + if (args->flags & NFSMNT_NFSV3) { + if (xdr_int_decode(&m,&authcount)) + goto bad; + authunixok = 0; + if (authcount<0 || authcount>100) + goto bad; + while (authcount>0) { + if (xdr_int_decode(&m,&authver)) + goto bad; + if (authver == RPCAUTH_UNIX) + authunixok = 1; + authcount--; + } + if (!authunixok) + goto bad; + } + + /* Set port number for NFS use. 
*/ + error = krpc_portmap(mdsin, NFS_PROG, + (args->flags & NFSMNT_NFSV3)?NFS_VER3:NFS_VER2, + &mdsin->sin_port, procp); + + goto out; + +bad: + error = EBADRPC; + +out: + m_freem(m); + return error; +} + + +static int md_lookup_swap(mdsin, path, fhp, fhsizep, args, procp) + struct sockaddr_in *mdsin; /* mountd server address */ + char *path; + u_char *fhp; + int *fhsizep; + struct nfs_args *args; + struct proc *procp; +{ + struct mbuf *m; + int error; + int size = -1; + int attribs_present; + int status; + union { + u_int32_t v2[17]; + u_int32_t v3[21]; + } fattribs; + + m = m_get(M_WAIT,MT_DATA); + if (!m) + return ENOBUFS; + + if (args->flags & NFSMNT_NFSV3) { + *mtod(m,u_int32_t *) = txdr_unsigned(*fhsizep); + bcopy(fhp,mtod(m,u_char *)+sizeof(u_int32_t),*fhsizep); + m->m_len = *fhsizep + sizeof(u_int32_t); + } else { + bcopy(fhp,mtod(m,u_char *),NFSX_V2FH); + m->m_len = NFSX_V2FH; + } + + m->m_next = xdr_string_encode(path, strlen(path)); + if (!m->m_next) { + error = ENOBUFS; + goto out; + } + + /* Do RPC to nfsd. */ + if (args->flags & NFSMNT_NFSV3) + error = krpc_call(mdsin, NFS_PROG, NFS_VER3, + NFSPROC_LOOKUP, &m, NULL, procp); + else + error = krpc_call(mdsin, NFS_PROG, NFS_VER2, + NFSV2PROC_LOOKUP, &m, NULL, procp); + if (error) + return error; /* message already freed */ + + if (xdr_int_decode(&m,&status)) + goto bad; + if (status) { + error = ENOENT; + goto out; + } + + if (args->flags & NFSMNT_NFSV3) { + if (xdr_int_decode(&m,fhsizep) || + *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0 ) + goto bad; + } else + *fhsizep = NFSX_V2FH; + + if (xdr_opaque_decode(&m, fhp, *fhsizep)) + goto bad; + + if (args->flags & NFSMNT_NFSV3) { + if (xdr_int_decode(&m,&attribs_present)) + goto bad; + if (attribs_present) { + if (xdr_opaque_decode(&m,(u_char *) &fattribs.v3, + sizeof(u_int32_t)*21)) + goto bad; + size = fxdr_unsigned(u_int32_t, fattribs.v3[6]); + } + } else { + if (xdr_opaque_decode(&m,(u_char *) &fattribs.v2, + sizeof(u_int32_t)*17)) + goto bad; + size = fxdr_unsigned(u_int32_t, fattribs.v2[5]); + } + + if (!nfsv3_diskless.swap_nblks && size!= -1) { + nfsv3_diskless.swap_nblks = size/1024; + printf("md_lookup_swap: Swap size is %d KB\n", + nfsv3_diskless.swap_nblks); + } + + goto out; + +bad: + error = EBADRPC; + +out: + m_freem(m); + return error; +} diff --git a/sys/nfs/krpc.h b/sys/nfs/krpc.h new file mode 100644 index 0000000..6c889b5 --- /dev/null +++ b/sys/nfs/krpc.h @@ -0,0 +1,31 @@ +/* $NetBSD: krpc.h,v 1.4 1995/12/19 23:07:11 cgd Exp $ */ +/* $FreeBSD$ */ + +#include <sys/cdefs.h> + +struct mbuf; +struct proc; +struct sockaddr; +struct sockaddr_in; + +int krpc_call __P((struct sockaddr_in *_sin, + u_int prog, u_int vers, u_int func, + struct mbuf **data, struct sockaddr **from, struct proc *procp)); + +int krpc_portmap __P((struct sockaddr_in *_sin, + u_int prog, u_int vers, u_int16_t *portp,struct proc *procp)); + +struct mbuf *xdr_string_encode __P((char *str, int len)); + +/* + * RPC definitions for the portmapper + */ +#define PMAPPORT 111 +#define PMAPPROG 100000 +#define PMAPVERS 2 +#define PMAPPROC_NULL 0 +#define PMAPPROC_SET 1 +#define PMAPPROC_UNSET 2 +#define PMAPPROC_GETPORT 3 +#define PMAPPROC_DUMP 4 +#define PMAPPROC_CALLIT 5 diff --git a/sys/nfs/krpc_subr.c b/sys/nfs/krpc_subr.c new file mode 100644 index 0000000..2127d46 --- /dev/null +++ b/sys/nfs/krpc_subr.c @@ -0,0 +1,481 @@ +/* $NetBSD: krpc_subr.c,v 1.12.4.1 1996/06/07 00:52:26 cgd Exp $ */ +/* $FreeBSD$ */ + +/* + * Copyright (c) 1995 Gordon Ross, Adam Glass + * Copyright (c) 1992 Regents of the 
University of California. + * All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * partially based on: + * libnetboot/rpc.c + * @(#) Header: rpc.c,v 1.12 93/09/28 08:31:56 leres Exp (LBL) + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/uio.h> + +#include <net/if.h> +#include <netinet/in.h> + +#include <nfs/rpcv2.h> +#include <nfs/krpc.h> +#include <nfs/xdr_subs.h> + +/* + * Kernel support for Sun RPC + * + * Used currently for bootstrapping in nfs diskless configurations. 
+ */ + +/* + * Generic RPC headers + */ + +struct auth_info { + u_int32_t authtype; /* auth type */ + u_int32_t authlen; /* auth length */ +}; + +struct auth_unix { + int32_t ua_time; + int32_t ua_hostname; /* null */ + int32_t ua_uid; + int32_t ua_gid; + int32_t ua_gidlist; /* null */ +}; + +struct rpc_call { + u_int32_t rp_xid; /* request transaction id */ + int32_t rp_direction; /* call direction (0) */ + u_int32_t rp_rpcvers; /* rpc version (2) */ + u_int32_t rp_prog; /* program */ + u_int32_t rp_vers; /* version */ + u_int32_t rp_proc; /* procedure */ + struct auth_info rpc_auth; + struct auth_unix rpc_unix; + struct auth_info rpc_verf; +}; + +struct rpc_reply { + u_int32_t rp_xid; /* request transaction id */ + int32_t rp_direction; /* call direction (1) */ + int32_t rp_astatus; /* accept status (0: accepted) */ + union { + u_int32_t rpu_errno; + struct { + struct auth_info rok_auth; + u_int32_t rok_status; + } rpu_rok; + } rp_u; +}; +#define rp_errno rp_u.rpu_errno +#define rp_auth rp_u.rpu_rok.rok_auth +#define rp_status rp_u.rpu_rok.rok_status + +#define MIN_REPLY_HDR 16 /* xid, dir, astat, errno */ + +/* + * What is the longest we will wait before re-sending a request? + * Note this is also the frequency of "RPC timeout" messages. + * The re-send loop count sup linearly to this maximum, so the + * first complaint will happen after (1+2+3+4+5)=15 seconds. + */ +#define MAX_RESEND_DELAY 5 /* seconds */ + +/* + * Call portmap to lookup a port number for a particular rpc program + * Returns non-zero error on failure. + */ +int +krpc_portmap(sin, prog, vers, portp, procp) + struct sockaddr_in *sin; /* server address */ + u_int prog, vers; /* host order */ + u_int16_t *portp; /* network order */ + struct proc *procp; +{ + struct sdata { + u_int32_t prog; /* call program */ + u_int32_t vers; /* call version */ + u_int32_t proto; /* call protocol */ + u_int32_t port; /* call port (unused) */ + } *sdata; + struct rdata { + u_int16_t pad; + u_int16_t port; + } *rdata; + struct mbuf *m; + int error; + + /* The portmapper port is fixed. */ + if (prog == PMAPPROG) { + *portp = htons(PMAPPORT); + return 0; + } + + m = m_get(M_WAIT, MT_DATA); + if (m == NULL) + return ENOBUFS; + sdata = mtod(m, struct sdata *); + m->m_len = sizeof(*sdata); + + /* Do the RPC to get it. */ + sdata->prog = txdr_unsigned(prog); + sdata->vers = txdr_unsigned(vers); + sdata->proto = txdr_unsigned(IPPROTO_UDP); + sdata->port = 0; + + sin->sin_port = htons(PMAPPORT); + error = krpc_call(sin, PMAPPROG, PMAPVERS, + PMAPPROC_GETPORT, &m, NULL, procp); + if (error) + return error; + + if (m->m_len < sizeof(*rdata)) { + m = m_pullup(m, sizeof(*rdata)); + if (m == NULL) + return ENOBUFS; + } + rdata = mtod(m, struct rdata *); + *portp = rdata->port; + + m_freem(m); + return 0; +} + +/* + * Do a remote procedure call (RPC) and wait for its reply. + * If from_p is non-null, then we are doing broadcast, and + * the address from whence the response came is saved there. 
+ */ +int +krpc_call(sa, prog, vers, func, data, from_p, procp) + struct sockaddr_in *sa; + u_int prog, vers, func; + struct mbuf **data; /* input/output */ + struct sockaddr **from_p; /* output */ + struct proc *procp; +{ + struct socket *so; + struct sockaddr_in *sin, ssin; + struct sockaddr *from; + struct mbuf *m, *nam, *mhead; + struct rpc_call *call; + struct rpc_reply *reply; + struct sockopt sopt; + struct timeval tv; + struct uio auio; + int error, rcvflg, timo, secs, len; + static u_int32_t xid = ~0xFF; + u_int16_t tport; + + /* + * Validate address family. + * Sorry, this is INET specific... + */ + if (sa->sin_family != AF_INET) + return (EAFNOSUPPORT); + + /* Free at end if not null. */ + nam = mhead = NULL; + from = NULL; + + /* + * Create socket and set its recieve timeout. + */ + if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0, procp))) + goto out; + + tv.tv_sec = 1; + tv.tv_usec = 0; + bzero(&sopt, sizeof sopt); + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_RCVTIMEO; + sopt.sopt_val = &tv; + sopt.sopt_valsize = sizeof tv; + + if ((error = sosetopt(so, &sopt)) != 0) + goto out; + + /* + * Enable broadcast if necessary. + */ + if (from_p) { + int on = 1; + sopt.sopt_name = SO_BROADCAST; + sopt.sopt_val = &on; + sopt.sopt_valsize = sizeof on; + if ((error = sosetopt(so, &sopt)) != 0) + goto out; + } + + /* + * Bind the local endpoint to a reserved port, + * because some NFS servers refuse requests from + * non-reserved (non-privileged) ports. + */ + sin = &ssin; + bzero(sin, sizeof *sin); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + tport = IPPORT_RESERVED; + do { + tport--; + sin->sin_port = htons(tport); + error = sobind(so, (struct sockaddr *)sin, procp); + } while (error == EADDRINUSE && + tport > IPPORT_RESERVED / 2); + if (error) { + printf("bind failed\n"); + goto out; + } + + /* + * Setup socket address for the server. + */ + + /* + * Prepend RPC message header. + */ + mhead = m_gethdr(M_WAIT, MT_DATA); + mhead->m_next = *data; + call = mtod(mhead, struct rpc_call *); + mhead->m_len = sizeof(*call); + bzero((caddr_t)call, sizeof(*call)); + /* rpc_call part */ + xid++; + call->rp_xid = txdr_unsigned(xid); + /* call->rp_direction = 0; */ + call->rp_rpcvers = txdr_unsigned(2); + call->rp_prog = txdr_unsigned(prog); + call->rp_vers = txdr_unsigned(vers); + call->rp_proc = txdr_unsigned(func); + /* rpc_auth part (auth_unix as root) */ + call->rpc_auth.authtype = txdr_unsigned(RPCAUTH_UNIX); + call->rpc_auth.authlen = txdr_unsigned(sizeof(struct auth_unix)); + /* rpc_verf part (auth_null) */ + call->rpc_verf.authtype = 0; + call->rpc_verf.authlen = 0; + + /* + * Setup packet header + */ + len = 0; + m = mhead; + while (m) { + len += m->m_len; + m = m->m_next; + } + mhead->m_pkthdr.len = len; + mhead->m_pkthdr.rcvif = NULL; + + /* + * Send it, repeatedly, until a reply is received, + * but delay each re-send by an increasing amount. + * If the delay hits the maximum, start complaining. + */ + timo = 0; + for (;;) { + /* Send RPC request (or re-send). */ + m = m_copym(mhead, 0, M_COPYALL, M_WAIT); + if (m == NULL) { + error = ENOBUFS; + goto out; + } + error = sosend(so, (struct sockaddr *)sa, NULL, m, + NULL, 0, 0); + if (error) { + printf("krpc_call: sosend: %d\n", error); + goto out; + } + m = NULL; + + /* Determine new timeout. */ + if (timo < MAX_RESEND_DELAY) + timo++; + else + printf("RPC timeout for server 0x%lx\n", + (u_long)ntohl(sa->sin_addr.s_addr)); + + /* + * Wait for up to timo seconds for a reply. 
+ * The socket receive timeout was set to 1 second. + */ + secs = timo; + while (secs > 0) { + if (from) { + FREE(from, M_SONAME); + from = NULL; + } + if (m) { + m_freem(m); + m = NULL; + } + bzero(&auio,sizeof(auio)); + auio.uio_resid = len = 1<<16; + rcvflg = 0; + error = soreceive(so, &from, &auio, &m, NULL, &rcvflg); + if (error == EWOULDBLOCK) { + secs--; + continue; + } + if (error) + goto out; + len -= auio.uio_resid; + + /* Does the reply contain at least a header? */ + if (len < MIN_REPLY_HDR) + continue; + if (m->m_len < MIN_REPLY_HDR) + continue; + reply = mtod(m, struct rpc_reply *); + + /* Is it the right reply? */ + if (reply->rp_direction != txdr_unsigned(RPC_REPLY)) + continue; + + if (reply->rp_xid != txdr_unsigned(xid)) + continue; + + /* Was RPC accepted? (authorization OK) */ + if (reply->rp_astatus != 0) { + error = fxdr_unsigned(u_int32_t, reply->rp_errno); + printf("rpc denied, error=%d\n", error); + continue; + } + + /* Did the call succeed? */ + if (reply->rp_status != 0) { + error = fxdr_unsigned(u_int32_t, reply->rp_status); + if (error == RPC_PROGMISMATCH) { + error = EBADRPC; + goto out; + } + printf("rpc denied, status=%d\n", error); + continue; + } + + goto gotreply; /* break two levels */ + + } /* while secs */ + } /* forever send/receive */ + + error = ETIMEDOUT; + goto out; + + gotreply: + + /* + * Get RPC reply header into first mbuf, + * get its length, then strip it off. + */ + len = sizeof(*reply); + if (m->m_len < len) { + m = m_pullup(m, len); + if (m == NULL) { + error = ENOBUFS; + goto out; + } + } + reply = mtod(m, struct rpc_reply *); + if (reply->rp_auth.authtype != 0) { + len += fxdr_unsigned(u_int32_t, reply->rp_auth.authlen); + len = (len + 3) & ~3; /* XXX? */ + } + m_adj(m, len); + + /* result */ + *data = m; + if (from_p) { + *from_p = from; + from = NULL; + } + + out: + if (mhead) m_freem(mhead); + if (from) free(from, M_SONAME); + soclose(so); + return error; +} + +/* + * eXternal Data Representation routines. + * (but with non-standard args...) + */ + +/* + * String representation for RPC. + */ +struct xdr_string { + u_int32_t len; /* length without null or padding */ + char data[4]; /* data (longer, of course) */ + /* data is padded to a long-word boundary */ +}; + +struct mbuf * +xdr_string_encode(str, len) + char *str; + int len; +{ + struct mbuf *m; + struct xdr_string *xs; + int dlen; /* padded string length */ + int mlen; /* message length */ + + dlen = (len + 3) & ~3; + mlen = dlen + 4; + + if (mlen > MCLBYTES) /* If too big, we just can't do it. */ + return (NULL); + + m = m_get(M_WAIT, MT_DATA); + if (mlen > MLEN) { + MCLGET(m, M_WAIT); + if ((m->m_flags & M_EXT) == 0) { + (void) m_free(m); /* There can be only one. */ + return (NULL); + } + } + xs = mtod(m, struct xdr_string *); + m->m_len = mlen; + xs->len = txdr_unsigned(len); + bcopy(str, xs->data, len); + return (m); +} diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h new file mode 100644 index 0000000..75a3982 --- /dev/null +++ b/sys/nfs/nfs.h @@ -0,0 +1,739 @@ +/* + * Copyright (c) 1989, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs.h 8.4 (Berkeley) 5/1/95 + * $FreeBSD$ + */ + +#ifndef _NFS_NFS_H_ +#define _NFS_NFS_H_ + +#ifdef KERNEL +#include "opt_nfs.h" +#endif + +/* + * Tunable constants for nfs + */ + +#define NFS_MAXIOVEC 34 +#define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */ +#define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */ +#define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ +#define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ +#define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ +#define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ +#define NFS_MAXREXMIT 100 /* Stop counting after this many */ +#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ +#define NFS_RETRANS 10 /* Num of retrans for soft mounts */ +#define NFS_MAXGRPS 16 /* Max. size of groups list */ +#ifndef NFS_MINATTRTIMO +#define NFS_MINATTRTIMO 3 /* VREG attrib cache timeout in sec */ +#endif +#ifndef NFS_MAXATTRTIMO +#define NFS_MAXATTRTIMO 60 +#endif +#ifndef NFS_MINDIRATTRTIMO +#define NFS_MINDIRATTRTIMO 30 /* VDIR attrib cache timeout in sec */ +#endif +#ifndef NFS_MAXDIRATTRTIMO +#define NFS_MAXDIRATTRTIMO 60 +#endif +#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ +#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ +#define NFS_READDIRSIZE 8192 /* Def. readdir size */ +#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ +#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */ +#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ +#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */ +#define NFS_MAXGATHERDELAY 100 /* Max. 
write gather delay (msec) */ +#ifndef NFS_GATHERDELAY +#define NFS_GATHERDELAY 10 /* Default write gather delay (msec) */ +#endif +#define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */ +#ifdef KERNEL +#define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ +#endif + +/* + * Oddballs + */ +#define NMOD(a) ((a) % nfs_asyncdaemons) +#define NFS_CMPFH(n, f, s) \ + ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) +#define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3) +#define NFS_SRVMAXDATA(n) \ + (((n)->nd_flag & ND_NFSV3) ? (((n)->nd_nam2) ? \ + NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA) + +/* + * XXX + * The B_INVAFTERWRITE flag should be set to whatever is required by the + * buffer cache code to say "Invalidate the block after it is written back". + */ +#define B_INVAFTERWRITE B_NOCACHE + +/* + * The IO_METASYNC flag should be implemented for local file systems. + * (Until then, it is nothin at all.) + */ +#ifndef IO_METASYNC +#define IO_METASYNC 0 +#endif + +/* + * Expected allocation sizes for major data structures. If the actual size + * of the structure exceeds these sizes, then malloc() will be allocating + * almost twice the memory required. This is used in nfs_init() to warn + * the sysadmin that the size of a structure should be reduced. + * (These sizes are always a power of 2. If the kernel malloc() changes + * to one that does not allocate space in powers of 2 size, then this all + * becomes bunk!) + */ +#define NFS_SVCALLOC 256 +#define NFS_UIDALLOC 128 + +/* + * Arguments to mount NFS + */ +#define NFS_ARGSVERSION 3 /* change when nfs_args changes */ +struct nfs_args { + int version; /* args structure version number */ + struct sockaddr *addr; /* file server address */ + int addrlen; /* length of address */ + int sotype; /* Socket type */ + int proto; /* and Protocol */ + u_char *fh; /* File handle to be mounted */ + int fhsize; /* Size, in bytes, of fh */ + int flags; /* flags */ + int wsize; /* write size in bytes */ + int rsize; /* read size in bytes */ + int readdirsize; /* readdir size in bytes */ + int timeo; /* initial timeout in .1 secs */ + int retrans; /* times to retry send */ + int maxgrouplist; /* Max. 
size of group list */ + int readahead; /* # of blocks to readahead */ + int leaseterm; /* Term (sec) of lease */ + int deadthresh; /* Retrans threshold */ + char *hostname; /* server's name */ + int acregmin; /* cache attrs for reg files min time */ + int acregmax; /* cache attrs for reg files max time */ + int acdirmin; /* cache attrs for dirs min time */ + int acdirmax; /* cache attrs for dirs max time */ +}; + +/* + * NFS mount option flags + */ +#define NFSMNT_SOFT 0x00000001 /* soft mount (hard is default) */ +#define NFSMNT_WSIZE 0x00000002 /* set write size */ +#define NFSMNT_RSIZE 0x00000004 /* set read size */ +#define NFSMNT_TIMEO 0x00000008 /* set initial timeout */ +#define NFSMNT_RETRANS 0x00000010 /* set number of request retries */ +#define NFSMNT_MAXGRPS 0x00000020 /* set maximum grouplist size */ +#define NFSMNT_INT 0x00000040 /* allow interrupts on hard mount */ +#define NFSMNT_NOCONN 0x00000080 /* Don't Connect the socket */ +#define NFSMNT_NQNFS 0x00000100 /* Use Nqnfs protocol */ +#define NFSMNT_NFSV3 0x00000200 /* Use NFS Version 3 protocol */ +#define NFSMNT_KERB 0x00000400 /* Use Kerberos authentication */ +#define NFSMNT_DUMBTIMR 0x00000800 /* Don't estimate rtt dynamically */ +#define NFSMNT_LEASETERM 0x00001000 /* set lease term (nqnfs) */ +#define NFSMNT_READAHEAD 0x00002000 /* set read ahead */ +#define NFSMNT_DEADTHRESH 0x00004000 /* set dead server retry thresh */ +#define NFSMNT_RESVPORT 0x00008000 /* Allocate a reserved port */ +#define NFSMNT_RDIRPLUS 0x00010000 /* Use Readdirplus for V3 */ +#define NFSMNT_READDIRSIZE 0x00020000 /* Set readdir size */ +#define NFSMNT_ACREGMIN 0x00040000 +#define NFSMNT_ACREGMAX 0x00080000 +#define NFSMNT_ACDIRMIN 0x00100000 +#define NFSMNT_ACDIRMAX 0x00200000 + +#define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier for V3 */ +#define NFSSTA_GOTPATHCONF 0x00080000 /* Got the V3 pathconf info */ +#define NFSSTA_GOTFSINFO 0x00100000 /* Got the V3 fsinfo */ +#define NFSSTA_MNTD 0x00200000 /* Mnt server for mnt point */ +#define NFSSTA_DISMINPROG 0x00400000 /* Dismount in progress */ +#define NFSSTA_DISMNT 0x00800000 /* Dismounted */ +#define NFSSTA_SNDLOCK 0x01000000 /* Send socket lock */ +#define NFSSTA_WANTSND 0x02000000 /* Want above */ +#define NFSSTA_RCVLOCK 0x04000000 /* Rcv socket lock */ +#define NFSSTA_WANTRCV 0x08000000 /* Want above */ +#define NFSSTA_WAITAUTH 0x10000000 /* Wait for authentication */ +#define NFSSTA_HASAUTH 0x20000000 /* Has authenticator */ +#define NFSSTA_WANTAUTH 0x40000000 /* Wants an authenticator */ +#define NFSSTA_AUTHERR 0x80000000 /* Authentication error */ + +/* + * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs + * should ever try and use it. + */ +struct nfsd_args { + int sock; /* Socket to serve */ + caddr_t name; /* Client addr for connection based sockets */ + int namelen; /* Length of name */ +}; + +struct nfsd_srvargs { + struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ + uid_t nsd_uid; /* Effective uid mapped to cred */ + u_int32_t nsd_haddr; /* Ip address of client */ + struct ucred nsd_cr; /* Cred. 
uid maps to */ + int nsd_authlen; /* Length of auth string (ret) */ + u_char *nsd_authstr; /* Auth string (ret) */ + int nsd_verflen; /* and the verfier */ + u_char *nsd_verfstr; + struct timeval nsd_timestamp; /* timestamp from verifier */ + u_int32_t nsd_ttl; /* credential ttl (sec) */ + NFSKERBKEY_T nsd_key; /* Session key */ +}; + +struct nfsd_cargs { + char *ncd_dirp; /* Mount dir path */ + uid_t ncd_authuid; /* Effective uid */ + int ncd_authtype; /* Type of authenticator */ + int ncd_authlen; /* Length of authenticator string */ + u_char *ncd_authstr; /* Authenticator string */ + int ncd_verflen; /* and the verifier */ + u_char *ncd_verfstr; + NFSKERBKEY_T ncd_key; /* Session key */ +}; + +/* + * XXX to allow amd to include nfs.h without nfsproto.h + */ +#ifdef NFS_NPROCS +/* + * Stats structure + */ +struct nfsstats { + int attrcache_hits; + int attrcache_misses; + int lookupcache_hits; + int lookupcache_misses; + int direofcache_hits; + int direofcache_misses; + int biocache_reads; + int read_bios; + int read_physios; + int biocache_writes; + int write_bios; + int write_physios; + int biocache_readlinks; + int readlink_bios; + int biocache_readdirs; + int readdir_bios; + int rpccnt[NFS_NPROCS]; + int rpcretries; + int srvrpccnt[NFS_NPROCS]; + int srvrpc_errs; + int srv_errs; + int rpcrequests; + int rpctimeouts; + int rpcunexpected; + int rpcinvalid; + int srvcache_inproghits; + int srvcache_idemdonehits; + int srvcache_nonidemdonehits; + int srvcache_misses; + int srvnqnfs_leases; + int srvnqnfs_maxleases; + int srvnqnfs_getleases; + int srvvop_writes; + int accesscache_hits; + int accesscache_misses; +}; +#endif + +/* + * Flags for nfssvc() system call. + */ +#define NFSSVC_BIOD 0x002 +#define NFSSVC_NFSD 0x004 +#define NFSSVC_ADDSOCK 0x008 +#define NFSSVC_AUTHIN 0x010 +#define NFSSVC_GOTAUTH 0x040 +#define NFSSVC_AUTHINFAIL 0x080 +#define NFSSVC_MNTD 0x100 + +/* + * fs.nfs sysctl(3) identifiers + */ +#define NFS_NFSSTATS 1 /* struct: struct nfsstats */ +#define NFS_NFSPRIVPORT 2 /* int: prohibit nfs to resvports */ + +#define FS_NFS_NAMES { \ + { 0, 0 }, \ + { "nfsstats", CTLTYPE_STRUCT }, \ + { "nfsprivport", CTLTYPE_INT }, \ +} + +#ifdef KERNEL + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_NFSREQ); +MALLOC_DECLARE(M_NFSDIROFF); +MALLOC_DECLARE(M_NFSRVDESC); +MALLOC_DECLARE(M_NFSUID); +MALLOC_DECLARE(M_NQLEASE); +MALLOC_DECLARE(M_NFSD); +MALLOC_DECLARE(M_NFSBIGFH); +MALLOC_DECLARE(M_NFSHASH); +#endif + +#ifdef ZONE_INTERRUPT +extern vm_zone_t nfsmount_zone; +#endif + +extern struct callout_handle nfs_timer_handle; + +struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ + +/* + * The set of signals the interrupt an I/O in progress for NFSMNT_INT mounts. + * What should be in this set is open to debate, but I believe that since + * I/O system calls on ufs are never interrupted by signals the set should + * be minimal. My reasoning is that many current programs that use signals + * such as SIGALRM will not expect file I/O system calls to be interrupted + * by them and break. + */ +#define NFSINT_SIGMASK(set) \ + (SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) || \ + SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) || \ + SIGISMEMBER(set, SIGQUIT)) + +/* + * Socket errors ignored for connectionless sockets?? 
+ * For now, ignore them all + */ +#define NFSIGNORE_SOERROR(s, e) \ + ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ + ((s) & PR_CONNREQUIRED) == 0) + +/* + * Nfs outstanding request list element + */ +struct nfsreq { + TAILQ_ENTRY(nfsreq) r_chain; + struct mbuf *r_mreq; + struct mbuf *r_mrep; + struct mbuf *r_md; + caddr_t r_dpos; + struct nfsmount *r_nmp; + struct vnode *r_vp; + u_int32_t r_xid; + int r_flags; /* flags on request, see below */ + int r_retry; /* max retransmission count */ + int r_rexmit; /* current retrans count */ + int r_timer; /* tick counter on reply */ + u_int32_t r_procnum; /* NFS procedure number */ + int r_rtt; /* RTT for rpc */ + struct proc *r_procp; /* Proc that did I/O system call */ +}; + +/* + * Queue head for nfsreq's + */ +extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; + +/* Flag values for r_flags */ +#define R_TIMING 0x01 /* timing request (in mntp) */ +#define R_SENT 0x02 /* request has been sent */ +#define R_SOFTTERM 0x04 /* soft mnt, too many retries */ +#define R_INTR 0x08 /* intr mnt, signal pending */ +#define R_SOCKERR 0x10 /* Fatal error on socket */ +#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ +#define R_MUSTRESEND 0x40 /* Must resend request */ +#define R_GETONEREP 0x80 /* Probe for one reply only */ + +/* + * A list of nfssvc_sock structures is maintained with all the sockets + * that require service by the nfsd. + * The nfsuid structs hang off of the nfssvc_sock structs in both lru + * and uid hash lists. + */ +#ifndef NFS_UIDHASHSIZ +#define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */ +#endif +#define NUIDHASH(sock, uid) \ + (&(sock)->ns_uidhashtbl[(uid) % NFS_UIDHASHSIZ]) +#ifndef NFS_WDELAYHASHSIZ +#define NFS_WDELAYHASHSIZ 16 /* and with this */ +#endif +#define NWDELAYHASH(sock, f) \ + (&(sock)->ns_wdelayhashtbl[(*((u_int32_t *)(f))) % NFS_WDELAYHASHSIZ]) +#ifndef NFS_MUIDHASHSIZ +#define NFS_MUIDHASHSIZ 63 /* Tune the size of nfsmount with this */ +#endif +#define NMUIDHASH(nmp, uid) \ + (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ]) +#define NFSNOHASH(fhsum) \ + (&nfsnodehashtbl[(fhsum) & nfsnodehash]) + +/* + * Network address hash list element + */ +union nethostaddr { + u_int32_t had_inetaddr; + struct sockaddr *had_nam; +}; + +struct nfsuid { + TAILQ_ENTRY(nfsuid) nu_lru; /* LRU chain */ + LIST_ENTRY(nfsuid) nu_hash; /* Hash list */ + int nu_flag; /* Flags */ + union nethostaddr nu_haddr; /* Host addr. for dgram sockets */ + struct ucred nu_cr; /* Cred uid mapped to */ + int nu_expire; /* Expiry time (sec) */ + struct timeval nu_timestamp; /* Kerb. timestamp */ + u_int32_t nu_nickname; /* Nickname on server */ + NFSKERBKEY_T nu_key; /* and session key */ +}; + +#define nu_inetaddr nu_haddr.had_inetaddr +#define nu_nam nu_haddr.had_nam +/* Bits for nu_flag */ +#define NU_INETADDR 0x1 +#define NU_NAM 0x2 +#define NU_NETFAM(u) (((u)->nu_flag & NU_INETADDR) ? 
AF_INET : AF_ISO) + +struct nfsrv_rec { + STAILQ_ENTRY(nfsrv_rec) nr_link; + struct sockaddr *nr_address; + struct mbuf *nr_packet; +}; + +struct nfssvc_sock { + TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */ + TAILQ_HEAD(, nfsuid) ns_uidlruhead; + struct file *ns_fp; + struct socket *ns_so; + struct sockaddr *ns_nam; + struct mbuf *ns_raw; + struct mbuf *ns_rawend; + STAILQ_HEAD(, nfsrv_rec) ns_rec; + struct mbuf *ns_frag; + int ns_flag; + int ns_solock; + int ns_cc; + int ns_reclen; + int ns_numuids; + u_int32_t ns_sref; + LIST_HEAD(, nfsrv_descript) ns_tq; /* Write gather lists */ + LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ]; + LIST_HEAD(nfsrvw_delayhash, nfsrv_descript) ns_wdelayhashtbl[NFS_WDELAYHASHSIZ]; +}; + +/* Bits for "ns_flag" */ +#define SLP_VALID 0x01 +#define SLP_DOREC 0x02 +#define SLP_NEEDQ 0x04 +#define SLP_DISCONN 0x08 +#define SLP_GETSTREAM 0x10 +#define SLP_LASTFRAG 0x20 +#define SLP_ALLFLAGS 0xff + +extern TAILQ_HEAD(nfssvc_sockhead, nfssvc_sock) nfssvc_sockhead; +extern int nfssvc_sockhead_flag; +#define SLP_INIT 0x01 +#define SLP_WANTINIT 0x02 + +/* + * One of these structures is allocated for each nfsd. + */ +struct nfsd { + TAILQ_ENTRY(nfsd) nfsd_chain; /* List of all nfsd's */ + int nfsd_flag; /* NFSD_ flags */ + struct nfssvc_sock *nfsd_slp; /* Current socket */ + int nfsd_authlen; /* Authenticator len */ + u_char nfsd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */ + int nfsd_verflen; /* and the Verifier */ + u_char nfsd_verfstr[RPCVERF_MAXSIZ]; + struct proc *nfsd_procp; /* Proc ptr */ + struct nfsrv_descript *nfsd_nd; /* Associated nfsrv_descript */ +}; + +/* Bits for "nfsd_flag" */ +#define NFSD_WAITING 0x01 +#define NFSD_REQINPROG 0x02 +#define NFSD_NEEDAUTH 0x04 +#define NFSD_AUTHFAIL 0x08 + +/* + * This structure is used by the server for describing each request. + * Some fields are used only when write request gathering is performed. + */ +struct nfsrv_descript { + u_quad_t nd_time; /* Write deadline (usec) */ + off_t nd_off; /* Start byte offset */ + off_t nd_eoff; /* and end byte offset */ + LIST_ENTRY(nfsrv_descript) nd_hash; /* Hash list */ + LIST_ENTRY(nfsrv_descript) nd_tq; /* and timer list */ + LIST_HEAD(,nfsrv_descript) nd_coalesce; /* coalesced writes */ + struct mbuf *nd_mrep; /* Request mbuf list */ + struct mbuf *nd_md; /* Current dissect mbuf */ + struct mbuf *nd_mreq; /* Reply mbuf list */ + struct sockaddr *nd_nam; /* and socket addr */ + struct sockaddr *nd_nam2; /* return socket addr */ + caddr_t nd_dpos; /* Current dissect pos */ + u_int32_t nd_procnum; /* RPC # */ + int nd_stable; /* storage type */ + int nd_flag; /* nd_flag */ + int nd_len; /* Length of this write */ + int nd_repstat; /* Reply status */ + u_int32_t nd_retxid; /* Reply xid */ + u_int32_t nd_duration; /* Lease duration */ + struct timeval nd_starttime; /* Time RPC initiated */ + fhandle_t nd_fh; /* File handle */ + struct ucred nd_cr; /* Credentials */ +}; + +/* Bits for "nd_flag" */ +#define ND_READ LEASE_READ +#define ND_WRITE LEASE_WRITE +#define ND_CHECK 0x04 +#define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK) +#define ND_NFSV3 0x08 +#define ND_NQNFS 0x10 +#define ND_KERBNICK 0x20 +#define ND_KERBFULL 0x40 +#define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL) + +extern TAILQ_HEAD(nfsd_head, nfsd) nfsd_head; +extern int nfsd_head_flag; +#define NFSD_CHECKSLP 0x01 + +/* + * These macros compare nfsrv_descript structures. 
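Before the macros themselves (they follow immediately below), here is a standalone sketch of the comparison they perform, using an invented struct in place of nfsrv_descript: an earlier write and a new one can be gathered when they are against the same file handle and the earlier range reaches the start of the new one.

#include <stdio.h>
#include <string.h>

/*
 * Invented miniature of the write-gathering test: 'o' is the request
 * already queued, 'n' is the incoming one.  Mirrors the idea of
 * NFSW_CONTIG without the kernel types.
 */
struct wreq {
        char            fh[32];         /* opaque file handle bytes */
        long long       off, end;       /* byte range of the write */
};

static int
can_coalesce(const struct wreq *o, const struct wreq *n)
{
        return (o->end >= n->off &&
            memcmp(o->fh, n->fh, sizeof(o->fh)) == 0);
}

int
main(void)
{
        struct wreq a = { "fh-1", 0, 8192 };
        struct wreq b = { "fh-1", 8192, 16384 };
        struct wreq c = { "fh-2", 8192, 16384 };

        printf("a+b coalesce? %d\n", can_coalesce(&a, &b));     /* 1 */
        printf("a+c coalesce? %d\n", can_coalesce(&a, &c));     /* 0 */
        return (0);
}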
+ */ +#define NFSW_CONTIG(o, n) \ + ((o)->nd_eoff >= (n)->nd_off && \ + !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, NFSX_V3FH)) + +#define NFSW_SAMECRED(o, n) \ + (((o)->nd_flag & ND_KERBAUTH) == ((n)->nd_flag & ND_KERBAUTH) && \ + !bcmp((caddr_t)&(o)->nd_cr, (caddr_t)&(n)->nd_cr, \ + sizeof (struct ucred))) + +/* + * Defines for WebNFS + */ + +#define WEBNFS_ESC_CHAR '%' +#define WEBNFS_SPECCHAR_START 0x80 + +#define WEBNFS_NATIVE_CHAR 0x80 +/* + * .. + * Possibly more here in the future. + */ + +/* + * Macro for converting escape characters in WebNFS pathnames. + * Should really be in libkern. + */ + +#define HEXTOC(c) \ + ((c) >= 'a' ? ((c) - ('a' - 10)) : \ + ((c) >= 'A' ? ((c) - ('A' - 10)) : ((c) - '0'))) +#define HEXSTRTOI(p) \ + ((HEXTOC(p[0]) << 4) + HEXTOC(p[1])) + +#ifdef NFS_DEBUG + +extern int nfs_debug; +#define NFS_DEBUG_ASYNCIO 1 /* asynchronous i/o */ +#define NFS_DEBUG_WG 2 /* server write gathering */ +#define NFS_DEBUG_RC 4 /* server request caching */ + +#define NFS_DPF(cat, args) \ + do { \ + if (nfs_debug & NFS_DEBUG_##cat) printf args; \ + } while (0) + +#else + +#define NFS_DPF(cat, args) + +#endif + +u_quad_t nfs_curusec __P((void)); +int nfs_init __P((struct vfsconf *vfsp)); +int nfs_uninit __P((struct vfsconf *vfsp)); +int nfs_reply __P((struct nfsreq *)); +int nfs_getreq __P((struct nfsrv_descript *,struct nfsd *,int)); +int nfs_send __P((struct socket *, struct sockaddr *, struct mbuf *, + struct nfsreq *)); +int nfs_rephead __P((int, struct nfsrv_descript *, struct nfssvc_sock *, + int, int, u_quad_t *, struct mbuf **, struct mbuf **, + caddr_t *)); +int nfs_sndlock __P((struct nfsreq *)); +void nfs_sndunlock __P((struct nfsreq *)); +int nfs_slplock __P((struct nfssvc_sock *, int)); +void nfs_slpunlock __P((struct nfssvc_sock *)); +int nfs_disct __P((struct mbuf **, caddr_t *, int, int, caddr_t *)); +int nfs_vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *, + int)); +int nfs_readrpc __P((struct vnode *, struct uio *, struct ucred *)); +int nfs_writerpc __P((struct vnode *, struct uio *, struct ucred *, int *, + int *)); +int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, + struct ucred *cred, struct proc *procp)); +int nfs_readdirrpc __P((struct vnode *, struct uio *, struct ucred *)); +int nfs_asyncio __P((struct buf *, struct ucred *, struct proc *)); +int nfs_doio __P((struct buf *, struct ucred *, struct proc *)); +int nfs_readlinkrpc __P((struct vnode *, struct uio *, struct ucred *)); +int nfs_sigintr __P((struct nfsmount *, struct nfsreq *, struct proc *)); +int nfs_readdirplusrpc __P((struct vnode *, struct uio *, struct ucred *)); +int nfsm_disct __P((struct mbuf **, caddr_t *, int, int, caddr_t *)); +void nfsm_srvfattr __P((struct nfsrv_descript *, struct vattr *, + struct nfs_fattr *)); +void nfsm_srvwcc __P((struct nfsrv_descript *, int, struct vattr *, int, + struct vattr *, struct mbuf **, char **)); +void nfsm_srvpostopattr __P((struct nfsrv_descript *, int, struct vattr *, + struct mbuf **, char **)); +int netaddr_match __P((int, union nethostaddr *, struct sockaddr *)); +int nfs_request __P((struct vnode *, struct mbuf *, int, struct proc *, + struct ucred *, struct mbuf **, struct mbuf **, + caddr_t *)); +int nfs_loadattrcache __P((struct vnode **, struct mbuf **, caddr_t *, + struct vattr *)); +int nfs_namei __P((struct nameidata *, fhandle_t *, int, + struct nfssvc_sock *, struct sockaddr *, struct mbuf **, + caddr_t *, struct vnode **, struct proc *, int, int)); +void nfsm_adj __P((struct mbuf *, int, 
int)); +int nfsm_mbuftouio __P((struct mbuf **, struct uio *, int, caddr_t *)); +void nfsrv_initcache __P((void)); +int nfs_getauth __P((struct nfsmount *, struct nfsreq *, struct ucred *, + char **, int *, char *, int *, NFSKERBKEY_T)); +int nfs_getnickauth __P((struct nfsmount *, struct ucred *, char **, + int *, char *, int)); +int nfs_savenickauth __P((struct nfsmount *, struct ucred *, int, + NFSKERBKEY_T, struct mbuf **, char **, + struct mbuf *)); +int nfs_adv __P((struct mbuf **, caddr_t *, int, int)); +void nfs_nhinit __P((void)); +void nfs_timer __P((void*)); +u_long nfs_hash __P((nfsfh_t *, int)); +int nfsrv_dorec __P((struct nfssvc_sock *, struct nfsd *, + struct nfsrv_descript **)); +int nfsrv_getcache __P((struct nfsrv_descript *, struct nfssvc_sock *, + struct mbuf **)); +void nfsrv_updatecache __P((struct nfsrv_descript *, int, struct mbuf *)); +void nfsrv_cleancache __P((void)); +int nfs_connect __P((struct nfsmount *, struct nfsreq *)); +void nfs_disconnect __P((struct nfsmount *)); +void nfs_safedisconnect __P((struct nfsmount *)); +int nfs_getattrcache __P((struct vnode *, struct vattr *)); +int nfsm_strtmbuf __P((struct mbuf **, char **, const char *, long)); +int nfs_bioread __P((struct vnode *, struct uio *, int, struct ucred *)); +int nfsm_uiotombuf __P((struct uio *, struct mbuf **, int, caddr_t *)); +void nfsrv_init __P((int)); +void nfs_clearcommit __P((struct mount *)); +int nfsrv_errmap __P((struct nfsrv_descript *, int)); +void nfsrvw_sort __P((gid_t *, int)); +void nfsrv_setcred __P((struct ucred *, struct ucred *)); +int nfs_writebp __P((struct buf *, int, struct proc *)); +int nfsrv_object_create __P((struct vnode *)); +void nfsrv_wakenfsd __P((struct nfssvc_sock *slp)); +int nfsrv_writegather __P((struct nfsrv_descript **, struct nfssvc_sock *, + struct proc *, struct mbuf **)); +int nfs_fsinfo __P((struct nfsmount *, struct vnode *, struct ucred *, + struct proc *p)); + +int nfsrv3_access __P((struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_commit __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_create __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_fhtovp __P((fhandle_t *, int, struct vnode **, struct ucred *, + struct nfssvc_sock *, struct sockaddr *, int *, + int, int)); +int nfsrv_setpublicfs __P((struct mount *, struct netexport *, + struct export_args *)); +int nfs_ispublicfh __P((fhandle_t *)); +int nfsrv_fsinfo __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_getattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_link __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_lookup __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_mkdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_mknod __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_noop __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_null __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_pathconf __P((struct 
nfsrv_descript *nfsd, + struct nfssvc_sock *slp, struct proc *procp, + struct mbuf **mrq)); +int nfsrv_read __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_readdir __P((struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_readdirplus __P((struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, struct proc *procp, + struct mbuf **mrq)); +int nfsrv_readlink __P((struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, struct proc *procp, + struct mbuf **mrq)); +int nfsrv_remove __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_rename __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_rmdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_setattr __P((struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_statfs __P((struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_symlink __P((struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + struct proc *procp, struct mbuf **mrq)); +void nfsrv_rcv __P((struct socket *so, void *arg, int waitflag)); +void nfsrv_slpderef __P((struct nfssvc_sock *slp)); +#endif /* KERNEL */ + +#endif diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c new file mode 100644 index 0000000..8e99d98 --- /dev/null +++ b/sys/nfs/nfs_bio.c @@ -0,0 +1,1555 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/resourcevar.h> +#include <sys/signalvar.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/kernel.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_page.h> +#include <vm/vm_object.h> +#include <vm/vm_pager.h> +#include <vm/vnode_pager.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsmount.h> +#include <nfs/nqnfs.h> +#include <nfs/nfsnode.h> + +static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size, + struct proc *p)); + +extern int nfs_numasync; +extern int nfs_pbuf_freecnt; +extern struct nfsstats nfsstats; + +/* + * Vnode op for VM getpages. + */ +int +nfs_getpages(ap) + struct vop_getpages_args /* { + struct vnode *a_vp; + vm_page_t *a_m; + int a_count; + int a_reqpage; + vm_ooffset_t a_offset; + } */ *ap; +{ + int i, error, nextoff, size, toff, count, npages; + struct uio uio; + struct iovec iov; + vm_offset_t kva; + struct buf *bp; + struct vnode *vp; + struct proc *p; + struct ucred *cred; + struct nfsmount *nmp; + vm_page_t *pages; + + vp = ap->a_vp; + p = curproc; /* XXX */ + cred = curproc->p_ucred; /* XXX */ + nmp = VFSTONFS(vp->v_mount); + pages = ap->a_m; + count = ap->a_count; + + if (vp->v_object == NULL) { + printf("nfs_getpages: called with non-merged cache vnode??\n"); + return VM_PAGER_ERROR; + } + + if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (void)nfs_fsinfo(nmp, vp, cred, p); + + npages = btoc(count); + + /* + * If the requested page is partially valid, just return it and + * allow the pager to zero-out the blanks. Partially valid pages + * can only occur at the file EOF. + */ + + { + vm_page_t m = pages[ap->a_reqpage]; + + if (m->valid != 0) { + /* handled by vm_fault now */ + /* vm_page_zero_invalid(m, TRUE); */ + for (i = 0; i < npages; ++i) { + if (i != ap->a_reqpage) + vnode_pager_freepage(pages[i]); + } + return(0); + } + } + + /* + * We use only the kva address for the buffer, but this is extremely + * convienient and fast. + */ + bp = getpbuf(&nfs_pbuf_freecnt); + + kva = (vm_offset_t) bp->b_data; + pmap_qenter(kva, pages, npages); + + iov.iov_base = (caddr_t) kva; + iov.iov_len = count; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); + uio.uio_resid = count; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = UIO_READ; + uio.uio_procp = p; + + error = nfs_readrpc(vp, &uio, cred); + pmap_qremove(kva, npages); + + relpbuf(bp, &nfs_pbuf_freecnt); + + if (error && (uio.uio_resid == count)) { + printf("nfs_getpages: error %d\n", error); + for (i = 0; i < npages; ++i) { + if (i != ap->a_reqpage) + vnode_pager_freepage(pages[i]); + } + return VM_PAGER_ERROR; + } + + /* + * Calculate the number of bytes read and validate only that number + * of bytes. 
Note that due to pending writes, size may be 0. This + * does not mean that the remaining data is invalid! + */ + + size = count - uio.uio_resid; + + for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { + vm_page_t m; + nextoff = toff + PAGE_SIZE; + m = pages[i]; + + m->flags &= ~PG_ZERO; + + if (nextoff <= size) { + /* + * Read operation filled an entire page + */ + m->valid = VM_PAGE_BITS_ALL; + vm_page_undirty(m); + } else if (size > toff) { + /* + * Read operation filled a partial page. + */ + m->valid = 0; + vm_page_set_validclean(m, 0, size - toff); + /* handled by vm_fault now */ + /* vm_page_zero_invalid(m, TRUE); */ + } + + if (i != ap->a_reqpage) { + /* + * Whether or not to leave the page activated is up in + * the air, but we should put the page on a page queue + * somewhere (it already is in the object). Result: + * It appears that emperical results show that + * deactivating pages is best. + */ + + /* + * Just in case someone was asking for this page we + * now tell them that it is ok to use. + */ + if (!error) { + if (m->flags & PG_WANTED) + vm_page_activate(m); + else + vm_page_deactivate(m); + vm_page_wakeup(m); + } else { + vnode_pager_freepage(m); + } + } + } + return 0; +} + +/* + * Vnode op for VM putpages. + */ +int +nfs_putpages(ap) + struct vop_putpages_args /* { + struct vnode *a_vp; + vm_page_t *a_m; + int a_count; + int a_sync; + int *a_rtvals; + vm_ooffset_t a_offset; + } */ *ap; +{ + struct uio uio; + struct iovec iov; + vm_offset_t kva; + struct buf *bp; + int iomode, must_commit, i, error, npages, count; + off_t offset; + int *rtvals; + struct vnode *vp; + struct proc *p; + struct ucred *cred; + struct nfsmount *nmp; + struct nfsnode *np; + vm_page_t *pages; + + vp = ap->a_vp; + np = VTONFS(vp); + p = curproc; /* XXX */ + cred = curproc->p_ucred; /* XXX */ + nmp = VFSTONFS(vp->v_mount); + pages = ap->a_m; + count = ap->a_count; + rtvals = ap->a_rtvals; + npages = btoc(count); + offset = IDX_TO_OFF(pages[0]->pindex); + + if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (void)nfs_fsinfo(nmp, vp, cred, p); + + for (i = 0; i < npages; i++) { + rtvals[i] = VM_PAGER_AGAIN; + } + + /* + * When putting pages, do not extend file past EOF. + */ + + if (offset + count > np->n_size) { + count = np->n_size - offset; + if (count < 0) + count = 0; + } + + /* + * We use only the kva address for the buffer, but this is extremely + * convienient and fast. 
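As an aside on the validation loop in nfs_getpages() above: only size = count - uio_resid bytes get marked valid, a page at a time, and a trailing partial page is validated only up to the bytes actually read. The standalone sketch below reruns that arithmetic with made-up sizes; it is not kernel code.

#include <stdio.h>

#define PAGESZ  4096                    /* assumed page size for the example */

int
main(void)
{
        int count = 3 * PAGESZ;         /* bytes requested from the server */
        int resid = 1500;               /* bytes the read RPC left unread */
        int size = count - resid;       /* bytes actually read */
        int i, toff, nextoff;

        for (i = 0, toff = 0; toff < count; i++, toff = nextoff) {
                nextoff = toff + PAGESZ;
                if (nextoff <= size)
                        printf("page %d: fully valid\n", i);
                else if (size > toff)
                        printf("page %d: %d valid bytes\n", i, size - toff);
                else
                        printf("page %d: no valid data\n", i);
        }
        return (0);
}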
+ */ + bp = getpbuf(&nfs_pbuf_freecnt); + + kva = (vm_offset_t) bp->b_data; + pmap_qenter(kva, pages, npages); + + iov.iov_base = (caddr_t) kva; + iov.iov_len = count; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = offset; + uio.uio_resid = count; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = UIO_WRITE; + uio.uio_procp = p; + + if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0) + iomode = NFSV3WRITE_UNSTABLE; + else + iomode = NFSV3WRITE_FILESYNC; + + error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit); + + pmap_qremove(kva, npages); + relpbuf(bp, &nfs_pbuf_freecnt); + + if (!error) { + int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; + for (i = 0; i < nwritten; i++) { + rtvals[i] = VM_PAGER_OK; + vm_page_undirty(pages[i]); + } + if (must_commit) + nfs_clearcommit(vp->v_mount); + } + return rtvals[0]; +} + +/* + * Vnode op for read using bio + */ +int +nfs_bioread(vp, uio, ioflag, cred) + register struct vnode *vp; + register struct uio *uio; + int ioflag; + struct ucred *cred; +{ + register struct nfsnode *np = VTONFS(vp); + register int biosize, i; + struct buf *bp = 0, *rabp; + struct vattr vattr; + struct proc *p; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + daddr_t lbn, rabn; + int bcount; + int seqcount; + int nra, error = 0, n = 0, on = 0; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("nfs_read mode"); +#endif + if (uio->uio_resid == 0) + return (0); + if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */ + return (EINVAL); + p = uio->uio_procp; + + if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (void)nfs_fsinfo(nmp, vp, cred, p); + if (vp->v_type != VDIR && + (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize) + return (EFBIG); + biosize = vp->v_mount->mnt_stat.f_iosize; + seqcount = (int)((off_t)(ioflag >> 16) * biosize / BKVASIZE); + /* + * For nfs, cache consistency can only be maintained approximately. + * Although RFC1094 does not specify the criteria, the following is + * believed to be compatible with the reference port. + * For nqnfs, full cache consistency is maintained within the loop. + * For nfs: + * If the file's modify time on the server has changed since the + * last read rpc or you have written to the file, + * you may have lost data cache consistency with the + * server, so flush all of the file's data out of the cache. + * Then force a getattr rpc to ensure that you have up to date + * attributes. + * NB: This implies that cache data can be read when up to + * NFS_ATTRTIMEO seconds out of date. If you find that you need current + * attributes this could be forced by setting n_attrstamp to 0 before + * the VOP_GETATTR() call. + */ + if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { + if (np->n_flag & NMODIFIED) { + if (vp->v_type != VREG) { + if (vp->v_type != VDIR) + panic("nfs: bioread, not dir"); + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); + if (error) + return (error); + } + np->n_attrstamp = 0; + error = VOP_GETATTR(vp, &vattr, cred, p); + if (error) + return (error); + np->n_mtime = vattr.va_mtime.tv_sec; + } else { + error = VOP_GETATTR(vp, &vattr, cred, p); + if (error) + return (error); + if (np->n_mtime != vattr.va_mtime.tv_sec) { + if (vp->v_type == VDIR) + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); + if (error) + return (error); + np->n_mtime = vattr.va_mtime.tv_sec; + } + } + } + do { + + /* + * Get a valid lease. If cached data is stale, flush it. 
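The consistency rule spelled out above boils down to a small test, sketched below with invented types: distrust cached file data when we have modified the file locally or when the server's modify time no longer matches the one we cached. This flattens the real code, which additionally treats directories and regular files differently.

#include <stdio.h>
#include <time.h>

/* Invented, simplified model of the client's consistency check. */
struct cached_file {
        int     modified_locally;       /* roughly the NMODIFIED flag */
        time_t  cached_mtime;           /* roughly np->n_mtime */
};

/* Return 1 if cached data should not be trusted without revalidation. */
static int
cache_is_suspect(const struct cached_file *cf, time_t server_mtime)
{
        if (cf->modified_locally)
                return (1);             /* we wrote; attributes may be stale */
        return (cf->cached_mtime != server_mtime);
}

int
main(void)
{
        struct cached_file cf = { 0, 1000 };

        printf("suspect? %d\n", cache_is_suspect(&cf, 1000));   /* 0 */
        printf("suspect? %d\n", cache_is_suspect(&cf, 2000));   /* 1 */
        return (0);
}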
+ */ + if (nmp->nm_flag & NFSMNT_NQNFS) { + if (NQNFS_CKINVALID(vp, np, ND_READ)) { + do { + error = nqnfs_getlease(vp, ND_READ, cred, p); + } while (error == NQNFS_EXPIRED); + if (error) + return (error); + if (np->n_lrev != np->n_brev || + (np->n_flag & NQNFSNONCACHE) || + ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { + if (vp->v_type == VDIR) + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); + if (error) + return (error); + np->n_brev = np->n_lrev; + } + } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); + if (error) + return (error); + } + } + if (np->n_flag & NQNFSNONCACHE) { + switch (vp->v_type) { + case VREG: + return (nfs_readrpc(vp, uio, cred)); + case VLNK: + return (nfs_readlinkrpc(vp, uio, cred)); + case VDIR: + break; + default: + printf(" NQNFSNONCACHE: type %x unexpected\n", + vp->v_type); + }; + } + switch (vp->v_type) { + case VREG: + nfsstats.biocache_reads++; + lbn = uio->uio_offset / biosize; + on = uio->uio_offset & (biosize - 1); + + /* + * Start the read ahead(s), as required. + */ + if (nfs_numasync > 0 && nmp->nm_readahead > 0) { + for (nra = 0; nra < nmp->nm_readahead && nra < seqcount && + (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) { + rabn = lbn + 1 + nra; + if (!incore(vp, rabn)) { + rabp = nfs_getcacheblk(vp, rabn, biosize, p); + if (!rabp) + return (EINTR); + if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { + rabp->b_flags |= (B_READ | B_ASYNC); + vfs_busy_pages(rabp, 0); + if (nfs_asyncio(rabp, cred, p)) { + rabp->b_flags |= B_INVAL|B_ERROR; + vfs_unbusy_pages(rabp); + brelse(rabp); + } + } else + brelse(rabp); + } + } + } + + /* + * Obtain the buffer cache block. Figure out the buffer size + * when we are at EOF. If we are modifying the size of the + * buffer based on an EOF condition we need to hold + * nfs_rslock() through obtaining the buffer to prevent + * a potential writer-appender from messing with n_size. + * Otherwise we may accidently truncate the buffer and + * lose dirty data. + * + * Note that bcount is *not* DEV_BSIZE aligned. + */ + +again: + bcount = biosize; + if ((off_t)lbn * biosize >= np->n_size) { + bcount = 0; + } else if ((off_t)(lbn + 1) * biosize > np->n_size) { + bcount = np->n_size - (off_t)lbn * biosize; + } + if (bcount != biosize && nfs_rslock(np, p) == ENOLCK) + goto again; + + bp = nfs_getcacheblk(vp, lbn, bcount, p); + + if (bcount != biosize) + nfs_rsunlock(np, p); + if (!bp) + return (EINTR); + + /* + * If B_CACHE is not set, we must issue the read. If this + * fails, we return an error. + */ + + if ((bp->b_flags & B_CACHE) == 0) { + bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); + error = nfs_doio(bp, cred, p); + if (error) { + brelse(bp); + return (error); + } + } + + /* + * on is the offset into the current bp. Figure out how many + * bytes we can copy out of the bp. Note that bcount is + * NOT DEV_BSIZE aligned. + * + * Then figure out how many bytes we can copy into the uio. 
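A standalone rerun of the arithmetic above with made-up numbers: lbn is the cache block index, on the offset into that block, bcount the block size trimmed at end of file, and n the bytes that can be copied out to the caller. Not kernel code; biosize is assumed to be a power of two, as the mask in the real code requires.

#include <stdio.h>

int
main(void)
{
        long long offset = 70000;       /* uio_offset */
        long long resid = 60000;        /* uio_resid */
        long long fsize = 90000;        /* np->n_size */
        int biosize = 32768;            /* mnt_stat.f_iosize */
        long long lbn, on, bcount, n;

        lbn = offset / biosize;         /* which cache block */
        on = offset & (biosize - 1);    /* offset within that block */

        bcount = biosize;               /* trim the block at EOF */
        if (lbn * biosize >= fsize)
                bcount = 0;
        else if ((lbn + 1) * biosize > fsize)
                bcount = fsize - lbn * biosize;

        n = 0;                          /* bytes we can hand to the caller */
        if (on < bcount)
                n = (bcount - on < resid) ? bcount - on : resid;

        printf("lbn=%lld on=%lld bcount=%lld n=%lld\n", lbn, on, bcount, n);
        return (0);
}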
+ */ + + n = 0; + if (on < bcount) + n = min((unsigned)(bcount - on), uio->uio_resid); + break; + case VLNK: + nfsstats.biocache_readlinks++; + bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); + if (!bp) + return (EINTR); + if ((bp->b_flags & B_CACHE) == 0) { + bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); + error = nfs_doio(bp, cred, p); + if (error) { + bp->b_flags |= B_ERROR; + brelse(bp); + return (error); + } + } + n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); + on = 0; + break; + case VDIR: + nfsstats.biocache_readdirs++; + if (np->n_direofoffset + && uio->uio_offset >= np->n_direofoffset) { + return (0); + } + lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ; + on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); + bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); + if (!bp) + return (EINTR); + if ((bp->b_flags & B_CACHE) == 0) { + bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); + error = nfs_doio(bp, cred, p); + if (error) { + brelse(bp); + } + while (error == NFSERR_BAD_COOKIE) { + printf("got bad cookie vp %p bp %p\n", vp, bp); + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, 0, cred, p, 1); + /* + * Yuck! The directory has been modified on the + * server. The only way to get the block is by + * reading from the beginning to get all the + * offset cookies. + * + * Leave the last bp intact unless there is an error. + * Loop back up to the while if the error is another + * NFSERR_BAD_COOKIE (double yuch!). + */ + for (i = 0; i <= lbn && !error; i++) { + if (np->n_direofoffset + && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) + return (0); + bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p); + if (!bp) + return (EINTR); + if ((bp->b_flags & B_CACHE) == 0) { + bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); + error = nfs_doio(bp, cred, p); + /* + * no error + B_INVAL == directory EOF, + * use the block. + */ + if (error == 0 && (bp->b_flags & B_INVAL)) + break; + } + /* + * An error will throw away the block and the + * for loop will break out. If no error and this + * is not the block we want, we throw away the + * block and go for the next one via the for loop. + */ + if (error || i < lbn) + brelse(bp); + } + } + /* + * The above while is repeated if we hit another cookie + * error. If we hit an error and it wasn't a cookie error, + * we give up. + */ + if (error) + return (error); + } + + /* + * If not eof and read aheads are enabled, start one. + * (You need the current block first, so that you have the + * directory offset cookie of the next block.) + */ + if (nfs_numasync > 0 && nmp->nm_readahead > 0 && + (bp->b_flags & B_INVAL) == 0 && + (np->n_direofoffset == 0 || + (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && + !(np->n_flag & NQNFSNONCACHE) && + !incore(vp, lbn + 1)) { + rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p); + if (rabp) { + if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { + rabp->b_flags |= (B_READ | B_ASYNC); + vfs_busy_pages(rabp, 0); + if (nfs_asyncio(rabp, cred, p)) { + rabp->b_flags |= B_INVAL|B_ERROR; + vfs_unbusy_pages(rabp); + brelse(rabp); + } + } else { + brelse(rabp); + } + } + } + /* + * Unlike VREG files, whos buffer size ( bp->b_bcount ) is + * chopped for the EOF condition, we cannot tell how large + * NFS directories are going to be until we hit EOF. So + * an NFS directory buffer is *not* chopped to its EOF. Now, + * it just so happens that b_resid will effectively chop it + * to EOF. *BUT* this information is lost if the buffer goes + * away and is reconstituted into a B_CACHE state ( due to + * being VMIO ) later. 
So we keep track of the directory eof + * in np->n_direofoffset and chop it off as an extra step + * right here. + */ + n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); + if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset) + n = np->n_direofoffset - uio->uio_offset; + break; + default: + printf(" nfs_bioread: type %x unexpected\n",vp->v_type); + break; + }; + + if (n > 0) { + error = uiomove(bp->b_data + on, (int)n, uio); + } + switch (vp->v_type) { + case VREG: + break; + case VLNK: + n = 0; + break; + case VDIR: + /* + * Invalidate buffer if caching is disabled, forcing a + * re-read from the remote later. + */ + if (np->n_flag & NQNFSNONCACHE) + bp->b_flags |= B_INVAL; + break; + default: + printf(" nfs_bioread: type %x unexpected\n",vp->v_type); + } + brelse(bp); + } while (error == 0 && uio->uio_resid > 0 && n > 0); + return (error); +} + +/* + * Vnode op for write using bio + */ +int +nfs_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + int biosize; + struct uio *uio = ap->a_uio; + struct proc *p = uio->uio_procp; + struct vnode *vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct ucred *cred = ap->a_cred; + int ioflag = ap->a_ioflag; + struct buf *bp; + struct vattr vattr; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + daddr_t lbn; + int bcount; + int n, on, error = 0, iomode, must_commit; + int haverslock = 0; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_WRITE) + panic("nfs_write mode"); + if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) + panic("nfs_write proc"); +#endif + if (vp->v_type != VREG) + return (EIO); + if (np->n_flag & NWRITEERR) { + np->n_flag &= ~NWRITEERR; + return (np->n_error); + } + if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (void)nfs_fsinfo(nmp, vp, cred, p); + + /* + * Synchronously flush pending buffers if we are in synchronous + * mode or if we are appending. + */ + if (ioflag & (IO_APPEND | IO_SYNC)) { + if (np->n_flag & NMODIFIED) { + np->n_attrstamp = 0; + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); + if (error) + return (error); + } + } + + /* + * If IO_APPEND then load uio_offset. We restart here if we cannot + * get the append lock. + */ +restart: + if (ioflag & IO_APPEND) { + np->n_attrstamp = 0; + error = VOP_GETATTR(vp, &vattr, cred, p); + if (error) + return (error); + uio->uio_offset = np->n_size; + } + + if (uio->uio_offset < 0) + return (EINVAL); + if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize) + return (EFBIG); + if (uio->uio_resid == 0) + return (0); + + /* + * We need to obtain the rslock if we intend to modify np->n_size + * in order to guarentee the append point with multiple contending + * writers, to guarentee that no other appenders modify n_size + * while we are trying to obtain a truncated buffer (i.e. to avoid + * accidently truncating data written by another appender due to + * the race), and to ensure that the buffer is populated prior to + * our extending of the file. We hold rslock through the entire + * operation. + * + * Note that we do not synchronize the case where someone truncates + * the file while we are appending to it because attempting to lock + * this case may deadlock other parts of the system unexpectedly. 
+ */ + if ((ioflag & IO_APPEND) || + uio->uio_offset + uio->uio_resid > np->n_size) { + if (nfs_rslock(np, p) == ENOLCK) + goto restart; + haverslock = 1; + } + + /* + * Maybe this should be above the vnode op call, but so long as + * file servers have no limits, i don't think it matters + */ + if (p && uio->uio_offset + uio->uio_resid > + p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { + psignal(p, SIGXFSZ); + if (haverslock) + nfs_rsunlock(np, p); + return (EFBIG); + } + + biosize = vp->v_mount->mnt_stat.f_iosize; + + do { + /* + * Check for a valid write lease. + */ + if ((nmp->nm_flag & NFSMNT_NQNFS) && + NQNFS_CKINVALID(vp, np, ND_WRITE)) { + do { + error = nqnfs_getlease(vp, ND_WRITE, cred, p); + } while (error == NQNFS_EXPIRED); + if (error) + break; + if (np->n_lrev != np->n_brev || + (np->n_flag & NQNFSNONCACHE)) { + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); + if (error) + break; + np->n_brev = np->n_lrev; + } + } + if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) { + iomode = NFSV3WRITE_FILESYNC; + error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); + if (must_commit) + nfs_clearcommit(vp->v_mount); + break; + } + nfsstats.biocache_writes++; + lbn = uio->uio_offset / biosize; + on = uio->uio_offset & (biosize-1); + n = min((unsigned)(biosize - on), uio->uio_resid); +again: + /* + * Handle direct append and file extension cases, calculate + * unaligned buffer size. + */ + + if (uio->uio_offset == np->n_size && n) { + /* + * Get the buffer (in its pre-append state to maintain + * B_CACHE if it was previously set). Resize the + * nfsnode after we have locked the buffer to prevent + * readers from reading garbage. + */ + bcount = on; + bp = nfs_getcacheblk(vp, lbn, bcount, p); + + if (bp != NULL) { + long save; + + np->n_size = uio->uio_offset + n; + np->n_flag |= NMODIFIED; + vnode_pager_setsize(vp, np->n_size); + + save = bp->b_flags & B_CACHE; + bcount += n; + allocbuf(bp, bcount); + bp->b_flags |= save; + } + } else { + /* + * Obtain the locked cache block first, and then + * adjust the file's size as appropriate. + */ + bcount = on + n; + if ((off_t)lbn * biosize + bcount < np->n_size) { + if ((off_t)(lbn + 1) * biosize < np->n_size) + bcount = biosize; + else + bcount = np->n_size - (off_t)lbn * biosize; + } + + bp = nfs_getcacheblk(vp, lbn, bcount, p); + + if (uio->uio_offset + n > np->n_size) { + np->n_size = uio->uio_offset + n; + np->n_flag |= NMODIFIED; + vnode_pager_setsize(vp, np->n_size); + } + } + + if (!bp) { + error = EINTR; + break; + } + + /* + * Issue a READ if B_CACHE is not set. In special-append + * mode, B_CACHE is based on the buffer prior to the write + * op and is typically set, avoiding the read. If a read + * is required in special append mode, the server will + * probably send us a short-read since we extended the file + * on our end, resulting in b_resid == 0 and, thusly, + * B_CACHE getting set. + * + * We can also avoid issuing the read if the write covers + * the entire buffer. We have to make sure the buffer state + * is reasonable in this case since we will not be initiating + * I/O. See the comments in kern/vfs_bio.c's getblk() for + * more information. + * + * B_CACHE may also be set due to the buffer being cached + * normally. 
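The decision described above (and made by the test just below) can be shown in isolation: a read RPC is needed only when the buffer is not already cached and the incoming write leaves some of the old bytes visible. The helper below is invented purely for illustration.

#include <stdio.h>

/*
 * Invented helper: 'on' is the write's offset into the buffer, 'n' its
 * length, 'bcount' the buffer size, 'cached' whether B_CACHE is set.
 */
static int
need_read_before_write(int on, int n, int bcount, int cached)
{
        if (cached)
                return (0);             /* old contents already valid */
        if (on == 0 && n == bcount)
                return (0);             /* write overwrites every byte */
        return (1);
}

int
main(void)
{
        printf("%d\n", need_read_before_write(0, 8192, 8192, 0));       /* 0 */
        printf("%d\n", need_read_before_write(100, 200, 8192, 0));      /* 1 */
        return (0);
}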
+ */ + + if (on == 0 && n == bcount) { + bp->b_flags |= B_CACHE; + bp->b_flags &= ~(B_ERROR | B_INVAL); + } + + if ((bp->b_flags & B_CACHE) == 0) { + bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); + error = nfs_doio(bp, cred, p); + if (error) { + brelse(bp); + break; + } + } + if (!bp) { + error = EINTR; + break; + } + if (bp->b_wcred == NOCRED) { + crhold(cred); + bp->b_wcred = cred; + } + np->n_flag |= NMODIFIED; + + /* + * If dirtyend exceeds file size, chop it down. This should + * not normally occur but there is an append race where it + * might occur XXX, so we log it. + * + * If the chopping creates a reverse-indexed or degenerate + * situation with dirtyoff/end, we 0 both of them. + */ + + if (bp->b_dirtyend > bcount) { + printf("NFS append race @%lx:%d\n", + (long)bp->b_blkno * DEV_BSIZE, + bp->b_dirtyend - bcount); + bp->b_dirtyend = bcount; + } + + if (bp->b_dirtyoff >= bp->b_dirtyend) + bp->b_dirtyoff = bp->b_dirtyend = 0; + + /* + * If the new write will leave a contiguous dirty + * area, just update the b_dirtyoff and b_dirtyend, + * otherwise force a write rpc of the old dirty area. + * + * While it is possible to merge discontiguous writes due to + * our having a B_CACHE buffer ( and thus valid read data + * for the hole), we don't because it could lead to + * significant cache coherency problems with multiple clients, + * especially if locking is implemented later on. + * + * as an optimization we could theoretically maintain + * a linked list of discontinuous areas, but we would still + * have to commit them separately so there isn't much + * advantage to it except perhaps a bit of asynchronization. + */ + + if (bp->b_dirtyend > 0 && + (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { + if (VOP_BWRITE(bp->b_vp, bp) == EINTR) + return (EINTR); + goto again; + } + + /* + * Check for valid write lease and get one as required. + * In case getblk() and/or bwrite() delayed us. + */ + if ((nmp->nm_flag & NFSMNT_NQNFS) && + NQNFS_CKINVALID(vp, np, ND_WRITE)) { + do { + error = nqnfs_getlease(vp, ND_WRITE, cred, p); + } while (error == NQNFS_EXPIRED); + if (error) { + brelse(bp); + break; + } + if (np->n_lrev != np->n_brev || + (np->n_flag & NQNFSNONCACHE)) { + brelse(bp); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); + if (error) + break; + np->n_brev = np->n_lrev; + goto again; + } + } + + error = uiomove((char *)bp->b_data + on, n, uio); + + /* + * Since this block is being modified, it must be written + * again and not just committed. Since write clustering does + * not work for the stage 1 data write, only the stage 2 + * commit rpc, we have to clear B_CLUSTEROK as well. + */ + bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); + + if (error) { + bp->b_flags |= B_ERROR; + brelse(bp); + break; + } + + /* + * Only update dirtyoff/dirtyend if not a degenerate + * condition. + */ + if (n) { + if (bp->b_dirtyend > 0) { + bp->b_dirtyoff = min(on, bp->b_dirtyoff); + bp->b_dirtyend = max((on + n), bp->b_dirtyend); + } else { + bp->b_dirtyoff = on; + bp->b_dirtyend = on + n; + } + vfs_bio_set_validclean(bp, on, n); + } + + /* + * If the lease is non-cachable or IO_SYNC do bwrite(). + * + * IO_INVAL appears to be unused. The idea appears to be + * to turn off caching in this case. Very odd. 
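As a compact model of the dirty-range bookkeeping above, one contiguous dirty region is kept per buffer; a write that would leave a gap forces the old region out first and then starts a new one. The struct and helper below are invented stand-ins for b_dirtyoff/b_dirtyend, not the kernel code.

#include <stdio.h>

struct dirty_range {
        int off;                        /* like b_dirtyoff */
        int end;                        /* like b_dirtyend; 0 = nothing dirty */
};

/*
 * Apply a write of n bytes at offset 'on'.  Returns 1 if the existing
 * dirty range had to be pushed out first because the new write was not
 * contiguous with it.
 */
static int
record_write(struct dirty_range *dr, int on, int n)
{
        int flushed = 0;

        if (dr->end > 0 && (on > dr->end || on + n < dr->off)) {
                dr->off = dr->end = 0;  /* stand-in for the write RPC */
                flushed = 1;
        }
        if (dr->end > 0) {              /* extend the existing range */
                if (on < dr->off)
                        dr->off = on;
                if (on + n > dr->end)
                        dr->end = on + n;
        } else {                        /* start a new range */
                dr->off = on;
                dr->end = on + n;
        }
        return (flushed);
}

int
main(void)
{
        struct dirty_range dr = { 0, 0 };

        record_write(&dr, 100, 50);                     /* dirty [100,150) */
        record_write(&dr, 150, 50);                     /* grows to [100,200) */
        printf("dirty [%d,%d)\n", dr.off, dr.end);
        printf("flushed=%d\n", record_write(&dr, 400, 10));     /* gap */
        printf("dirty [%d,%d)\n", dr.off, dr.end);
        return (0);
}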
XXX + */ + if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { + if (ioflag & IO_INVAL) + bp->b_flags |= B_NOCACHE; + error = VOP_BWRITE(bp->b_vp, bp); + if (error) + break; + if (np->n_flag & NQNFSNONCACHE) { + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); + if (error) + break; + } + } else if ((n + on) == biosize && + (nmp->nm_flag & NFSMNT_NQNFS) == 0) { + bp->b_flags |= B_ASYNC; + (void)nfs_writebp(bp, 0, 0); + } else { + bdwrite(bp); + } + } while (uio->uio_resid > 0 && n > 0); + + if (haverslock) + nfs_rsunlock(np, p); + + return (error); +} + +/* + * Get an nfs cache block. + * + * Allocate a new one if the block isn't currently in the cache + * and return the block marked busy. If the calling process is + * interrupted by a signal for an interruptible mount point, return + * NULL. + * + * The caller must carefully deal with the possible B_INVAL state of + * the buffer. nfs_doio() clears B_INVAL (and nfs_asyncio() clears it + * indirectly), so synchronous reads can be issued without worrying about + * the B_INVAL state. We have to be a little more careful when dealing + * with writes (see comments in nfs_write()) when extending a file past + * its EOF. + */ +static struct buf * +nfs_getcacheblk(vp, bn, size, p) + struct vnode *vp; + daddr_t bn; + int size; + struct proc *p; +{ + register struct buf *bp; + struct mount *mp; + struct nfsmount *nmp; + + mp = vp->v_mount; + nmp = VFSTONFS(mp); + + if (nmp->nm_flag & NFSMNT_INT) { + bp = getblk(vp, bn, size, PCATCH, 0); + while (bp == (struct buf *)0) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return ((struct buf *)0); + bp = getblk(vp, bn, size, 0, 2 * hz); + } + } else { + bp = getblk(vp, bn, size, 0, 0); + } + + if (vp->v_type == VREG) { + int biosize; + + biosize = mp->mnt_stat.f_iosize; + bp->b_blkno = bn * (biosize / DEV_BSIZE); + } + return (bp); +} + +/* + * Flush and invalidate all dirty buffers. If another process is already + * doing the flush, just wait for completion. + */ +int +nfs_vinvalbuf(vp, flags, cred, p, intrflg) + struct vnode *vp; + int flags; + struct ucred *cred; + struct proc *p; + int intrflg; +{ + register struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + int error = 0, slpflag, slptimeo; + + if (vp->v_flag & VXLOCK) { + return (0); + } + + if ((nmp->nm_flag & NFSMNT_INT) == 0) + intrflg = 0; + if (intrflg) { + slpflag = PCATCH; + slptimeo = 2 * hz; + } else { + slpflag = 0; + slptimeo = 0; + } + /* + * First wait for any other process doing a flush to complete. + */ + while (np->n_flag & NFLUSHINPROG) { + np->n_flag |= NFLUSHWANT; + error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", + slptimeo); + if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return (EINTR); + } + + /* + * Now, flush as required. + */ + np->n_flag |= NFLUSHINPROG; + error = vinvalbuf(vp, flags, cred, p, slpflag, 0); + while (error) { + if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { + np->n_flag &= ~NFLUSHINPROG; + if (np->n_flag & NFLUSHWANT) { + np->n_flag &= ~NFLUSHWANT; + wakeup((caddr_t)&np->n_flag); + } + return (EINTR); + } + error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); + } + np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); + if (np->n_flag & NFLUSHWANT) { + np->n_flag &= ~NFLUSHWANT; + wakeup((caddr_t)&np->n_flag); + } + return (0); +} + +/* + * Initiate asynchronous I/O. Return an error if no nfsiods are available. + * This is mainly to avoid queueing async I/O requests when the nfsiods + * are all hung on a dead server. 
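The NFLUSHINPROG/NFLUSHWANT hand-off in nfs_vinvalbuf() above is a single-flusher gate: later arrivals sleep until the current flush finishes. A rough userland analogue with a mutex and condition variable follows; the kernel sleeps on the flag word with tsleep()/wakeup() instead, so this is only an approximation of the pattern.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t flush_done = PTHREAD_COND_INITIALIZER;
static int flush_in_progress;           /* plays the role of NFLUSHINPROG */

static void
flush_node(void)
{
        pthread_mutex_lock(&lock);
        while (flush_in_progress)       /* wait for the current flusher */
                pthread_cond_wait(&flush_done, &lock);
        flush_in_progress = 1;
        pthread_mutex_unlock(&lock);

        /* ... invalidate cached buffers here ... */

        pthread_mutex_lock(&lock);
        flush_in_progress = 0;
        pthread_cond_broadcast(&flush_done);    /* like wakeup(&np->n_flag) */
        pthread_mutex_unlock(&lock);
}

int
main(void)
{
        flush_node();
        printf("flush complete\n");
        return (0);
}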
+ * + * Note: nfs_asyncio() does not clear (B_ERROR|B_INVAL) but when the bp + * is eventually dequeued by the async daemon, nfs_doio() *will*. + */ +int +nfs_asyncio(bp, cred, procp) + register struct buf *bp; + struct ucred *cred; + struct proc *procp; +{ + struct nfsmount *nmp; + int i; + int gotiod; + int slpflag = 0; + int slptimeo = 0; + int error; + + if (nfs_numasync == 0) + return (EIO); + + nmp = VFSTONFS(bp->b_vp->v_mount); +again: + if (nmp->nm_flag & NFSMNT_INT) + slpflag = PCATCH; + gotiod = FALSE; + + /* + * Find a free iod to process this request. + */ + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) + if (nfs_iodwant[i]) { + /* + * Found one, so wake it up and tell it which + * mount to process. + */ + NFS_DPF(ASYNCIO, + ("nfs_asyncio: waking iod %d for mount %p\n", + i, nmp)); + nfs_iodwant[i] = (struct proc *)0; + nfs_iodmount[i] = nmp; + nmp->nm_bufqiods++; + wakeup((caddr_t)&nfs_iodwant[i]); + gotiod = TRUE; + break; + } + + /* + * If none are free, we may already have an iod working on this mount + * point. If so, it will process our request. + */ + if (!gotiod) { + if (nmp->nm_bufqiods > 0) { + NFS_DPF(ASYNCIO, + ("nfs_asyncio: %d iods are already processing mount %p\n", + nmp->nm_bufqiods, nmp)); + gotiod = TRUE; + } + } + + /* + * If we have an iod which can process the request, then queue + * the buffer. + */ + if (gotiod) { + /* + * Ensure that the queue never grows too large. + */ + while (nmp->nm_bufqlen >= 2*nfs_numasync) { + NFS_DPF(ASYNCIO, + ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp)); + nmp->nm_bufqwant = TRUE; + error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO, + "nfsaio", slptimeo); + if (error) { + if (nfs_sigintr(nmp, NULL, procp)) + return (EINTR); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + /* + * We might have lost our iod while sleeping, + * so check and loop if nescessary. + */ + if (nmp->nm_bufqiods == 0) { + NFS_DPF(ASYNCIO, + ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp)); + goto again; + } + } + + if (bp->b_flags & B_READ) { + if (bp->b_rcred == NOCRED && cred != NOCRED) { + crhold(cred); + bp->b_rcred = cred; + } + } else { + bp->b_flags |= B_WRITEINPROG; + if (bp->b_wcred == NOCRED && cred != NOCRED) { + crhold(cred); + bp->b_wcred = cred; + } + } + + BUF_KERNPROC(bp); + TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist); + nmp->nm_bufqlen++; + return (0); + } + + /* + * All the iods are busy on other mounts, so return EIO to + * force the caller to process the i/o synchronously. + */ + NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n")); + return (EIO); +} + +/* + * Do an I/O operation to/from a cache block. This may be called + * synchronously or from an nfsiod. + */ +int +nfs_doio(bp, cr, p) + struct buf *bp; + struct ucred *cr; + struct proc *p; +{ + struct uio *uiop; + struct vnode *vp; + struct nfsnode *np; + struct nfsmount *nmp; + int error = 0, iomode, must_commit = 0; + struct uio uio; + struct iovec io; + + vp = bp->b_vp; + np = VTONFS(vp); + nmp = VFSTONFS(vp->v_mount); + uiop = &uio; + uiop->uio_iov = &io; + uiop->uio_iovcnt = 1; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_procp = p; + + /* + * clear B_ERROR and B_INVAL state prior to initiating the I/O. We + * do this here so we do not have to do it in all the code that + * calls us. + */ + bp->b_flags &= ~(B_ERROR | B_INVAL); + + KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp)); + + /* + * Historically, paging was done with physio, but no more. 
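The throttle in nfs_asyncio() above caps the per-mount queue at twice the number of async daemons; beyond that the caller sleeps until the queue drains. A toy version of just the admission test, with invented names:

#include <stdio.h>

/* Invented helper: may another buffer be queued right now? */
static int
can_queue(int bufqlen, int numasync)
{
        return (bufqlen < 2 * numasync);
}

int
main(void)
{
        int numasync = 4;               /* pretend four nfsiods are running */
        int qlen;

        for (qlen = 6; qlen <= 9; qlen++)
                printf("qlen=%d queue now? %d\n", qlen, can_queue(qlen, numasync));
        return (0);
}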
+ */ + if (bp->b_flags & B_PHYS) { + /* + * ...though reading /dev/drum still gets us here. + */ + io.iov_len = uiop->uio_resid = bp->b_bcount; + /* mapping was done by vmapbuf() */ + io.iov_base = bp->b_data; + uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; + if (bp->b_flags & B_READ) { + uiop->uio_rw = UIO_READ; + nfsstats.read_physios++; + error = nfs_readrpc(vp, uiop, cr); + } else { + int com; + + iomode = NFSV3WRITE_DATASYNC; + uiop->uio_rw = UIO_WRITE; + nfsstats.write_physios++; + error = nfs_writerpc(vp, uiop, cr, &iomode, &com); + } + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = error; + } + } else if (bp->b_flags & B_READ) { + io.iov_len = uiop->uio_resid = bp->b_bcount; + io.iov_base = bp->b_data; + uiop->uio_rw = UIO_READ; + switch (vp->v_type) { + case VREG: + uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; + nfsstats.read_bios++; + error = nfs_readrpc(vp, uiop, cr); + if (!error) { + if (uiop->uio_resid) { + /* + * If we had a short read with no error, we must have + * hit a file hole. We should zero-fill the remainder. + * This can also occur if the server hits the file EOF. + * + * Holes used to be able to occur due to pending + * writes, but that is not possible any longer. + */ + int nread = bp->b_bcount - uiop->uio_resid; + int left = bp->b_bcount - nread; + + if (left > 0) + bzero((char *)bp->b_data + nread, left); + uiop->uio_resid = 0; + } + } + if (p && (vp->v_flag & VTEXT) && + (((nmp->nm_flag & NFSMNT_NQNFS) && + NQNFS_CKINVALID(vp, np, ND_READ) && + np->n_lrev != np->n_brev) || + (!(nmp->nm_flag & NFSMNT_NQNFS) && + np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { + uprintf("Process killed due to text file modification\n"); + psignal(p, SIGKILL); + PHOLD(p); + } + break; + case VLNK: + uiop->uio_offset = (off_t)0; + nfsstats.readlink_bios++; + error = nfs_readlinkrpc(vp, uiop, cr); + break; + case VDIR: + nfsstats.readdir_bios++; + uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; + if (nmp->nm_flag & NFSMNT_RDIRPLUS) { + error = nfs_readdirplusrpc(vp, uiop, cr); + if (error == NFSERR_NOTSUPP) + nmp->nm_flag &= ~NFSMNT_RDIRPLUS; + } + if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) + error = nfs_readdirrpc(vp, uiop, cr); + /* + * end-of-directory sets B_INVAL but does not generate an + * error. 
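For the VREG read case above, a short read with no error means the request ran past EOF or into a hole, so the unfilled tail of the buffer is zeroed rather than left stale. A standalone sketch of that fill with made-up sizes:

#include <stdio.h>
#include <string.h>

int
main(void)
{
        char buf[16];
        int bcount = sizeof(buf);       /* like bp->b_bcount */
        int resid = 6;                  /* bytes the read RPC left unread */
        int nread = bcount - resid;     /* bytes actually read */

        memset(buf, 'X', sizeof(buf));  /* pretend the buffer held stale bytes */
        if (resid > 0)
                memset(buf + nread, 0, bcount - nread); /* zero-fill the tail */

        printf("read %d bytes, zeroed %d\n", nread, bcount - nread);
        return (0);
}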
+ */ + if (error == 0 && uiop->uio_resid == bp->b_bcount) + bp->b_flags |= B_INVAL; + break; + default: + printf("nfs_doio: type %x unexpected\n",vp->v_type); + break; + }; + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = error; + } + } else { + /* + * If we only need to commit, try to commit + */ + if (bp->b_flags & B_NEEDCOMMIT) { + int retv; + off_t off; + + off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; + bp->b_flags |= B_WRITEINPROG; + retv = nfs_commit( + bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, + bp->b_wcred, p); + bp->b_flags &= ~B_WRITEINPROG; + if (retv == 0) { + bp->b_dirtyoff = bp->b_dirtyend = 0; + bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); + bp->b_resid = 0; + biodone(bp); + return (0); + } + if (retv == NFSERR_STALEWRITEVERF) { + nfs_clearcommit(bp->b_vp->v_mount); + } + } + + /* + * Setup for actual write + */ + + if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size) + bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE; + + if (bp->b_dirtyend > bp->b_dirtyoff) { + io.iov_len = uiop->uio_resid = bp->b_dirtyend + - bp->b_dirtyoff; + uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE + + bp->b_dirtyoff; + io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; + uiop->uio_rw = UIO_WRITE; + nfsstats.write_bios++; + + if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) + iomode = NFSV3WRITE_UNSTABLE; + else + iomode = NFSV3WRITE_FILESYNC; + + bp->b_flags |= B_WRITEINPROG; + error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); + + /* + * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try + * to cluster the buffers needing commit. This will allow + * the system to submit a single commit rpc for the whole + * cluster. We can do this even if the buffer is not 100% + * dirty (relative to the NFS blocksize), so we optimize the + * append-to-file-case. + * + * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be + * cleared because write clustering only works for commit + * rpc's, not for the data portion of the write). + */ + + if (!error && iomode == NFSV3WRITE_UNSTABLE) { + bp->b_flags |= B_NEEDCOMMIT; + if (bp->b_dirtyoff == 0 + && bp->b_dirtyend == bp->b_bcount) + bp->b_flags |= B_CLUSTEROK; + } else { + bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); + } + bp->b_flags &= ~B_WRITEINPROG; + + /* + * For an interrupted write, the buffer is still valid + * and the write hasn't been pushed to the server yet, + * so we can't set B_ERROR and report the interruption + * by setting B_EINTR. For the B_ASYNC case, B_EINTR + * is not relevant, so the rpc attempt is essentially + * a noop. For the case of a V3 write rpc not being + * committed to stable storage, the block is still + * dirty and requires either a commit rpc or another + * write rpc with iomode == NFSV3WRITE_FILESYNC before + * the block is reused. This is indicated by setting + * the B_DELWRI and B_NEEDCOMMIT flags. + * + * If the buffer is marked B_PAGING, it does not reside on + * the vp's paging queues so we cannot call bdirty(). The + * bp in this case is not an NFS cache block so we should + * be safe. 
XXX + */ + if (error == EINTR + || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + int s; + + s = splbio(); + bp->b_flags &= ~(B_INVAL|B_NOCACHE); + if ((bp->b_flags & B_PAGING) == 0) { + bdirty(bp); + bp->b_flags &= ~B_DONE; + } + if (error && (bp->b_flags & B_ASYNC) == 0) + bp->b_flags |= B_EINTR; + splx(s); + } else { + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = np->n_error = error; + np->n_flag |= NWRITEERR; + } + bp->b_dirtyoff = bp->b_dirtyend = 0; + } + } else { + bp->b_resid = 0; + biodone(bp); + return (0); + } + } + bp->b_resid = uiop->uio_resid; + if (must_commit) + nfs_clearcommit(vp->v_mount); + biodone(bp); + return (error); +} diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c new file mode 100644 index 0000000..e63ba63 --- /dev/null +++ b/sys/nfs/nfs_common.c @@ -0,0 +1,2272 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 + * $FreeBSD$ + */ + +/* + * These functions support the macros and help fiddle mbuf chains for + * the nfs op functions. They do things like create the rpc header and + * copy data between mbuf chains and uio lists. 
+ */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/malloc.h> +#include <sys/sysent.h> +#include <sys/syscall.h> +#include <sys/conf.h> + +#include <vm/vm.h> +#include <vm/vm_object.h> +#include <vm/vm_extern.h> +#include <vm/vm_zone.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsnode.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nfsmount.h> +#include <nfs/nqnfs.h> +#include <nfs/nfsrtt.h> + +#include <netinet/in.h> +#ifdef ISO +#include <netiso/iso.h> +#endif + +/* + * Data items converted to xdr at startup, since they are constant + * This is kinda hokey, but may save a little time doing byte swaps + */ +u_int32_t nfs_xdrneg1; +u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, + rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, + rpc_auth_kerb; +u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false; + +/* And other global data */ +static u_int32_t nfs_xid = 0; +static enum vtype nv2tov_type[8]= { + VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON +}; +enum vtype nv3tov_type[8]= { + VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO +}; + +int nfs_ticks; +int nfs_pbuf_freecnt = -1; /* start out unlimited */ + +struct nfs_reqq nfs_reqq; +struct nfssvc_sockhead nfssvc_sockhead; +int nfssvc_sockhead_flag; +struct nfsd_head nfsd_head; +int nfsd_head_flag; +struct nfs_bufq nfs_bufq; +struct nqtimerhead nqtimerhead; +struct nqfhhashhead *nqfhhashtbl; +u_long nqfhhash; + +static void (*nfs_prev_lease_updatetime) __P((int)); +static int nfs_prev_nfssvc_sy_narg; +static sy_call_t *nfs_prev_nfssvc_sy_call; + +#ifndef NFS_NOSERVER + +static vop_t *nfs_prev_vop_lease_check; +static int nfs_prev_getfh_sy_narg; +static sy_call_t *nfs_prev_getfh_sy_call; + +/* + * Mapping of old NFS Version 2 RPC numbers to generic numbers. + */ +int nfsv3_procid[NFS_NPROCS] = { + NFSPROC_NULL, + NFSPROC_GETATTR, + NFSPROC_SETATTR, + NFSPROC_NOOP, + NFSPROC_LOOKUP, + NFSPROC_READLINK, + NFSPROC_READ, + NFSPROC_NOOP, + NFSPROC_WRITE, + NFSPROC_CREATE, + NFSPROC_REMOVE, + NFSPROC_RENAME, + NFSPROC_LINK, + NFSPROC_SYMLINK, + NFSPROC_MKDIR, + NFSPROC_RMDIR, + NFSPROC_READDIR, + NFSPROC_FSSTAT, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP +}; + +#endif /* NFS_NOSERVER */ +/* + * and the reverse mapping from generic to Version 2 procedure numbers + */ +int nfsv2_procid[NFS_NPROCS] = { + NFSV2PROC_NULL, + NFSV2PROC_GETATTR, + NFSV2PROC_SETATTR, + NFSV2PROC_LOOKUP, + NFSV2PROC_NOOP, + NFSV2PROC_READLINK, + NFSV2PROC_READ, + NFSV2PROC_WRITE, + NFSV2PROC_CREATE, + NFSV2PROC_MKDIR, + NFSV2PROC_SYMLINK, + NFSV2PROC_CREATE, + NFSV2PROC_REMOVE, + NFSV2PROC_RMDIR, + NFSV2PROC_RENAME, + NFSV2PROC_LINK, + NFSV2PROC_READDIR, + NFSV2PROC_NOOP, + NFSV2PROC_STATFS, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, +}; + +#ifndef NFS_NOSERVER +/* + * Maps errno values to nfs error numbers. + * Use NFSERR_IO as the catch all for ones not specifically defined in + * RFC 1094. 
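/*
 * [Editor's sketch -- not part of this commit.]  The nfsrv_v2errmap[]
 * table that follows is indexed by (errno - 1), and anything outside it
 * falls back to NFSERR_IO; nfsrv_errmap(), further down in this file,
 * performs the real lookup.  Parameterized so it stands alone, the
 * mapping is simply:
 */
static int
errno_to_nfserr(int err, const unsigned char *map, int maplen, int catchall)
{
        if (err >= 1 && err <= maplen)
                return (map[err - 1]);  /* table entry for this errno */
        return (catchall);              /* NFSERR_IO in the kernel    */
}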
+ */ +static u_char nfsrv_v2errmap[ELAST] = { + NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, + NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, + NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO /* << Last is 86 */ +}; + +/* + * Maps errno values to nfs error numbers. + * Although it is not obvious whether or not NFS clients really care if + * a returned error value is in the specified list for the procedure, the + * safest thing to do is filter them appropriately. For Version 2, the + * X/Open XNFS document is the only specification that defines error values + * for each RPC (The RFC simply lists all possible error values for all RPCs), + * so I have decided to not do this for Version 2. + * The first entry is the default error return and the rest are the valid + * errors for that RPC in increasing numeric order. + */ +static short nfsv3err_null[] = { + 0, + 0, +}; + +static short nfsv3err_getattr[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_setattr[] = { + NFSERR_IO, + NFSERR_PERM, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOT_SYNC, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_lookup[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_NAMETOL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_access[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_readlink[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_read[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_NXIO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_write[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_FBIG, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_create[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_mkdir[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + 
NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_symlink[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_mknod[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_BADTYPE, + 0, +}; + +static short nfsv3err_remove[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_rmdir[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_INVAL, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_NOTEMPTY, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_rename[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_XDEV, + NFSERR_NOTDIR, + NFSERR_ISDIR, + NFSERR_INVAL, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_MLINK, + NFSERR_NAMETOL, + NFSERR_NOTEMPTY, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_link[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_XDEV, + NFSERR_NOTDIR, + NFSERR_INVAL, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_MLINK, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_readdir[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_BAD_COOKIE, + NFSERR_TOOSMALL, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_readdirplus[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_BAD_COOKIE, + NFSERR_NOTSUPP, + NFSERR_TOOSMALL, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_fsstat[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_fsinfo[] = { + NFSERR_STALE, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_pathconf[] = { + NFSERR_STALE, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_commit[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short *nfsrv_v3errmap[] = { + nfsv3err_null, + nfsv3err_getattr, + nfsv3err_setattr, + nfsv3err_lookup, + nfsv3err_access, + nfsv3err_readlink, + nfsv3err_read, + nfsv3err_write, + nfsv3err_create, + nfsv3err_mkdir, + nfsv3err_symlink, + nfsv3err_mknod, + nfsv3err_remove, + nfsv3err_rmdir, + nfsv3err_rename, + nfsv3err_link, + nfsv3err_readdir, + nfsv3err_readdirplus, + nfsv3err_fsstat, + nfsv3err_fsinfo, + nfsv3err_pathconf, + nfsv3err_commit, +}; + +#endif /* NFS_NOSERVER */ + +extern struct nfsrtt nfsrtt; +extern time_t nqnfsstarttime; +extern int nqsrv_clockskew; +extern int nqsrv_writeslack; +extern int nqsrv_maxlease; +extern struct nfsstats nfsstats; +extern int nqnfs_piggy[NFS_NPROCS]; +extern nfstype nfsv2_type[9]; +extern nfstype nfsv3_type[9]; +extern struct nfsnodehashhead *nfsnodehashtbl; +extern u_long nfsnodehash; + +struct getfh_args; +extern 
int getfh(struct proc *, struct getfh_args *, int *); +struct nfssvc_args; +extern int nfssvc(struct proc *, struct nfssvc_args *, int *); + +LIST_HEAD(nfsnodehashhead, nfsnode); + +int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *)); + +u_quad_t +nfs_curusec() +{ + struct timeval tv; + + getmicrotime(&tv); + return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec); +} + +/* + * Create the header for an rpc request packet + * The hsiz is the size of the rest of the nfs request header. + * (just used to decide if a cluster is a good idea) + */ +struct mbuf * +nfsm_reqh(vp, procid, hsiz, bposp) + struct vnode *vp; + u_long procid; + int hsiz; + caddr_t *bposp; +{ + register struct mbuf *mb; + register u_int32_t *tl; + register caddr_t bpos; + struct mbuf *mb2; + struct nfsmount *nmp; + int nqflag; + + MGET(mb, M_WAIT, MT_DATA); + if (hsiz >= MINCLSIZE) + MCLGET(mb, M_WAIT); + mb->m_len = 0; + bpos = mtod(mb, caddr_t); + + /* + * For NQNFS, add lease request. + */ + if (vp) { + nmp = VFSTONFS(vp->v_mount); + if (nmp->nm_flag & NFSMNT_NQNFS) { + nqflag = NQNFS_NEEDLEASE(vp, procid); + if (nqflag) { + nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(nqflag); + *tl = txdr_unsigned(nmp->nm_leaseterm); + } else { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = 0; + } + } + } + /* Finally, return values */ + *bposp = bpos; + return (mb); +} + +/* + * Build the RPC header and fill in the authorization info. + * The authorization string argument is only used when the credentials + * come from outside of the kernel. + * Returns the head of the mbuf list. + */ +struct mbuf * +nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, + verf_str, mrest, mrest_len, mbp, xidp) + register struct ucred *cr; + int nmflag; + int procid; + int auth_type; + int auth_len; + char *auth_str; + int verf_len; + char *verf_str; + struct mbuf *mrest; + int mrest_len; + struct mbuf **mbp; + u_int32_t *xidp; +{ + register struct mbuf *mb; + register u_int32_t *tl; + register caddr_t bpos; + register int i; + struct mbuf *mreq, *mb2; + int siz, grpsiz, authsiz; + + authsiz = nfsm_rndup(auth_len); + MGETHDR(mb, M_WAIT, MT_DATA); + if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { + MCLGET(mb, M_WAIT); + } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { + MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); + } else { + MH_ALIGN(mb, 8 * NFSX_UNSIGNED); + } + mb->m_len = 0; + mreq = mb; + bpos = mtod(mb, caddr_t); + + /* + * First the RPC header. + */ + nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED); + + /* Get a pretty random xid to start with */ + if (!nfs_xid) + nfs_xid = random(); + /* + * Skip zero xid if it should ever happen. + */ + if (++nfs_xid == 0) + nfs_xid++; + + *tl++ = *xidp = txdr_unsigned(nfs_xid); + *tl++ = rpc_call; + *tl++ = rpc_vers; + if (nmflag & NFSMNT_NQNFS) { + *tl++ = txdr_unsigned(NQNFS_PROG); + *tl++ = txdr_unsigned(NQNFS_VER3); + } else { + *tl++ = txdr_unsigned(NFS_PROG); + if (nmflag & NFSMNT_NFSV3) + *tl++ = txdr_unsigned(NFS_VER3); + else + *tl++ = txdr_unsigned(NFS_VER2); + } + if (nmflag & NFSMNT_NFSV3) + *tl++ = txdr_unsigned(procid); + else + *tl++ = txdr_unsigned(nfsv2_procid[procid]); + + /* + * And then the authorization cred. + */ + *tl++ = txdr_unsigned(auth_type); + *tl = txdr_unsigned(authsiz); + switch (auth_type) { + case RPCAUTH_UNIX: + nfsm_build(tl, u_int32_t *, auth_len); + *tl++ = 0; /* stamp ?? 
*/ + *tl++ = 0; /* NULL hostname */ + *tl++ = txdr_unsigned(cr->cr_uid); + *tl++ = txdr_unsigned(cr->cr_groups[0]); + grpsiz = (auth_len >> 2) - 5; + *tl++ = txdr_unsigned(grpsiz); + for (i = 1; i <= grpsiz; i++) + *tl++ = txdr_unsigned(cr->cr_groups[i]); + break; + case RPCAUTH_KERB4: + siz = auth_len; + while (siz > 0) { + if (M_TRAILINGSPACE(mb) == 0) { + MGET(mb2, M_WAIT, MT_DATA); + if (siz >= MINCLSIZE) + MCLGET(mb2, M_WAIT); + mb->m_next = mb2; + mb = mb2; + mb->m_len = 0; + bpos = mtod(mb, caddr_t); + } + i = min(siz, M_TRAILINGSPACE(mb)); + bcopy(auth_str, bpos, i); + mb->m_len += i; + auth_str += i; + bpos += i; + siz -= i; + } + if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { + for (i = 0; i < siz; i++) + *bpos++ = '\0'; + mb->m_len += siz; + } + break; + }; + + /* + * And the verifier... + */ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + if (verf_str) { + *tl++ = txdr_unsigned(RPCAUTH_KERB4); + *tl = txdr_unsigned(verf_len); + siz = verf_len; + while (siz > 0) { + if (M_TRAILINGSPACE(mb) == 0) { + MGET(mb2, M_WAIT, MT_DATA); + if (siz >= MINCLSIZE) + MCLGET(mb2, M_WAIT); + mb->m_next = mb2; + mb = mb2; + mb->m_len = 0; + bpos = mtod(mb, caddr_t); + } + i = min(siz, M_TRAILINGSPACE(mb)); + bcopy(verf_str, bpos, i); + mb->m_len += i; + verf_str += i; + bpos += i; + siz -= i; + } + if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { + for (i = 0; i < siz; i++) + *bpos++ = '\0'; + mb->m_len += siz; + } + } else { + *tl++ = txdr_unsigned(RPCAUTH_NULL); + *tl = 0; + } + mb->m_next = mrest; + mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; + mreq->m_pkthdr.rcvif = (struct ifnet *)0; + *mbp = mb; + return (mreq); +} + +/* + * copies mbuf chain to the uio scatter/gather list + */ +int +nfsm_mbuftouio(mrep, uiop, siz, dpos) + struct mbuf **mrep; + register struct uio *uiop; + int siz; + caddr_t *dpos; +{ + register char *mbufcp, *uiocp; + register int xfer, left, len; + register struct mbuf *mp; + long uiosiz, rem; + int error = 0; + + mp = *mrep; + mbufcp = *dpos; + len = mtod(mp, caddr_t)+mp->m_len-mbufcp; + rem = nfsm_rndup(siz)-siz; + while (siz > 0) { + if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) + return (EFBIG); + left = uiop->uio_iov->iov_len; + uiocp = uiop->uio_iov->iov_base; + if (left > siz) + left = siz; + uiosiz = left; + while (left > 0) { + while (len == 0) { + mp = mp->m_next; + if (mp == NULL) + return (EBADRPC); + mbufcp = mtod(mp, caddr_t); + len = mp->m_len; + } + xfer = (left > len) ? len : left; +#ifdef notdef + /* Not Yet.. */ + if (uiop->uio_iov->iov_op != NULL) + (*(uiop->uio_iov->iov_op)) + (mbufcp, uiocp, xfer); + else +#endif + if (uiop->uio_segflg == UIO_SYSSPACE) + bcopy(mbufcp, uiocp, xfer); + else + copyout(mbufcp, uiocp, xfer); + left -= xfer; + len -= xfer; + mbufcp += xfer; + uiocp += xfer; + uiop->uio_offset += xfer; + uiop->uio_resid -= xfer; + } + if (uiop->uio_iov->iov_len <= siz) { + uiop->uio_iovcnt--; + uiop->uio_iov++; + } else { + uiop->uio_iov->iov_base += uiosiz; + uiop->uio_iov->iov_len -= uiosiz; + } + siz -= uiosiz; + } + *dpos = mbufcp; + *mrep = mp; + if (rem > 0) { + if (len < rem) + error = nfs_adv(mrep, dpos, rem, len); + else + *dpos += rem; + } + return (error); +} + +/* + * copies a uio scatter/gather list to an mbuf chain. 
+ * NOTE: can ony handle iovcnt == 1 + */ +int +nfsm_uiotombuf(uiop, mq, siz, bpos) + register struct uio *uiop; + struct mbuf **mq; + int siz; + caddr_t *bpos; +{ + register char *uiocp; + register struct mbuf *mp, *mp2; + register int xfer, left, mlen; + int uiosiz, clflg, rem; + char *cp; + +#ifdef DIAGNOSTIC + if (uiop->uio_iovcnt != 1) + panic("nfsm_uiotombuf: iovcnt != 1"); +#endif + + if (siz > MLEN) /* or should it >= MCLBYTES ?? */ + clflg = 1; + else + clflg = 0; + rem = nfsm_rndup(siz)-siz; + mp = mp2 = *mq; + while (siz > 0) { + left = uiop->uio_iov->iov_len; + uiocp = uiop->uio_iov->iov_base; + if (left > siz) + left = siz; + uiosiz = left; + while (left > 0) { + mlen = M_TRAILINGSPACE(mp); + if (mlen == 0) { + MGET(mp, M_WAIT, MT_DATA); + if (clflg) + MCLGET(mp, M_WAIT); + mp->m_len = 0; + mp2->m_next = mp; + mp2 = mp; + mlen = M_TRAILINGSPACE(mp); + } + xfer = (left > mlen) ? mlen : left; +#ifdef notdef + /* Not Yet.. */ + if (uiop->uio_iov->iov_op != NULL) + (*(uiop->uio_iov->iov_op)) + (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + else +#endif + if (uiop->uio_segflg == UIO_SYSSPACE) + bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + else + copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + mp->m_len += xfer; + left -= xfer; + uiocp += xfer; + uiop->uio_offset += xfer; + uiop->uio_resid -= xfer; + } + uiop->uio_iov->iov_base += uiosiz; + uiop->uio_iov->iov_len -= uiosiz; + siz -= uiosiz; + } + if (rem > 0) { + if (rem > M_TRAILINGSPACE(mp)) { + MGET(mp, M_WAIT, MT_DATA); + mp->m_len = 0; + mp2->m_next = mp; + } + cp = mtod(mp, caddr_t)+mp->m_len; + for (left = 0; left < rem; left++) + *cp++ = '\0'; + mp->m_len += rem; + *bpos = cp; + } else + *bpos = mtod(mp, caddr_t)+mp->m_len; + *mq = mp; + return (0); +} + +/* + * Help break down an mbuf chain by setting the first siz bytes contiguous + * pointed to by returned val. + * This is used by the macros nfsm_dissect and nfsm_dissecton for tough + * cases. (The macros use the vars. dpos and dpos2) + */ +int +nfsm_disct(mdp, dposp, siz, left, cp2) + struct mbuf **mdp; + caddr_t *dposp; + int siz; + int left; + caddr_t *cp2; +{ + register struct mbuf *mp, *mp2; + register int siz2, xfer; + register caddr_t p; + + mp = *mdp; + while (left == 0) { + *mdp = mp = mp->m_next; + if (mp == NULL) + return (EBADRPC); + left = mp->m_len; + *dposp = mtod(mp, caddr_t); + } + if (left >= siz) { + *cp2 = *dposp; + *dposp += siz; + } else if (mp->m_next == NULL) { + return (EBADRPC); + } else if (siz > MHLEN) { + panic("nfs S too big"); + } else { + MGET(mp2, M_WAIT, MT_DATA); + mp2->m_next = mp->m_next; + mp->m_next = mp2; + mp->m_len -= left; + mp = mp2; + *cp2 = p = mtod(mp, caddr_t); + bcopy(*dposp, p, left); /* Copy what was left */ + siz2 = siz-left; + p += left; + mp2 = mp->m_next; + /* Loop around copying up the siz2 bytes */ + while (siz2 > 0) { + if (mp2 == NULL) + return (EBADRPC); + xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2; + if (xfer > 0) { + bcopy(mtod(mp2, caddr_t), p, xfer); + NFSMADV(mp2, xfer); + mp2->m_len -= xfer; + p += xfer; + siz2 -= xfer; + } + if (siz2 > 0) + mp2 = mp2->m_next; + } + mp->m_len = siz; + *mdp = mp2; + *dposp = mtod(mp2, caddr_t); + } + return (0); +} + +/* + * Advance the position in the mbuf chain. 
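/*
 * [Editor's sketch -- not part of this commit.]  nfs_adv() below moves a
 * (mbuf, offset) cursor forward by 'offs' bytes, hopping to the next
 * mbuf whenever the current one runs out.  The same walk over a plain
 * array of segments (a stand-in for the mbuf chain) looks like this:
 */
#include <stddef.h>

struct seg {
        const char *base;
        size_t      len;
};

/*
 * Advance a cursor that sits 'used' bytes into segment *idxp by 'offs'
 * more bytes.  Returns 0 on success, -1 if the chain is exhausted (the
 * kernel version returns EBADRPC there).
 */
static int
seg_adv(const struct seg *segs, size_t nsegs, size_t *idxp, size_t *usedp,
    size_t offs)
{
        size_t left = segs[*idxp].len - *usedp;

        while (offs >= left) {          /* consume whole remaining pieces */
                offs -= left;
                if (++*idxp >= nsegs)
                        return (-1);
                left = segs[*idxp].len;
                *usedp = 0;
        }
        *usedp += offs;
        return (0);
}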
+ */ +int +nfs_adv(mdp, dposp, offs, left) + struct mbuf **mdp; + caddr_t *dposp; + int offs; + int left; +{ + register struct mbuf *m; + register int s; + + m = *mdp; + s = left; + while (s < offs) { + offs -= s; + m = m->m_next; + if (m == NULL) + return (EBADRPC); + s = m->m_len; + } + *mdp = m; + *dposp = mtod(m, caddr_t)+offs; + return (0); +} + +/* + * Copy a string into mbufs for the hard cases... + */ +int +nfsm_strtmbuf(mb, bpos, cp, siz) + struct mbuf **mb; + char **bpos; + const char *cp; + long siz; +{ + register struct mbuf *m1 = NULL, *m2; + long left, xfer, len, tlen; + u_int32_t *tl; + int putsize; + + putsize = 1; + m2 = *mb; + left = M_TRAILINGSPACE(m2); + if (left > 0) { + tl = ((u_int32_t *)(*bpos)); + *tl++ = txdr_unsigned(siz); + putsize = 0; + left -= NFSX_UNSIGNED; + m2->m_len += NFSX_UNSIGNED; + if (left > 0) { + bcopy(cp, (caddr_t) tl, left); + siz -= left; + cp += left; + m2->m_len += left; + left = 0; + } + } + /* Loop around adding mbufs */ + while (siz > 0) { + MGET(m1, M_WAIT, MT_DATA); + if (siz > MLEN) + MCLGET(m1, M_WAIT); + m1->m_len = NFSMSIZ(m1); + m2->m_next = m1; + m2 = m1; + tl = mtod(m1, u_int32_t *); + tlen = 0; + if (putsize) { + *tl++ = txdr_unsigned(siz); + m1->m_len -= NFSX_UNSIGNED; + tlen = NFSX_UNSIGNED; + putsize = 0; + } + if (siz < m1->m_len) { + len = nfsm_rndup(siz); + xfer = siz; + if (xfer < len) + *(tl+(xfer>>2)) = 0; + } else { + xfer = len = m1->m_len; + } + bcopy(cp, (caddr_t) tl, xfer); + m1->m_len = len+tlen; + siz -= xfer; + cp += xfer; + } + *mb = m1; + *bpos = mtod(m1, caddr_t)+m1->m_len; + return (0); +} + +/* + * Called once to initialize data structures... + */ +int +nfs_init(vfsp) + struct vfsconf *vfsp; +{ + register int i; + + nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1); + + /* + * Check to see if major data structures haven't bloated. + */ + if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { + printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); + printf("Try reducing NFS_UIDHASHSIZ\n"); + } + if (sizeof (struct nfsuid) > NFS_UIDALLOC) { + printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); + printf("Try unionizing the nu_nickname and nu_flag fields\n"); + } + nfs_mount_type = vfsp->vfc_typenum; + nfsrtt.pos = 0; + rpc_vers = txdr_unsigned(RPC_VER2); + rpc_call = txdr_unsigned(RPC_CALL); + rpc_reply = txdr_unsigned(RPC_REPLY); + rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); + rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); + rpc_mismatch = txdr_unsigned(RPC_MISMATCH); + rpc_autherr = txdr_unsigned(RPC_AUTHERR); + rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); + rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); + nfs_prog = txdr_unsigned(NFS_PROG); + nqnfs_prog = txdr_unsigned(NQNFS_PROG); + nfs_true = txdr_unsigned(TRUE); + nfs_false = txdr_unsigned(FALSE); + nfs_xdrneg1 = txdr_unsigned(-1); + nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; + if (nfs_ticks < 1) + nfs_ticks = 1; + /* Ensure async daemons disabled */ + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { + nfs_iodwant[i] = (struct proc *)0; + nfs_iodmount[i] = (struct nfsmount *)0; + } + nfs_nhinit(); /* Init the nfsnode table */ +#ifndef NFS_NOSERVER + nfsrv_init(0); /* Init server data structures */ + nfsrv_initcache(); /* Init the server request cache */ +#endif + + /* + * Initialize the nqnfs server stuff. 
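/*
 * [Editor's sketch -- not part of this commit.]  nfs_init() above derives
 * nfs_ticks from NFS_TICKINTVL (an interval in milliseconds) and hz
 * (clock interrupts per second), rounding to the nearest tick and never
 * letting the result fall below one tick:
 */
static int
msec_to_ticks(int msec, int hz)
{
        int ticks = (hz * msec + 500) / 1000;   /* round to nearest tick */

        return (ticks < 1 ? 1 : ticks);         /* but always at least 1 */
}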
+ */ + if (nqnfsstarttime == 0) { + nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + + nqsrv_clockskew + nqsrv_writeslack; + NQLOADNOVRAM(nqnfsstarttime); + CIRCLEQ_INIT(&nqtimerhead); + nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); + } + + /* + * Initialize reply list and start timer + */ + TAILQ_INIT(&nfs_reqq); + + nfs_timer(0); + + /* + * Set up lease_check and lease_updatetime so that other parts + * of the system can call us, if we are loadable. + */ +#ifndef NFS_NOSERVER + nfs_prev_vop_lease_check = default_vnodeop_p[VOFFSET(vop_lease)]; + default_vnodeop_p[VOFFSET(vop_lease)] = (vop_t *)nqnfs_vop_lease_check; +#endif + nfs_prev_lease_updatetime = lease_updatetime; + lease_updatetime = nfs_lease_updatetime; + nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg; + sysent[SYS_nfssvc].sy_narg = 2; + nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call; + sysent[SYS_nfssvc].sy_call = (sy_call_t *)nfssvc; +#ifndef NFS_NOSERVER + nfs_prev_getfh_sy_narg = sysent[SYS_getfh].sy_narg; + sysent[SYS_getfh].sy_narg = 2; + nfs_prev_getfh_sy_call = sysent[SYS_getfh].sy_call; + sysent[SYS_getfh].sy_call = (sy_call_t *)getfh; +#endif + + nfs_pbuf_freecnt = nswbuf / 2 + 1; + + return (0); +} + +int +nfs_uninit(vfsp) + struct vfsconf *vfsp; +{ + + untimeout(nfs_timer, (void *)NULL, nfs_timer_handle); + nfs_mount_type = -1; +#ifndef NFS_NOSERVER + default_vnodeop_p[VOFFSET(vop_lease)] = nfs_prev_vop_lease_check; +#endif + lease_updatetime = nfs_prev_lease_updatetime; + sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg; + sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call; +#ifndef NFS_NOSERVER + sysent[SYS_getfh].sy_narg = nfs_prev_getfh_sy_narg; + sysent[SYS_getfh].sy_call = nfs_prev_getfh_sy_call; +#endif + return (0); +} + +/* + * Attribute cache routines. + * nfs_loadattrcache() - loads or updates the cache contents from attributes + * that are on the mbuf list + * nfs_getattrcache() - returns valid attributes if found in cache, returns + * error otherwise + */ + +/* + * Load the attribute cache (that lives in the nfsnode entry) with + * the values on the mbuf list and + * Iff vap not NULL + * copy the attributes to *vaper + */ +int +nfs_loadattrcache(vpp, mdp, dposp, vaper) + struct vnode **vpp; + struct mbuf **mdp; + caddr_t *dposp; + struct vattr *vaper; +{ + register struct vnode *vp = *vpp; + register struct vattr *vap; + register struct nfs_fattr *fp; + register struct nfsnode *np; + register int32_t t1; + caddr_t cp2; + int error = 0, rdev; + struct mbuf *md; + enum vtype vtyp; + u_short vmode; + struct timespec mtime; + int v3 = NFS_ISV3(vp); + + md = *mdp; + t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; + if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0) + return (error); + fp = (struct nfs_fattr *)cp2; + if (v3) { + vtyp = nfsv3tov_type(fp->fa_type); + vmode = fxdr_unsigned(u_short, fp->fa_mode); + rdev = makeudev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), + fxdr_unsigned(int, fp->fa3_rdev.specdata2)); + fxdr_nfsv3time(&fp->fa3_mtime, &mtime); + } else { + vtyp = nfsv2tov_type(fp->fa_type); + vmode = fxdr_unsigned(u_short, fp->fa_mode); + /* + * XXX + * + * The duplicate information returned in fa_type and fa_mode + * is an ambiguity in the NFS version 2 protocol. + * + * VREG should be taken literally as a regular file. If a + * server intents to return some type information differently + * in the upper bits of the mode field (e.g. for sockets, or + * FIFOs), NFSv2 mandates fa_type to be VNON. 
Anyway, we + * leave the examination of the mode bits even in the VREG + * case to avoid breakage for bogus servers, but we make sure + * that there are actually type bits set in the upper part of + * fa_mode (and failing that, trust the va_type field). + * + * NFSv3 cleared the issue, and requires fa_mode to not + * contain any type information (while also introduing sockets + * and FIFOs for fa_type). + */ + if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) + vtyp = IFTOVT(vmode); + rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); + fxdr_nfsv2time(&fp->fa2_mtime, &mtime); + + /* + * Really ugly NFSv2 kludge. + */ + if (vtyp == VCHR && rdev == 0xffffffff) + vtyp = VFIFO; + } + + /* + * If v_type == VNON it is a new node, so fill in the v_type, + * n_mtime fields. Check to see if it represents a special + * device, and if so, check for a possible alias. Once the + * correct vnode has been obtained, fill in the rest of the + * information. + */ + np = VTONFS(vp); + if (vp->v_type != vtyp) { + vp->v_type = vtyp; + if (vp->v_type == VFIFO) { + vp->v_op = fifo_nfsv2nodeop_p; + } + if (vp->v_type == VCHR || vp->v_type == VBLK) { + vp->v_op = spec_nfsv2nodeop_p; + addaliasu(vp, rdev); + } + np->n_mtime = mtime.tv_sec; + } + vap = &np->n_vattr; + vap->va_type = vtyp; + vap->va_mode = (vmode & 07777); + vap->va_rdev = rdev; + vap->va_mtime = mtime; + vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; + if (v3) { + vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); + vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); + vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); + vap->va_size = fxdr_hyper(&fp->fa3_size); + vap->va_blocksize = NFS_FABLKSIZE; + vap->va_bytes = fxdr_hyper(&fp->fa3_used); + vap->va_fileid = fxdr_unsigned(int32_t, + fp->fa3_fileid.nfsuquad[1]); + fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); + fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); + vap->va_flags = 0; + vap->va_filerev = 0; + } else { + vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); + vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); + vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); + vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); + vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); + vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) + * NFS_FABLKSIZE; + vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); + fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); + vap->va_flags = 0; + vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t, + fp->fa2_ctime.nfsv2_sec); + vap->va_ctime.tv_nsec = 0; + vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); + vap->va_filerev = 0; + } + if (vap->va_size != np->n_size) { + if (vap->va_type == VREG) { + if (np->n_flag & NMODIFIED) { + if (vap->va_size < np->n_size) + vap->va_size = np->n_size; + else + np->n_size = vap->va_size; + } else { + np->n_size = vap->va_size; + } + vnode_pager_setsize(vp, np->n_size); + } else { + np->n_size = vap->va_size; + } + } + np->n_attrstamp = time_second; + if (vaper != NULL) { + bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); + if (np->n_flag & NCHG) { + if (np->n_flag & NACC) + vaper->va_atime = np->n_atim; + if (np->n_flag & NUPD) + vaper->va_mtime = np->n_mtim; + } + } + return (0); +} + +#ifdef NFS_ACDEBUG +#include <sys/sysctl.h> +SYSCTL_DECL(_vfs_nfs); +static int nfs_acdebug; +SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); +#endif + +/* + * Check the time stamp + * If the cache is valid, copy contents to *vap and return 0 + * otherwise return an error + */ +int 
+nfs_getattrcache(vp, vaper) + register struct vnode *vp; + struct vattr *vaper; +{ + register struct nfsnode *np; + register struct vattr *vap; + struct nfsmount *nmp; + int timeo; + + np = VTONFS(vp); + vap = &np->n_vattr; + nmp = VFSTONFS(vp->v_mount); + /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ + timeo = (time_second - np->n_mtime) / 10; + +#ifdef NFS_ACDEBUG + if (nfs_acdebug>1) + printf("nfs_getattrcache: initial timeo = %d\n", timeo); +#endif + + if (vap->va_type == VDIR) { + if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) + timeo = nmp->nm_acdirmin; + else if (timeo > nmp->nm_acdirmax) + timeo = nmp->nm_acdirmax; + } else { + if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) + timeo = nmp->nm_acregmin; + else if (timeo > nmp->nm_acregmax) + timeo = nmp->nm_acregmax; + } + +#ifdef NFS_ACDEBUG + if (nfs_acdebug > 2) + printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", + nmp->nm_acregmin, nmp->nm_acregmax, + nmp->nm_acdirmin, nmp->nm_acdirmax); + + if (nfs_acdebug) + printf("nfs_getattrcache: age = %d; final timeo = %d\n", + (time_second - np->n_attrstamp), timeo); +#endif + + if ((time_second - np->n_attrstamp) >= timeo) { + nfsstats.attrcache_misses++; + return (ENOENT); + } + nfsstats.attrcache_hits++; + if (vap->va_size != np->n_size) { + if (vap->va_type == VREG) { + if (np->n_flag & NMODIFIED) { + if (vap->va_size < np->n_size) + vap->va_size = np->n_size; + else + np->n_size = vap->va_size; + } else { + np->n_size = vap->va_size; + } + vnode_pager_setsize(vp, np->n_size); + } else { + np->n_size = vap->va_size; + } + } + bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); + if (np->n_flag & NCHG) { + if (np->n_flag & NACC) + vaper->va_atime = np->n_atim; + if (np->n_flag & NUPD) + vaper->va_mtime = np->n_mtim; + } + return (0); +} + +#ifndef NFS_NOSERVER +/* + * Set up nameidata for a lookup() call and do it. + * + * If pubflag is set, this call is done for a lookup operation on the + * public filehandle. In that case we allow crossing mountpoints and + * absolute pathnames. However, the caller is expected to check that + * the lookup result is within the public fs, and deny access if + * it is not. + * + * nfs_namei() clears out garbage fields that namei() might leave garbage. + * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no + * error occurs but the parent was not requested. + * + * dirp may be set whether an error is returned or not, and must be + * released by the caller. + */ +int +nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag) + register struct nameidata *ndp; + fhandle_t *fhp; + int len; + struct nfssvc_sock *slp; + struct sockaddr *nam; + struct mbuf **mdp; + caddr_t *dposp; + struct vnode **retdirp; + struct proc *p; + int kerbflag, pubflag; +{ + register int i, rem; + register struct mbuf *md; + register char *fromcp, *tocp, *cp; + struct iovec aiov; + struct uio auio; + struct vnode *dp; + int error, rdonly, linklen; + struct componentname *cnp = &ndp->ni_cnd; + + *retdirp = (struct vnode *)0; + cnp->cn_pnbuf = zalloc(namei_zone); + + /* + * Copy the name from the mbuf list to ndp->ni_pnbuf + * and set the various ndp fields appropriately. 
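/*
 * [Editor's sketch -- not part of this commit.]  The copy loop that
 * follows pulls a counted (not NUL-terminated) name out of the request
 * and rejects embedded NULs always, and '/' unless this is a WebNFS
 * public-filehandle lookup (pubflag).  Flattened out of the mbuf walk,
 * the validation amounts to:
 */
#include <stddef.h>
#include <errno.h>

static int
copy_rpc_name(char *dst, size_t dstsz, const char *src, size_t len,
    int pubflag)
{
        size_t i;

        if (len + 1 > dstsz)
                return (ENAMETOOLONG);
        for (i = 0; i < len; i++) {
                /* same rejections the kernel loop makes */
                if (src[i] == '\0' || (!pubflag && src[i] == '/'))
                        return (EACCES);
                dst[i] = src[i];
        }
        dst[len] = '\0';
        return (0);
}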
+ */ + fromcp = *dposp; + tocp = cnp->cn_pnbuf; + md = *mdp; + rem = mtod(md, caddr_t) + md->m_len - fromcp; + for (i = 0; i < len; i++) { + while (rem == 0) { + md = md->m_next; + if (md == NULL) { + error = EBADRPC; + goto out; + } + fromcp = mtod(md, caddr_t); + rem = md->m_len; + } + if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { + error = EACCES; + goto out; + } + *tocp++ = *fromcp++; + rem--; + } + *tocp = '\0'; + *mdp = md; + *dposp = fromcp; + len = nfsm_rndup(len)-len; + if (len > 0) { + if (rem >= len) + *dposp += len; + else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) + goto out; + } + + /* + * Extract and set starting directory. + */ + error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, + nam, &rdonly, kerbflag, pubflag); + if (error) + goto out; + if (dp->v_type != VDIR) { + vrele(dp); + error = ENOTDIR; + goto out; + } + + if (rdonly) + cnp->cn_flags |= RDONLY; + + /* + * Set return directory. Reference to dp is implicitly transfered + * to the returned pointer + */ + *retdirp = dp; + + if (pubflag) { + /* + * Oh joy. For WebNFS, handle those pesky '%' escapes, + * and the 'native path' indicator. + */ + cp = zalloc(namei_zone); + fromcp = cnp->cn_pnbuf; + tocp = cp; + if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { + switch ((unsigned char)*fromcp) { + case WEBNFS_NATIVE_CHAR: + /* + * 'Native' path for us is the same + * as a path according to the NFS spec, + * just skip the escape char. + */ + fromcp++; + break; + /* + * More may be added in the future, range 0x80-0xff + */ + default: + error = EIO; + zfree(namei_zone, cp); + goto out; + } + } + /* + * Translate the '%' escapes, URL-style. + */ + while (*fromcp != '\0') { + if (*fromcp == WEBNFS_ESC_CHAR) { + if (fromcp[1] != '\0' && fromcp[2] != '\0') { + fromcp++; + *tocp++ = HEXSTRTOI(fromcp); + fromcp += 2; + continue; + } else { + error = ENOENT; + zfree(namei_zone, cp); + goto out; + } + } else + *tocp++ = *fromcp++; + } + *tocp = '\0'; + zfree(namei_zone, cnp->cn_pnbuf); + cnp->cn_pnbuf = cp; + } + + ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; + ndp->ni_segflg = UIO_SYSSPACE; + + if (pubflag) { + ndp->ni_rootdir = rootvnode; + ndp->ni_loopcnt = 0; + if (cnp->cn_pnbuf[0] == '/') + dp = rootvnode; + } else { + cnp->cn_flags |= NOCROSSMOUNT; + } + + /* + * Initialize for scan, set ni_startdir and bump ref on dp again + * becuase lookup() will dereference ni_startdir. + */ + + cnp->cn_proc = p; + VREF(dp); + ndp->ni_startdir = dp; + + for (;;) { + cnp->cn_nameptr = cnp->cn_pnbuf; + /* + * Call lookup() to do the real work. If an error occurs, + * ndp->ni_vp and ni_dvp are left uninitialized or NULL and + * we do not have to dereference anything before returning. + * In either case ni_startdir will be dereferenced and NULLed + * out. + */ + error = lookup(ndp); + if (error) + break; + + /* + * Check for encountering a symbolic link. Trivial + * termination occurs if no symlink encountered. + * Note: zfree is safe because error is 0, so we will + * not zfree it again when we break. 
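/*
 * [Editor's sketch -- not part of this commit.]  The WebNFS branch a
 * little above rewrites URL-style '%xx' escapes in place; HEXSTRTOI() is
 * assumed to turn two hex digits into a byte.  A standalone version of
 * that translation:
 */
#include <ctype.h>
#include <errno.h>

static int
hexval(int c)
{
        if (isdigit(c))
                return (c - '0');
        if (isxdigit(c))
                return (tolower(c) - 'a' + 10);
        return (-1);
}

/* Decode '%xx' escapes from src into dst (dst may alias src). */
static int
webnfs_unescape(char *dst, const char *src)
{
        int hi, lo;

        while (*src != '\0') {
                if (*src == '%') {
                        hi = hexval((unsigned char)src[1]);
                        lo = hexval((unsigned char)src[2]);
                        if (hi < 0 || lo < 0)
                                return (ENOENT);   /* truncated/bad escape */
                        *dst++ = (char)(hi << 4 | lo);
                        src += 3;
                } else
                        *dst++ = *src++;
        }
        *dst = '\0';
        return (0);
}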
+ */ + if ((cnp->cn_flags & ISSYMLINK) == 0) { + nfsrv_object_create(ndp->ni_vp); + if (cnp->cn_flags & (SAVENAME | SAVESTART)) + cnp->cn_flags |= HASBUF; + else + zfree(namei_zone, cnp->cn_pnbuf); + break; + } + + /* + * Validate symlink + */ + if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) + VOP_UNLOCK(ndp->ni_dvp, 0, p); + if (!pubflag) { + error = EINVAL; + goto badlink2; + } + + if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { + error = ELOOP; + goto badlink2; + } + if (ndp->ni_pathlen > 1) + cp = zalloc(namei_zone); + else + cp = cnp->cn_pnbuf; + aiov.iov_base = cp; + aiov.iov_len = MAXPATHLEN; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = (struct proc *)0; + auio.uio_resid = MAXPATHLEN; + error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); + if (error) { + badlink1: + if (ndp->ni_pathlen > 1) + zfree(namei_zone, cp); + badlink2: + vrele(ndp->ni_dvp); + vput(ndp->ni_vp); + break; + } + linklen = MAXPATHLEN - auio.uio_resid; + if (linklen == 0) { + error = ENOENT; + goto badlink1; + } + if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { + error = ENAMETOOLONG; + goto badlink1; + } + + /* + * Adjust or replace path + */ + if (ndp->ni_pathlen > 1) { + bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); + zfree(namei_zone, cnp->cn_pnbuf); + cnp->cn_pnbuf = cp; + } else + cnp->cn_pnbuf[linklen] = '\0'; + ndp->ni_pathlen += linklen; + + /* + * Cleanup refs for next loop and check if root directory + * should replace current directory. Normally ni_dvp + * becomes the new base directory and is cleaned up when + * we loop. Explicitly null pointers after invalidation + * to clarify operation. + */ + vput(ndp->ni_vp); + ndp->ni_vp = NULL; + + if (cnp->cn_pnbuf[0] == '/') { + vrele(ndp->ni_dvp); + ndp->ni_dvp = ndp->ni_rootdir; + VREF(ndp->ni_dvp); + } + ndp->ni_startdir = ndp->ni_dvp; + ndp->ni_dvp = NULL; + } + + /* + * nfs_namei() guarentees that fields will not contain garbage + * whether an error occurs or not. This allows the caller to track + * cleanup state trivially. + */ +out: + if (error) { + zfree(namei_zone, cnp->cn_pnbuf); + ndp->ni_vp = NULL; + ndp->ni_dvp = NULL; + ndp->ni_startdir = NULL; + cnp->cn_flags &= ~HASBUF; + } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { + ndp->ni_dvp = NULL; + } + return (error); +} + +/* + * A fiddled version of m_adj() that ensures null fill to a long + * boundary and only trims off the back end + */ +void +nfsm_adj(mp, len, nul) + struct mbuf *mp; + register int len; + int nul; +{ + register struct mbuf *m; + register int count, i; + register char *cp; + + /* + * Trim from tail. Scan the mbuf chain, + * calculating its length and finding the last mbuf. + * If the adjustment only affects this mbuf, then just + * adjust and return. Otherwise, rescan and truncate + * after the remaining size. + */ + count = 0; + m = mp; + for (;;) { + count += m->m_len; + if (m->m_next == (struct mbuf *)0) + break; + m = m->m_next; + } + if (m->m_len > len) { + m->m_len -= len; + if (nul > 0) { + cp = mtod(m, caddr_t)+m->m_len-nul; + for (i = 0; i < nul; i++) + *cp++ = '\0'; + } + return; + } + count -= len; + if (count < 0) + count = 0; + /* + * Correct length for chain is "count". + * Find the mbuf with last data, adjust its length, + * and toss data from remaining mbufs on chain. 
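/*
 * [Editor's sketch -- not part of this commit.]  nfsm_adj() trims 'len'
 * bytes off the back of the chain and NUL-fills the last 'nul' bytes
 * that remain.  Over a flat array of segments (standing in for the mbuf
 * chain) the same trim looks like:
 */
#include <stddef.h>
#include <string.h>

struct span {
        char   *base;
        size_t  len;
};

static void
span_trim_tail(struct span *sp, size_t nsp, size_t len, size_t nul)
{
        size_t total = 0, keep, i;

        for (i = 0; i < nsp; i++)
                total += sp[i].len;
        keep = (len > total) ? 0 : total - len;

        for (i = 0; i < nsp; i++) {
                if (sp[i].len >= keep) {        /* last segment with data */
                        sp[i].len = keep;
                        if (nul > 0 && keep >= nul)
                                memset(sp[i].base + keep - nul, 0, nul);
                        for (i++; i < nsp; i++) /* toss the rest */
                                sp[i].len = 0;
                        return;
                }
                keep -= sp[i].len;
        }
}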
+ */ + for (m = mp; m; m = m->m_next) { + if (m->m_len >= count) { + m->m_len = count; + if (nul > 0) { + cp = mtod(m, caddr_t)+m->m_len-nul; + for (i = 0; i < nul; i++) + *cp++ = '\0'; + } + break; + } + count -= m->m_len; + } + for (m = m->m_next;m;m = m->m_next) + m->m_len = 0; +} + +/* + * Make these functions instead of macros, so that the kernel text size + * doesn't get too big... + */ +void +nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) + struct nfsrv_descript *nfsd; + int before_ret; + register struct vattr *before_vap; + int after_ret; + struct vattr *after_vap; + struct mbuf **mbp; + char **bposp; +{ + register struct mbuf *mb = *mbp, *mb2; + register char *bpos = *bposp; + register u_int32_t *tl; + + if (before_ret) { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = nfs_false; + } else { + nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED); + *tl++ = nfs_true; + txdr_hyper(before_vap->va_size, tl); + tl += 2; + txdr_nfsv3time(&(before_vap->va_mtime), tl); + tl += 2; + txdr_nfsv3time(&(before_vap->va_ctime), tl); + } + *bposp = bpos; + *mbp = mb; + nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); +} + +void +nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) + struct nfsrv_descript *nfsd; + int after_ret; + struct vattr *after_vap; + struct mbuf **mbp; + char **bposp; +{ + register struct mbuf *mb = *mbp, *mb2; + register char *bpos = *bposp; + register u_int32_t *tl; + register struct nfs_fattr *fp; + + if (after_ret) { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = nfs_false; + } else { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR); + *tl++ = nfs_true; + fp = (struct nfs_fattr *)tl; + nfsm_srvfattr(nfsd, after_vap, fp); + } + *mbp = mb; + *bposp = bpos; +} + +void +nfsm_srvfattr(nfsd, vap, fp) + register struct nfsrv_descript *nfsd; + register struct vattr *vap; + register struct nfs_fattr *fp; +{ + + fp->fa_nlink = txdr_unsigned(vap->va_nlink); + fp->fa_uid = txdr_unsigned(vap->va_uid); + fp->fa_gid = txdr_unsigned(vap->va_gid); + if (nfsd->nd_flag & ND_NFSV3) { + fp->fa_type = vtonfsv3_type(vap->va_type); + fp->fa_mode = vtonfsv3_mode(vap->va_mode); + txdr_hyper(vap->va_size, &fp->fa3_size); + txdr_hyper(vap->va_bytes, &fp->fa3_used); + fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev)); + fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev)); + fp->fa3_fsid.nfsuquad[0] = 0; + fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); + fp->fa3_fileid.nfsuquad[0] = 0; + fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); + txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); + txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); + txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); + } else { + fp->fa_type = vtonfsv2_type(vap->va_type); + fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); + fp->fa2_size = txdr_unsigned(vap->va_size); + fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); + if (vap->va_type == VFIFO) + fp->fa2_rdev = 0xffffffff; + else + fp->fa2_rdev = txdr_unsigned(vap->va_rdev); + fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); + fp->fa2_fsid = txdr_unsigned(vap->va_fsid); + fp->fa2_fileid = txdr_unsigned(vap->va_fileid); + txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); + txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); + txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); + } +} + +/* + * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) + * - look up fsid in mount list (if not found ret error) + * - get vp and export rights by calling 
VFS_FHTOVP() + * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon + * - if not lockflag unlock it with VOP_UNLOCK() + */ +int +nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag) + fhandle_t *fhp; + int lockflag; + struct vnode **vpp; + struct ucred *cred; + struct nfssvc_sock *slp; + struct sockaddr *nam; + int *rdonlyp; + int kerbflag; + int pubflag; +{ + struct proc *p = curproc; /* XXX */ + register struct mount *mp; + register int i; + struct ucred *credanon; + int error, exflags; +#ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */ + struct sockaddr_int *saddr; +#endif + + *vpp = (struct vnode *)0; + + if (nfs_ispublicfh(fhp)) { + if (!pubflag || !nfs_pub.np_valid) + return (ESTALE); + fhp = &nfs_pub.np_handle; + } + + mp = vfs_getvfs(&fhp->fh_fsid); + if (!mp) + return (ESTALE); + error = VFS_CHECKEXP(mp, nam, &exflags, &credanon); + if (error) + return (error); + error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp); + if (error) + return (error); +#ifdef MNT_EXNORESPORT + if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { + saddr = (struct sockaddr_in *)nam; + if (saddr->sin_family == AF_INET && + ntohs(saddr->sin_port) >= IPPORT_RESERVED) { + vput(*vpp); + *vpp = NULL; + return (NFSERR_AUTHERR | AUTH_TOOWEAK); + } + } +#endif + /* + * Check/setup credentials. + */ + if (exflags & MNT_EXKERB) { + if (!kerbflag) { + vput(*vpp); + *vpp = NULL; + return (NFSERR_AUTHERR | AUTH_TOOWEAK); + } + } else if (kerbflag) { + vput(*vpp); + *vpp = NULL; + return (NFSERR_AUTHERR | AUTH_TOOWEAK); + } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { + cred->cr_uid = credanon->cr_uid; + for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) + cred->cr_groups[i] = credanon->cr_groups[i]; + cred->cr_ngroups = i; + } + if (exflags & MNT_EXRDONLY) + *rdonlyp = 1; + else + *rdonlyp = 0; + + nfsrv_object_create(*vpp); + + if (!lockflag) + VOP_UNLOCK(*vpp, 0, p); + return (0); +} + + +/* + * WebNFS: check if a filehandle is a public filehandle. For v3, this + * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has + * transformed this to all zeroes in both cases, so check for it. + */ +int +nfs_ispublicfh(fhp) + fhandle_t *fhp; +{ + char *cp = (char *)fhp; + int i; + + for (i = 0; i < NFSX_V3FH; i++) + if (*cp++ != 0) + return (FALSE); + return (TRUE); +} + +#endif /* NFS_NOSERVER */ +/* + * This function compares two net addresses by family and returns TRUE + * if they are the same host. + * If there is any doubt, return FALSE. + * The AF_INET family is handled as a special case so that address mbufs + * don't need to be saved to store "struct in_addr", which is only 4 bytes. 
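/*
 * [Editor's sketch -- not part of this commit.]  For AF_INET the stored
 * host key is just the 4-byte address, so the match below boils down to
 * a family check plus one 32-bit compare.  In userspace terms:
 */
#include <sys/socket.h>
#include <netinet/in.h>

static int
inet_addr_match(in_addr_t stored, const struct sockaddr *sa)
{
        const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;

        return (sa->sa_family == AF_INET &&
            sin->sin_addr.s_addr == stored);
}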
+ */ +int +netaddr_match(family, haddr, nam) + int family; + union nethostaddr *haddr; + struct sockaddr *nam; +{ + register struct sockaddr_in *inetaddr; + + switch (family) { + case AF_INET: + inetaddr = (struct sockaddr_in *)nam; + if (inetaddr->sin_family == AF_INET && + inetaddr->sin_addr.s_addr == haddr->had_inetaddr) + return (1); + break; +#ifdef ISO + case AF_ISO: + { + register struct sockaddr_iso *isoaddr1, *isoaddr2; + + isoaddr1 = (struct sockaddr_iso *)nam; + isoaddr2 = (struct sockaddr_iso *)haddr->had_nam; + if (isoaddr1->siso_family == AF_ISO && + isoaddr1->siso_nlen > 0 && + isoaddr1->siso_nlen == isoaddr2->siso_nlen && + SAME_ISOADDR(isoaddr1, isoaddr2)) + return (1); + break; + } +#endif /* ISO */ + default: + break; + }; + return (0); +} + +static nfsuint64 nfs_nullcookie = { { 0, 0 } }; +/* + * This function finds the directory cookie that corresponds to the + * logical byte offset given. + */ +nfsuint64 * +nfs_getcookie(np, off, add) + register struct nfsnode *np; + off_t off; + int add; +{ + register struct nfsdmap *dp, *dp2; + register int pos; + + pos = (uoff_t)off / NFS_DIRBLKSIZ; + if (pos == 0 || off < 0) { +#ifdef DIAGNOSTIC + if (add) + panic("nfs getcookie add at <= 0"); +#endif + return (&nfs_nullcookie); + } + pos--; + dp = np->n_cookies.lh_first; + if (!dp) { + if (add) { + MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), + M_NFSDIROFF, M_WAITOK); + dp->ndm_eocookie = 0; + LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); + } else + return ((nfsuint64 *)0); + } + while (pos >= NFSNUMCOOKIES) { + pos -= NFSNUMCOOKIES; + if (dp->ndm_list.le_next) { + if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && + pos >= dp->ndm_eocookie) + return ((nfsuint64 *)0); + dp = dp->ndm_list.le_next; + } else if (add) { + MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), + M_NFSDIROFF, M_WAITOK); + dp2->ndm_eocookie = 0; + LIST_INSERT_AFTER(dp, dp2, ndm_list); + dp = dp2; + } else + return ((nfsuint64 *)0); + } + if (pos >= dp->ndm_eocookie) { + if (add) + dp->ndm_eocookie = pos + 1; + else + return ((nfsuint64 *)0); + } + return (&dp->ndm_cookies[pos]); +} + +/* + * Invalidate cached directory information, except for the actual directory + * blocks (which are invalidated separately). + * Done mainly to avoid the use of stale offset cookies. + */ +void +nfs_invaldir(vp) + register struct vnode *vp; +{ + register struct nfsnode *np = VTONFS(vp); + +#ifdef DIAGNOSTIC + if (vp->v_type != VDIR) + panic("nfs: invaldir not dir"); +#endif + np->n_direofoffset = 0; + np->n_cookieverf.nfsuquad[0] = 0; + np->n_cookieverf.nfsuquad[1] = 0; + if (np->n_cookies.lh_first) + np->n_cookies.lh_first->ndm_eocookie = 0; +} + +/* + * The write verifier has changed (probably due to a server reboot), so all + * B_NEEDCOMMIT blocks will have to be written again. Since they are on the + * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT + * and B_CLUSTEROK flags. Once done the new write verifier can be set for the + * mount point. + * + * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data + * writes are not clusterable. 
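/*
 * [Editor's sketch -- not part of this commit.]  The scan below only
 * touches buffers that are both delayed-write and commit-pending, and
 * strips the commit-related flags so the data gets rewritten under the
 * new verifier.  With hypothetical stand-in flag values, the flag
 * arithmetic per buffer is just:
 */
#include <stdint.h>
#include <stddef.h>

#define X_DELWRI        0x0001          /* stand-ins for B_DELWRI, etc. */
#define X_NEEDCOMMIT    0x0002
#define X_CLUSTEROK     0x0004

static void
clear_commit_flags(uint32_t *bufflags, size_t nbufs)
{
        size_t i;

        for (i = 0; i < nbufs; i++) {
                /* both bits must be set before anything is cleared */
                if ((bufflags[i] & (X_DELWRI | X_NEEDCOMMIT)) ==
                    (X_DELWRI | X_NEEDCOMMIT))
                        bufflags[i] &= ~(X_NEEDCOMMIT | X_CLUSTEROK);
        }
}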
+ */ +void +nfs_clearcommit(mp) + struct mount *mp; +{ + register struct vnode *vp, *nvp; + register struct buf *bp, *nbp; + int s; + + s = splbio(); +loop: + for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { + if (vp->v_mount != mp) /* Paranoia */ + goto loop; + nvp = vp->v_mntvnodes.le_next; + for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = TAILQ_NEXT(bp, b_vnbufs); + if (BUF_REFCNT(bp) == 0 && + (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) + == (B_DELWRI | B_NEEDCOMMIT)) + bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); + } + } + splx(s); +} + +#ifndef NFS_NOSERVER +/* + * Map errnos to NFS error numbers. For Version 3 also filter out error + * numbers not specified for the associated procedure. + */ +int +nfsrv_errmap(nd, err) + struct nfsrv_descript *nd; + register int err; +{ + register short *defaulterrp, *errp; + + if (nd->nd_flag & ND_NFSV3) { + if (nd->nd_procnum <= NFSPROC_COMMIT) { + errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; + while (*++errp) { + if (*errp == err) + return (err); + else if (*errp > err) + break; + } + return ((int)*defaulterrp); + } else + return (err & 0xffff); + } + if (err <= ELAST) + return ((int)nfsrv_v2errmap[err - 1]); + return (NFSERR_IO); +} + +int +nfsrv_object_create(vp) + struct vnode *vp; +{ + + if (vp == NULL || vp->v_type != VREG) + return (1); + return (vfs_object_create(vp, curproc, + curproc ? curproc->p_ucred : NULL)); +} + +/* + * Sort the group list in increasing numerical order. + * (Insertion sort by Chris Torek, who was grossed out by the bubble sort + * that used to be here.) + */ +void +nfsrvw_sort(list, num) + register gid_t *list; + register int num; +{ + register int i, j; + gid_t v; + + /* Insertion sort. */ + for (i = 1; i < num; i++) { + v = list[i]; + /* find correct slot for value v, moving others up */ + for (j = i; --j >= 0 && v < list[j];) + list[j + 1] = list[j]; + list[j + 1] = v; + } +} + +/* + * copy credentials making sure that the result can be compared with bcmp(). + */ +void +nfsrv_setcred(incred, outcred) + register struct ucred *incred, *outcred; +{ + register int i; + + bzero((caddr_t)outcred, sizeof (struct ucred)); + outcred->cr_ref = 1; + outcred->cr_uid = incred->cr_uid; + outcred->cr_ngroups = incred->cr_ngroups; + for (i = 0; i < incred->cr_ngroups; i++) + outcred->cr_groups[i] = incred->cr_groups[i]; + nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); +} +#endif /* NFS_NOSERVER */ diff --git a/sys/nfs/nfs_common.h b/sys/nfs/nfs_common.h new file mode 100644 index 0000000..b13c009 --- /dev/null +++ b/sys/nfs/nfs_common.h @@ -0,0 +1,564 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsm_subs.h 8.2 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_NFSM_SUBS_H_ +#define _NFS_NFSM_SUBS_H_ + +struct ucred; +struct vnode; + +/* + * These macros do strange and peculiar things to mbuf chains for + * the assistance of the nfs code. To attempt to use them for any + * other purpose will be dangerous. (they make weird assumptions) + */ + +/* + * First define what the actual subs. return + */ +struct mbuf *nfsm_reqh __P((struct vnode *vp, u_long procid, int hsiz, + caddr_t *bposp)); +struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, + int auth_type, int auth_len, char *auth_str, + int verf_len, char *verf_str, + struct mbuf *mrest, int mrest_len, + struct mbuf **mbp, u_int32_t *xidp)); + +#define M_HASCL(m) ((m)->m_flags & M_EXT) +#define NFSMINOFF(m) \ + do { \ + if (M_HASCL(m)) \ + (m)->m_data = (m)->m_ext.ext_buf; \ + else if ((m)->m_flags & M_PKTHDR) \ + (m)->m_data = (m)->m_pktdat; \ + else \ + (m)->m_data = (m)->m_dat; \ + } while (0) +#define NFSMADV(m, s) \ + do { \ + (m)->m_data += (s); \ + } while (0) +#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \ + (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN)) + +/* + * Now for the macros that do the simple stuff and call the functions + * for the hard stuff. + * These macros use several vars. declared in nfsm_reqhead and these + * vars. must not be used elsewhere unless you are careful not to corrupt + * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries + * that may be used so long as the value is not expected to retained + * after a macro. + * I know, this is kind of dorkey, but it makes the actual op functions + * fairly clean and deals with the mess caused by the xdr discriminating + * unions. 
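/*
 * [Editor's sketch -- not part of this commit.]  The nfsm_build() macro
 * defined below is essentially a pointer-bump allocator over the current
 * mbuf: it hands back a typed pointer to the next 's' bytes and advances
 * the fill position, chaining a fresh mbuf when the current one is full.
 * Stripped of the mbuf handling, the core operation is:
 */
#include <stddef.h>

struct fillbuf {
        char   *base;           /* start of the buffer    */
        size_t  size;           /* total capacity         */
        size_t  used;           /* current fill position  */
};

/* Reserve s bytes; NULL is where the macro would chain a new mbuf. */
static void *
fill_reserve(struct fillbuf *fb, size_t s)
{
        void *p;

        if (fb->size - fb->used < s)
                return (NULL);
        p = fb->base + fb->used;
        fb->used += s;
        return (p);
}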
+ */ + +#define nfsm_build(a,c,s) \ + do { \ + if ((s) > M_TRAILINGSPACE(mb)) { \ + MGET(mb2, M_WAIT, MT_DATA); \ + if ((s) > MLEN) \ + panic("build > MLEN"); \ + mb->m_next = mb2; \ + mb = mb2; \ + mb->m_len = 0; \ + bpos = mtod(mb, caddr_t); \ + } \ + (a) = (c)(bpos); \ + mb->m_len += (s); \ + bpos += (s); \ + } while (0) + +#define nfsm_dissect(a, c, s) \ + do { \ + t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + if (t1 >= (s)) { \ + (a) = (c)(dpos); \ + dpos += (s); \ + } else if ((t1 = nfsm_disct(&md, &dpos, (s), t1, &cp2)) != 0){ \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } else { \ + (a) = (c)cp2; \ + } \ + } while (0) + +#define nfsm_fhtom(v, v3) \ + do { \ + if (v3) { \ + t2 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED; \ + if (t2 <= M_TRAILINGSPACE(mb)) { \ + nfsm_build(tl, u_int32_t *, t2); \ + *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize); \ + *(tl + ((t2>>2) - 2)) = 0; \ + bcopy((caddr_t)VTONFS(v)->n_fhp,(caddr_t)tl, \ + VTONFS(v)->n_fhsize); \ + } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, \ + (caddr_t)VTONFS(v)->n_fhp, \ + VTONFS(v)->n_fhsize)) != 0) { \ + error = t2; \ + m_freem(mreq); \ + goto nfsmout; \ + } \ + } else { \ + nfsm_build(cp, caddr_t, NFSX_V2FH); \ + bcopy((caddr_t)VTONFS(v)->n_fhp, cp, NFSX_V2FH); \ + } \ + } while (0) + +#define nfsm_srvfhtom(f, v3) \ + do { \ + if (v3) { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FH);\ + *tl++ = txdr_unsigned(NFSX_V3FH); \ + bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \ + } else { \ + nfsm_build(cp, caddr_t, NFSX_V2FH); \ + bcopy((caddr_t)(f), cp, NFSX_V2FH); \ + } \ + } while (0) + +#define nfsm_srvpostop_fh(f) \ + do { \ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_V3FH); \ + *tl++ = nfs_true; \ + *tl++ = txdr_unsigned(NFSX_V3FH); \ + bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \ + } while (0) + +#define nfsm_mtofh(d, v, v3, f) \ + do { \ + struct nfsnode *ttnp; nfsfh_t *ttfhp; int ttfhsize; \ + if (v3) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + (f) = fxdr_unsigned(int, *tl); \ + } else \ + (f) = 1; \ + if (f) { \ + nfsm_getfh(ttfhp, ttfhsize, (v3)); \ + if ((t1 = nfs_nget((d)->v_mount, ttfhp, ttfhsize, \ + &ttnp)) != 0) { \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + (v) = NFSTOV(ttnp); \ + } \ + if (v3) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (f) \ + (f) = fxdr_unsigned(int, *tl); \ + else if (fxdr_unsigned(int, *tl)) \ + nfsm_adv(NFSX_V3FATTR); \ + } \ + if (f) \ + nfsm_loadattr((v), (struct vattr *)0); \ + } while (0) + +#define nfsm_getfh(f, s, v3) \ + do { \ + if (v3) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(int, *tl)) <= 0 || \ + (s) > NFSX_V3FHMAX) { \ + m_freem(mrep); \ + error = EBADRPC; \ + goto nfsmout; \ + } \ + } else \ + (s) = NFSX_V2FH; \ + nfsm_dissect((f), nfsfh_t *, nfsm_rndup(s)); \ + } while (0) + +#define nfsm_loadattr(v, a) \ + do { \ + struct vnode *ttvp = (v); \ + if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, (a))) != 0) { \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + (v) = ttvp; \ + } while (0) + +#define nfsm_postop_attr(v, f) \ + do { \ + struct vnode *ttvp = (v); \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (((f) = fxdr_unsigned(int, *tl)) != 0) { \ + if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, \ + (struct vattr *)0)) != 0) { \ + error = t1; \ + (f) = 0; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + (v) = ttvp; \ + } \ + } while (0) + +/* Used as (f) for nfsm_wcc_data() */ +#define NFSV3_WCCRATTR 0 +#define NFSV3_WCCCHK 1 + +#define 
nfsm_wcc_data(v, f) \ + do { \ + int ttattrf, ttretf = 0; \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED); \ + if (f) \ + ttretf = (VTONFS(v)->n_mtime == \ + fxdr_unsigned(u_int32_t, *(tl + 2))); \ + } \ + nfsm_postop_attr((v), ttattrf); \ + if (f) { \ + (f) = ttretf; \ + } else { \ + (f) = ttattrf; \ + } \ + } while (0) + +/* If full is true, set all fields, otherwise just set mode and time fields */ +#define nfsm_v3attrbuild(a, full) \ + do { \ + if ((a)->va_mode != (mode_t)VNOVAL) { \ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + *tl = txdr_unsigned((a)->va_mode); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if ((full) && (a)->va_uid != (uid_t)VNOVAL) { \ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + *tl = txdr_unsigned((a)->va_uid); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if ((full) && (a)->va_gid != (gid_t)VNOVAL) { \ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + *tl = txdr_unsigned((a)->va_gid); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if ((full) && (a)->va_size != VNOVAL) { \ + nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + txdr_hyper((a)->va_size, tl); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if ((a)->va_atime.tv_sec != VNOVAL) { \ + if ((a)->va_atime.tv_sec != time_second) { \ + nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);\ + *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);\ + txdr_nfsv3time(&(a)->va_atime, tl); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \ + } \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \ + } \ + if ((a)->va_mtime.tv_sec != VNOVAL) { \ + if ((a)->va_mtime.tv_sec != time_second) { \ + nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);\ + *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);\ + txdr_nfsv3time(&(a)->va_mtime, tl); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \ + } \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \ + } \ + } while (0) + + +#define nfsm_strsiz(s,m) \ + do { \ + nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(int32_t,*tl)) > (m)) { \ + m_freem(mrep); \ + error = EBADRPC; \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_srvstrsiz(s,m) \ + do { \ + nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(int32_t,*tl)) > (m) || (s) <= 0) { \ + error = EBADRPC; \ + nfsm_reply(0); \ + } \ + } while (0) + +#define nfsm_srvnamesiz(s) \ + do { \ + nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(int32_t,*tl)) > NFS_MAXNAMLEN) \ + error = NFSERR_NAMETOL; \ + if ((s) <= 0) \ + error = EBADRPC; \ + if (error) \ + nfsm_reply(0); \ + } while (0) + +#define nfsm_mtouio(p,s) \ + do {\ + if ((s) > 0 && \ + (t1 = nfsm_mbuftouio(&md,(p),(s),&dpos)) != 0) { \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_uiotom(p,s) \ + do { \ + if ((t1 = nfsm_uiotombuf((p),&mb,(s),&bpos)) != 0) { \ + error = t1; \ + m_freem(mreq); \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_reqhead(v,a,s) \ + do { \ 
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos); \ + } while (0) + +#define nfsm_reqdone \ + do { \ + m_freem(mrep); \ + nfsmout: \ + } while (0) + +#define nfsm_rndup(a) (((a)+3)&(~0x3)) + +#define nfsm_request(v, t, p, c) \ + do { \ + if ((error = nfs_request((v), mreq, (t), (p), \ + (c), &mrep, &md, &dpos)) != 0) { \ + if (error & NFSERR_RETERR) \ + error &= ~NFSERR_RETERR; \ + else \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_strtom(a,s,m) \ + do {\ + if ((s) > (m)) { \ + m_freem(mreq); \ + error = ENAMETOOLONG; \ + goto nfsmout; \ + } \ + t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \ + if (t2 <= M_TRAILINGSPACE(mb)) { \ + nfsm_build(tl,u_int32_t *,t2); \ + *tl++ = txdr_unsigned(s); \ + *(tl+((t2>>2)-2)) = 0; \ + bcopy((const char *)(a), (caddr_t)tl, (s)); \ + } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, (a), (s))) != 0) { \ + error = t2; \ + m_freem(mreq); \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_srvdone \ + do { \ + nfsmout: \ + return (error); \ + } while (0) + +#define nfsm_reply(s) \ + do { \ + nfsd->nd_repstat = error; \ + if (error && !(nfsd->nd_flag & ND_NFSV3)) \ + (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \ + mrq, &mb, &bpos); \ + else \ + (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \ + mrq, &mb, &bpos); \ + if (mrep != NULL) { \ + m_freem(mrep); \ + mrep = NULL; \ + } \ + mreq = *mrq; \ + if (error && (!(nfsd->nd_flag & ND_NFSV3) || \ + error == EBADRPC)) { \ + error = 0; \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_writereply(s, v3) \ + do { \ + nfsd->nd_repstat = error; \ + if (error && !(v3)) \ + (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \ + &mreq, &mb, &bpos); \ + else \ + (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \ + &mreq, &mb, &bpos); \ + } while (0) + +#define nfsm_adv(s) \ + do { \ + t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + if (t1 >= (s)) { \ + dpos += (s); \ + } else if ((t1 = nfs_adv(&md, &dpos, (s), t1)) != 0) { \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_srvmtofh(f) \ + do { \ + int fhlen; \ + if (nfsd->nd_flag & ND_NFSV3) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + fhlen = fxdr_unsigned(int, *tl); \ + if (fhlen != 0 && fhlen != NFSX_V3FH) { \ + error = EBADRPC; \ + nfsm_reply(0); \ + } \ + } else { \ + fhlen = NFSX_V2FH; \ + } \ + if (fhlen != 0) { \ + nfsm_dissect(tl, u_int32_t *, fhlen); \ + bcopy((caddr_t)tl, (caddr_t)(f), fhlen); \ + } else {\ + bzero((caddr_t)(f), NFSX_V3FH); \ + } \ + } while (0) + +#define nfsm_clget \ + do { \ + if (bp >= be) { \ + if (mp == mb) \ + mp->m_len += bp-bpos; \ + MGET(mp, M_WAIT, MT_DATA); \ + MCLGET(mp, M_WAIT); \ + mp->m_len = NFSMSIZ(mp); \ + mp2->m_next = mp; \ + mp2 = mp; \ + bp = mtod(mp, caddr_t); \ + be = bp+mp->m_len; \ + } \ + tl = (u_int32_t *)bp; \ + } while (0) + +#define nfsm_srvfillattr(a, f) \ + do { \ + nfsm_srvfattr(nfsd, (a), (f)); \ + } while (0) + +#define nfsm_srvwcc_data(br, b, ar, a) \ + do { \ + nfsm_srvwcc(nfsd, (br), (b), (ar), (a), &mb, &bpos); \ + } while (0) + +#define nfsm_srvpostop_attr(r, a) \ + do { \ + nfsm_srvpostopattr(nfsd, (r), (a), &mb, &bpos); \ + } while (0) + +#define nfsm_srvsattr(a) \ + do { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + (a)->va_mode = nfstov_mode(*tl); \ + } \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + (a)->va_uid = fxdr_unsigned(uid_t, *tl); \ + } \ + 
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + (a)->va_gid = fxdr_unsigned(gid_t, *tl); \ + } \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + (a)->va_size = fxdr_hyper(tl); \ + } \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + switch (fxdr_unsigned(int, *tl)) { \ + case NFSV3SATTRTIME_TOCLIENT: \ + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + fxdr_nfsv3time(tl, &(a)->va_atime); \ + break; \ + case NFSV3SATTRTIME_TOSERVER: \ + getnanotime(&(a)->va_atime); \ + break; \ + }; \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + switch (fxdr_unsigned(int, *tl)) { \ + case NFSV3SATTRTIME_TOCLIENT: \ + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + fxdr_nfsv3time(tl, &(a)->va_mtime); \ + break; \ + case NFSV3SATTRTIME_TOSERVER: \ + getnanotime(&(a)->va_mtime); \ + break; \ + } \ + } while (0) + +#endif diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c new file mode 100644 index 0000000..89cbdce --- /dev/null +++ b/sys/nfs/nfs_node.c @@ -0,0 +1,390 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)nfs_node.c 8.6 (Berkeley) 5/22/95 + * $FreeBSD$ + */ + + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <vm/vm_zone.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsmount.h> + +static vm_zone_t nfsnode_zone; +static LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl; +static u_long nfsnodehash; + +#define TRUE 1 +#define FALSE 0 + +/* + * Initialize hash links for nfsnodes + * and build nfsnode free list. + */ +void +nfs_nhinit() +{ + nfsnode_zone = zinit("NFSNODE", sizeof(struct nfsnode), 0, 0, 1); + nfsnodehashtbl = hashinit(desiredvnodes, M_NFSHASH, &nfsnodehash); +} + +/* + * Compute an entry in the NFS hash table structure + */ +u_long +nfs_hash(fhp, fhsize) + register nfsfh_t *fhp; + int fhsize; +{ + register u_char *fhpp; + register u_long fhsum; + register int i; + + fhpp = &fhp->fh_bytes[0]; + fhsum = 0; + for (i = 0; i < fhsize; i++) + fhsum += *fhpp++; + return (fhsum); +} + +/* + * Look up a vnode/nfsnode by file handle. + * Callers must check for mount points!! + * In all cases, a pointer to a + * nfsnode structure is returned. + */ +static int nfs_node_hash_lock; + +int +nfs_nget(mntp, fhp, fhsize, npp) + struct mount *mntp; + register nfsfh_t *fhp; + int fhsize; + struct nfsnode **npp; +{ + struct proc *p = curproc; /* XXX */ + struct nfsnode *np, *np2; + struct nfsnodehashhead *nhpp; + register struct vnode *vp; + struct vnode *nvp; + int error; + +retry: + nhpp = NFSNOHASH(nfs_hash(fhp, fhsize)); +loop: + for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { + if (mntp != NFSTOV(np)->v_mount || np->n_fhsize != fhsize || + bcmp((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize)) + continue; + vp = NFSTOV(np); + if (vget(vp, LK_EXCLUSIVE, p)) + goto loop; + *npp = np; + return(0); + } + /* + * Obtain a lock to prevent a race condition if the getnewvnode() + * or MALLOC() below happens to block. + */ + if (nfs_node_hash_lock) { + while (nfs_node_hash_lock) { + nfs_node_hash_lock = -1; + tsleep(&nfs_node_hash_lock, PVM, "nfsngt", 0); + } + goto loop; + } + nfs_node_hash_lock = 1; + + /* + * Allocate before getnewvnode since doing so afterward + * might cause a bogus v_data pointer to get dereferenced + * elsewhere if zalloc should block. 
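nfs_hash() above is just a byte sum of the file handle; the bucket is then picked by the NFSNOHASH() macro from nfsnode.h, which is not shown here but is assumed below to follow the usual hashinit() convention of masking with the returned nfsnodehash value. A small standalone illustration with an arbitrary 8-byte handle and a 256-bucket table:

#include <stdio.h>

/* Byte-sum hash, as in nfs_hash() above. */
static unsigned long
fh_hash(const unsigned char *fh, int fhsize)
{
    unsigned long sum = 0;
    int i;

    for (i = 0; i < fhsize; i++)
        sum += fh[i];
    return (sum);
}

int
main(void)
{
    unsigned char fh[8] = { 0x01, 0x02, 0x7f, 0x10, 0x00, 0x00, 0x00, 0x03 };
    unsigned long nfsnodehash = 255;    /* hashinit()-style mask: 256 buckets */

    /* NFSNOHASH() is assumed to reduce to a "hash & mask" style lookup. */
    printf("hash %lu -> bucket %lu\n",
        fh_hash(fh, 8), fh_hash(fh, 8) & nfsnodehash);
    return (0);
}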
+ */ + np = zalloc(nfsnode_zone); + + error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, &nvp); + if (error) { + if (nfs_node_hash_lock < 0) + wakeup(&nfs_node_hash_lock); + nfs_node_hash_lock = 0; + *npp = 0; + zfree(nfsnode_zone, np); + return (error); + } + vp = nvp; + bzero((caddr_t)np, sizeof *np); + vp->v_data = np; + np->n_vnode = vp; + /* + * Insert the nfsnode in the hash queue for its new file handle + */ + for (np2 = nhpp->lh_first; np2 != 0; np2 = np2->n_hash.le_next) { + if (mntp != NFSTOV(np2)->v_mount || np2->n_fhsize != fhsize || + bcmp((caddr_t)fhp, (caddr_t)np2->n_fhp, fhsize)) + continue; + vrele(vp); + if (nfs_node_hash_lock < 0) + wakeup(&nfs_node_hash_lock); + nfs_node_hash_lock = 0; + zfree(nfsnode_zone, np); + goto retry; + } + LIST_INSERT_HEAD(nhpp, np, n_hash); + if (fhsize > NFS_SMALLFH) { + MALLOC(np->n_fhp, nfsfh_t *, fhsize, M_NFSBIGFH, M_WAITOK); + } else + np->n_fhp = &np->n_fh; + bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize); + np->n_fhsize = fhsize; + lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE); + *npp = np; + + if (nfs_node_hash_lock < 0) + wakeup(&nfs_node_hash_lock); + nfs_node_hash_lock = 0; + + /* + * Lock the new nfsnode. + */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + + return (0); +} + +int +nfs_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + register struct nfsnode *np; + register struct sillyrename *sp; + struct proc *p = curproc; /* XXX */ + + np = VTONFS(ap->a_vp); + if (prtactive && ap->a_vp->v_usecount != 0) + vprint("nfs_inactive: pushing active", ap->a_vp); + if (ap->a_vp->v_type != VDIR) { + sp = np->n_sillyrename; + np->n_sillyrename = (struct sillyrename *)0; + } else + sp = (struct sillyrename *)0; + if (sp) { + /* + * We need a reference to keep the vnode from being + * recycled by getnewvnode while we do the I/O + * associated with discarding the buffers unless we + * are being forcibly unmounted in which case we already + * have our own reference. + */ + if (ap->a_vp->v_usecount > 0) + (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1); + else if (vget(ap->a_vp, 0, p)) + panic("nfs_inactive: lost vnode"); + else { + (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1); + vrele(ap->a_vp); + } + /* + * Remove the silly file that was rename'd earlier + */ + nfs_removeit(sp); + crfree(sp->s_cred); + vrele(sp->s_dvp); + FREE((caddr_t)sp, M_NFSREQ); + } + np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED | + NQNFSNONCACHE | NQNFSWRITE); + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + return (0); +} + +/* + * Reclaim an nfsnode so that it can be used for other purposes. + */ +int +nfs_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + register struct nfsmount *nmp = VFSTONFS(vp->v_mount); + register struct nfsdmap *dp, *dp2; + + if (prtactive && vp->v_usecount != 0) + vprint("nfs_reclaim: pushing active", vp); + + if (np->n_hash.le_prev != NULL) + LIST_REMOVE(np, n_hash); + + /* + * For nqnfs, take it off the timer queue as required. + */ + if ((nmp->nm_flag & NFSMNT_NQNFS) && np->n_timer.cqe_next != 0) { + CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); + } + + /* + * Free up any directory cookie structures and + * large file handle structures that might be associated with + * this nfs node. 
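The hash-insertion code in nfs_nget() above follows a check, block, re-check discipline: zalloc() and getnewvnode() can sleep, so after they return the hash chain is scanned a second time and, on a hit, the freshly allocated node is thrown away and the lookup retried. The sketch below is a single-threaded, user-level model of that control flow with hypothetical types; in the kernel the second lookup matters because another process can run while this one sleeps.

#include <stdio.h>
#include <stdlib.h>

struct node { int key; struct node *next; };
static struct node *bucket;             /* one hash chain, for illustration */

static struct node *
chain_lookup(int key)
{
    struct node *n;

    for (n = bucket; n != NULL; n = n->next)
        if (n->key == key)
            return (n);
    return (NULL);
}

/* Model of nfs_nget(): allocate first, re-check the chain, then insert. */
static struct node *
get_node(int key)
{
    struct node *n, *n2;

retry:
    if ((n2 = chain_lookup(key)) != NULL)
        return (n2);                    /* already created by someone else */
    n = calloc(1, sizeof(*n));          /* may sleep; others can run here */
    if (n == NULL)
        return (NULL);
    if ((n2 = chain_lookup(key)) != NULL) {
        free(n);                        /* lost the race: discard and retry */
        goto retry;
    }
    n->key = key;
    n->next = bucket;
    bucket = n;
    return (n);
}

int
main(void)
{
    printf("same node twice: %d\n", get_node(7) == get_node(7));
    return (0);
}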
+ */ + if (vp->v_type == VDIR) { + dp = np->n_cookies.lh_first; + while (dp) { + dp2 = dp; + dp = dp->ndm_list.le_next; + FREE((caddr_t)dp2, M_NFSDIROFF); + } + } + if (np->n_fhsize > NFS_SMALLFH) { + FREE((caddr_t)np->n_fhp, M_NFSBIGFH); + } + + cache_purge(vp); + zfree(nfsnode_zone, vp->v_data); + vp->v_data = (void *)0; + return (0); +} + +#if 0 +/* + * Lock an nfsnode + */ +int +nfs_lock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + + /* + * Ugh, another place where interruptible mounts will get hung. + * If you make this sleep interruptible, then you have to fix all + * the VOP_LOCK() calls to expect interruptibility. + */ + while (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + (void) tsleep((caddr_t)vp, PINOD, "nfslck", 0); + } + if (vp->v_tag == VT_NON) + return (ENOENT); + +#if 0 + /* + * Only lock regular files. If a server crashed while we were + * holding a directory lock, we could easily end up sleeping + * until the server rebooted while holding a lock on the root. + * Locks are only needed for protecting critical sections in + * VMIO at the moment. + * New vnodes will have type VNON but they should be locked + * since they may become VREG. This is checked in loadattrcache + * and unwanted locks are released there. + */ + if (vp->v_type == VREG || vp->v_type == VNON) { + while (np->n_flag & NLOCKED) { + np->n_flag |= NWANTED; + (void) tsleep((caddr_t) np, PINOD, "nfslck2", 0); + /* + * If the vnode has transmuted into a VDIR while we + * were asleep, then skip the lock. + */ + if (vp->v_type != VREG && vp->v_type != VNON) + return (0); + } + np->n_flag |= NLOCKED; + } +#endif + + return (0); +} + +/* + * Unlock an nfsnode + */ +int +nfs_unlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + } */ *ap; +{ +#if 0 + struct vnode* vp = ap->a_vp; + struct nfsnode* np = VTONFS(vp); + + if (vp->v_type == VREG || vp->v_type == VNON) { + if (!(np->n_flag & NLOCKED)) + panic("nfs_unlock: nfsnode not locked"); + np->n_flag &= ~NLOCKED; + if (np->n_flag & NWANTED) { + np->n_flag &= ~NWANTED; + wakeup((caddr_t) np); + } + } +#endif + + return (0); +} + +/* + * Check for a locked nfsnode + */ +int +nfs_islocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + return VTONFS(ap->a_vp)->n_flag & NLOCKED ? 1 : 0; +} +#endif + diff --git a/sys/nfs/nfs_nqlease.c b/sys/nfs/nfs_nqlease.c new file mode 100644 index 0000000..ccaa848 --- /dev/null +++ b/sys/nfs/nfs_nqlease.c @@ -0,0 +1,1307 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_nqlease.c 8.9 (Berkeley) 5/20/95 + * $FreeBSD$ + */ + + +/* + * References: + * Cary G. Gray and David R. Cheriton, "Leases: An Efficient Fault-Tolerant + * Mechanism for Distributed File Cache Consistency", + * In Proc. of the Twelfth ACM Symposium on Operating Systems + * Principals, pg. 202-210, Litchfield Park, AZ, Dec. 1989. + * Michael N. Nelson, Brent B. Welch and John K. Ousterhout, "Caching + * in the Sprite Network File System", ACM TOCS 6(1), + * pages 134-154, February 1988. + * V. Srinivasan and Jeffrey C. Mogul, "Spritely NFS: Implementation and + * Performance of Cache-Consistency Protocols", Digital + * Equipment Corporation WRL Research Report 89/5, May 1989. + */ +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/protosw.h> + +#include <vm/vm_zone.h> + +#include <netinet/in.h> +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/xdr_subs.h> +#include <nfs/nqnfs.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsmount.h> + +static MALLOC_DEFINE(M_NQMHOST, "NQNFS Host", "Nqnfs host address table"); + +time_t nqnfsstarttime = (time_t)0; +int nqsrv_clockskew = NQ_CLOCKSKEW; +int nqsrv_writeslack = NQ_WRITESLACK; +int nqsrv_maxlease = NQ_MAXLEASE; +#ifndef NFS_NOSERVER +static int nqsrv_maxnumlease = NQ_MAXNUMLEASE; +#endif + +struct vop_lease_args; + +#ifndef NFS_NOSERVER +static int nqsrv_cmpnam __P((struct nfssvc_sock *, struct sockaddr *, + struct nqhost *)); +static int nqnfs_vacated __P((struct vnode *vp, struct ucred *cred)); +static void nqsrv_addhost __P((struct nqhost *lph, struct nfssvc_sock *slp, + struct sockaddr *nam)); +static void nqsrv_instimeq __P((struct nqlease *lp, u_int32_t duration)); +static void nqsrv_locklease __P((struct nqlease *lp)); +static void nqsrv_send_eviction __P((struct vnode *vp, struct nqlease *lp, + struct nfssvc_sock *slp, + struct sockaddr *nam, + struct ucred *cred)); +static void nqsrv_unlocklease __P((struct nqlease *lp)); +static void nqsrv_waitfor_expiry __P((struct nqlease *lp)); +#endif +extern void nqnfs_lease_updatetime __P((int deltat)); + +/* + * Signifies which rpcs can have piggybacked lease requests + */ +int nqnfs_piggy[NFS_NPROCS] = { + 0, + 0, + ND_WRITE, + ND_READ, + 0, + ND_READ, + ND_READ, + ND_WRITE, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ND_READ, + ND_READ, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, +}; + +extern nfstype nfsv2_type[9]; +extern nfstype nfsv3_type[9]; +extern int nfsd_waiting; +extern struct nfsstats nfsstats; + +#define TRUE 1 +#define FALSE 0 + +#ifndef NFS_NOSERVER +/* + * Get or check for a lease for "vp", based on ND_CHECK flag. + * The rules are as follows: + * - if a current non-caching lease, reply non-caching + * - if a current lease for same host only, extend lease + * - if a read cachable lease and a read lease request + * add host to list any reply cachable + * - else { set non-cachable for read-write sharing } + * send eviction notice messages to all other hosts that have lease + * wait for lease termination { either by receiving vacated messages + * from all the other hosts or expiry + * via. timeout } + * modify lease to non-cachable + * - else if no current lease, issue new one + * - reply + * - return boolean TRUE iff nam should be m_freem()'d + * NB: Since nqnfs_serverd() is called from a timer, any potential tsleep() + * in here must be framed by nqsrv_locklease() and nqsrv_unlocklease(). + * nqsrv_locklease() is coded such that at least one of LC_LOCKED and + * LC_WANTED is set whenever a process is tsleeping in it. The exception + * is when a new lease is being allocated, since it is not in the timer + * queue yet. (Ditto for the splsoftclock() and splx(s) calls) + */ +int +nqsrv_getlease(vp, duration, flags, slp, procp, nam, cachablep, frev, cred) + struct vnode *vp; + u_int32_t *duration; + int flags; + struct nfssvc_sock *slp; + struct proc *procp; + struct sockaddr *nam; + int *cachablep; + u_quad_t *frev; + struct ucred *cred; +{ + register struct nqlease *lp; + register struct nqfhhashhead *lpp = NULL; + register struct nqhost *lph = NULL; + struct nqlease *tlp; + struct nqm **lphp; + struct vattr vattr; + fhandle_t fh; + int i, ok, error, s; + + if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) + return (0); + if (*duration > nqsrv_maxlease) + *duration = nqsrv_maxlease; + error = VOP_GETATTR(vp, &vattr, cred, procp); + if (error) + return (error); + *frev = vattr.va_filerev; + s = splsoftclock(); + tlp = vp->v_lease; + if ((flags & ND_CHECK) == 0) + nfsstats.srvnqnfs_getleases++; + if (tlp == 0) { + /* + * Find the lease by searching the hash list. 
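The rules listed at the top of nqsrv_getlease() boil down to a short decision ladder. The function below is only a flattened restatement of that comment, with boolean parameters standing in for the LC_* flag tests and host comparisons; the real routine interleaves these checks with locking, eviction and timer-queue updates.

#include <stdio.h>

/* Flattened restatement of the lease rules described above. */
static const char *
lease_decision(int have_lease, int noncachable, int same_host_only,
    int req_write)
{
    if (!have_lease)
        return ("issue a new lease, cachable");
    if (noncachable)
        return ("reply non-caching");
    if (same_host_only)
        return ("extend the existing lease");
    if (!req_write)
        return ("add host to the read-share list, reply cachable");
    return ("evict the other hosts, wait, then grant non-cachable");
}

int
main(void)
{
    printf("%s\n", lease_decision(1, 0, 0, 1));
    return (0);
}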
+ */ + fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; + error = VFS_VPTOFH(vp, &fh.fh_fid); + if (error) { + splx(s); + return (error); + } + lpp = NQFHHASH(fh.fh_fid.fid_data); + for (lp = lpp->lh_first; lp != 0; lp = lp->lc_hash.le_next) + if (fh.fh_fsid.val[0] == lp->lc_fsid.val[0] && + fh.fh_fsid.val[1] == lp->lc_fsid.val[1] && + !bcmp(fh.fh_fid.fid_data, lp->lc_fiddata, + fh.fh_fid.fid_len - sizeof (int32_t))) { + /* Found it */ + lp->lc_vp = vp; + vp->v_lease = lp; + tlp = lp; + break; + } + } else + lp = tlp; + if (lp != 0) { + if ((lp->lc_flag & LC_NONCACHABLE) || + (lp->lc_morehosts == (struct nqm *)0 && + nqsrv_cmpnam(slp, nam, &lp->lc_host))) + goto doreply; + if ((flags & ND_READ) && (lp->lc_flag & LC_WRITE) == 0) { + if (flags & ND_CHECK) + goto doreply; + if (nqsrv_cmpnam(slp, nam, &lp->lc_host)) + goto doreply; + i = 0; + if (lp->lc_morehosts) { + lph = lp->lc_morehosts->lpm_hosts; + lphp = &lp->lc_morehosts->lpm_next; + ok = 1; + } else { + lphp = &lp->lc_morehosts; + ok = 0; + } + while (ok && (lph->lph_flag & LC_VALID)) { + if (nqsrv_cmpnam(slp, nam, lph)) + goto doreply; + if (++i == LC_MOREHOSTSIZ) { + i = 0; + if (*lphp) { + lph = (*lphp)->lpm_hosts; + lphp = &((*lphp)->lpm_next); + } else + ok = 0; + } else + lph++; + } + nqsrv_locklease(lp); + if (!ok) { + *lphp = (struct nqm *) + malloc(sizeof (struct nqm), + M_NQMHOST, M_WAITOK); + bzero((caddr_t)*lphp, sizeof (struct nqm)); + lph = (*lphp)->lpm_hosts; + } + nqsrv_addhost(lph, slp, nam); + nqsrv_unlocklease(lp); + } else { + lp->lc_flag |= LC_NONCACHABLE; + nqsrv_locklease(lp); + nqsrv_send_eviction(vp, lp, slp, nam, cred); + nqsrv_waitfor_expiry(lp); + nqsrv_unlocklease(lp); + } +doreply: + /* + * Update the lease and return + */ + if ((flags & ND_CHECK) == 0) + nqsrv_instimeq(lp, *duration); + if (lp->lc_flag & LC_NONCACHABLE) + *cachablep = 0; + else { + *cachablep = 1; + if (flags & ND_WRITE) + lp->lc_flag |= LC_WRITTEN; + } + splx(s); + return (0); + } + splx(s); + if (flags & ND_CHECK) + return (0); + + /* + * Allocate new lease + * The value of nqsrv_maxnumlease should be set generously, so that + * the following "printf" happens infrequently. + */ + if (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease) { + printf("Nqnfs server, too many leases\n"); + do { + (void) tsleep((caddr_t)&lbolt, PSOCK, + "nqsrvnuml", 0); + } while (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease); + } + MALLOC(lp, struct nqlease *, sizeof (struct nqlease), M_NQLEASE, M_WAITOK); + bzero((caddr_t)lp, sizeof (struct nqlease)); + if (flags & ND_WRITE) + lp->lc_flag |= (LC_WRITE | LC_WRITTEN); + nqsrv_addhost(&lp->lc_host, slp, nam); + lp->lc_vp = vp; + lp->lc_fsid = fh.fh_fsid; + bcopy(fh.fh_fid.fid_data, lp->lc_fiddata, + fh.fh_fid.fid_len - sizeof (int32_t)); + if(!lpp) + panic("nfs_nqlease.c: Phoney lpp"); + LIST_INSERT_HEAD(lpp, lp, lc_hash); + vp->v_lease = lp; + s = splsoftclock(); + nqsrv_instimeq(lp, *duration); + splx(s); + *cachablep = 1; + if (++nfsstats.srvnqnfs_leases > nfsstats.srvnqnfs_maxleases) + nfsstats.srvnqnfs_maxleases = nfsstats.srvnqnfs_leases; + return (0); +} + +/* + * Local lease check for server syscalls. + * Just set up args and let nqsrv_getlease() do the rest. + * nqnfs_vop_lease_check() is the VOP_LEASE() form of the same routine. + * Ifdef'd code in nfsnode.h renames these routines to whatever a particular + * OS needs. 
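The host list attached to a lease is one inline nqhost plus overflow blocks of LC_MOREHOSTSIZ entries chained through lpm_next, and the same i/len/lphnext walk shows up in nqsrv_getlease(), nqsrv_send_eviction(), nqsrv_waitfor_expiry(), nqnfs_serverd() and nqnfsrv_vacated(). A stripped-down model of that traversal (hypothetical types, block size 4, and without the LC_VALID test that terminates the real walk in a partially filled block):

#include <stdio.h>

#define MOREHOSTSIZ 4                   /* stand-in for LC_MOREHOSTSIZ */

struct morehosts {
    int hosts[MOREHOSTSIZ];             /* stand-in for lpm_hosts[] */
    struct morehosts *next;             /* stand-in for lpm_next */
};

struct lease {
    int first_host;                     /* stand-in for lc_host */
    struct morehosts *more;             /* stand-in for lc_morehosts */
};

/* Visit the inline host, then each overflow block, as the lease code does. */
static void
walk_hosts(struct lease *lp)
{
    struct morehosts *m = lp->more;
    int *hp = &lp->first_host;
    int i = 0, len = 1, ok = 1;

    while (ok) {
        printf("host %d\n", *hp);
        if (++i == len) {
            if (m != NULL) {
                i = 0;
                len = MOREHOSTSIZ;
                hp = m->hosts;
                m = m->next;
            } else
                ok = 0;
        } else
            hp++;
    }
}

int
main(void)
{
    struct morehosts mh = { { 2, 3, 4, 5 }, NULL };
    struct lease lp = { 1, &mh };

    walk_hosts(&lp);
    return (0);
}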
+ */ +void +nqnfs_lease_check(vp, p, cred, flag) + struct vnode *vp; + struct proc *p; + struct ucred *cred; + int flag; +{ + u_int32_t duration = 0; + int cache; + u_quad_t frev; + + (void) nqsrv_getlease(vp, &duration, ND_CHECK | flag, NQLOCALSLP, + p, (struct sockaddr *)0, &cache, &frev, cred); +} + +int +nqnfs_vop_lease_check(ap) + struct vop_lease_args /* { + struct vnode *a_vp; + struct proc *a_p; + struct ucred *a_cred; + int a_flag; + } */ *ap; +{ + u_int32_t duration = 0; + int cache; + u_quad_t frev; + + (void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag, + NQLOCALSLP, ap->a_p, (struct sockaddr *)0, + &cache, &frev, ap->a_cred); + return (0); +} + + +/* + * Add a host to an nqhost structure for a lease. + */ +static void +nqsrv_addhost(lph, slp, nam) + register struct nqhost *lph; + struct nfssvc_sock *slp; + struct sockaddr *nam; +{ + struct sockaddr_in *saddr; + struct socket *nsso; + + if (slp == NQLOCALSLP) { + lph->lph_flag |= (LC_VALID | LC_LOCAL); + return; + } + nsso = slp->ns_so; + if (nsso && nsso->so_proto->pr_protocol == IPPROTO_UDP) { + saddr = (struct sockaddr_in *)nam; + lph->lph_flag |= (LC_VALID | LC_UDP); + lph->lph_inetaddr = saddr->sin_addr.s_addr; + lph->lph_port = saddr->sin_port; +#ifdef ISO + } else if (nsso && nsso->so_proto->pr_protocol == ISOPROTO_CLTP) { + lph->lph_nam = dup_sockaddr(nam, 1); + lph->lph_flag |= (LC_VALID | LC_CLTP); +#endif + } else { + lph->lph_flag |= (LC_VALID | LC_SREF); + lph->lph_slp = slp; + slp->ns_sref++; + } +} + +/* + * Update the lease expiry time and position it in the timer queue correctly. + */ +static void +nqsrv_instimeq(lp, duration) + register struct nqlease *lp; + u_int32_t duration; +{ + register struct nqlease *tlp; + time_t newexpiry; + + newexpiry = time_second + duration + nqsrv_clockskew; + if (lp->lc_expiry == newexpiry) + return; + if (lp->lc_timer.cqe_next != 0) { + CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer); + } + lp->lc_expiry = newexpiry; + + /* + * Find where in the queue it should be. + */ + tlp = nqtimerhead.cqh_last; + while (tlp != (void *)&nqtimerhead && tlp->lc_expiry > newexpiry) + tlp = tlp->lc_timer.cqe_prev; +#ifdef HASNVRAM + if (tlp == nqtimerhead.cqh_last) + NQSTORENOVRAM(newexpiry); +#endif /* HASNVRAM */ + if (tlp == (void *)&nqtimerhead) { + CIRCLEQ_INSERT_HEAD(&nqtimerhead, lp, lc_timer); + } else { + CIRCLEQ_INSERT_AFTER(&nqtimerhead, tlp, lp, lc_timer); + } +} + +/* + * Compare the requesting host address with the lph entry in the lease. + * Return true iff it is the same. + * This is somewhat messy due to the union in the nqhost structure. + * The local host is indicated by the special value of NQLOCALSLP for slp. 
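nqsrv_instimeq() keeps the lease timer queue ordered by ascending expiry, searching backwards from the tail for the insertion point so that nqnfs_serverd() can stop at the first entry that has not yet expired. The real code uses the CIRCLEQ macros; the sketch below models the same insertion on a plain doubly linked list, with example durations and a made-up clock-skew allowance.

#include <stdio.h>
#include <time.h>

struct tlease {
    time_t expiry;
    struct tlease *prev, *next;
};

static struct tlease *head, *tail;

/* Insert lp keeping the list ordered by ascending expiry (cf. nqsrv_instimeq). */
static void
instimeq(struct tlease *lp, time_t now, time_t duration, time_t clockskew)
{
    struct tlease *t;

    lp->expiry = now + duration + clockskew;
    for (t = tail; t != NULL && t->expiry > lp->expiry; t = t->prev)
        ;
    if (t == NULL) {                    /* earliest expiry: new head */
        lp->prev = NULL;
        lp->next = head;
        if (head != NULL)
            head->prev = lp;
        else
            tail = lp;
        head = lp;
    } else {                            /* insert after t */
        lp->next = t->next;
        lp->prev = t;
        if (t->next != NULL)
            t->next->prev = lp;
        else
            tail = lp;
        t->next = lp;
    }
}

int
main(void)
{
    struct tlease a, b;
    time_t now = time(NULL);

    instimeq(&a, now, 30, 10);
    instimeq(&b, now, 5, 10);
    printf("head expires %ld s before tail\n",
        (long)(tail->expiry - head->expiry));
    return (0);
}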
+ */ +static int +nqsrv_cmpnam(slp, nam, lph) + register struct nfssvc_sock *slp; + struct sockaddr *nam; + register struct nqhost *lph; +{ + register struct sockaddr_in *saddr; + struct sockaddr *addr; + union nethostaddr lhaddr; + struct socket *nsso; + int ret; + + if (slp == NQLOCALSLP) { + if (lph->lph_flag & LC_LOCAL) + return (1); + else + return (0); + } + nsso = slp->ns_so; + if (nsso && nsso->so_proto->pr_protocol == IPPROTO_UDP) { + addr = nam; +#ifdef ISO + } else if (nsso && nsso->so_proto->pr_protocol == ISOPROTO_CLTP) { + addr = nam; +#endif + } else { + addr = slp->ns_nam; + } + if (lph->lph_flag & LC_UDP) { + ret = netaddr_match(AF_INET, &lph->lph_haddr, addr); + } else if (lph->lph_flag & LC_CLTP) { + ret = netaddr_match(AF_ISO, &lph->lph_claddr, addr); + } else { + if ((lph->lph_slp->ns_flag & SLP_VALID) == 0) + return (0); + saddr = (struct sockaddr_in *)lph->lph_slp->ns_nam; + if (saddr->sin_family == AF_INET) + lhaddr.had_inetaddr = saddr->sin_addr.s_addr; + else + lhaddr.had_nam = lph->lph_slp->ns_nam; + ret = netaddr_match(saddr->sin_family, &lhaddr, addr); + } + return (ret); +} + +/* + * Send out eviction notice messages to all other hosts for the lease. + */ +static void +nqsrv_send_eviction(vp, lp, slp, nam, cred) + struct vnode *vp; + register struct nqlease *lp; + struct nfssvc_sock *slp; + struct sockaddr *nam; + struct ucred *cred; +{ + register struct nqhost *lph = &lp->lc_host; + register int siz; + struct nqm *lphnext = lp->lc_morehosts; + struct mbuf *m, *mreq, *mb, *mb2, *mheadend; + struct socket *so; + struct sockaddr *nam2; + struct sockaddr_in *saddr; + nfsfh_t nfh; + fhandle_t *fhp; + caddr_t bpos, cp; + u_int32_t xid, *tl; + int len = 1, ok = 1, i = 0; + int sotype, *solockp; + + while (ok && (lph->lph_flag & LC_VALID)) { + if (nqsrv_cmpnam(slp, nam, lph)) + lph->lph_flag |= LC_VACATED; + else if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) { + if (lph->lph_flag & LC_UDP) { + MALLOC(nam2, struct sockaddr *, + sizeof *nam2, M_SONAME, M_WAITOK); + saddr = (struct sockaddr_in *)nam2; + saddr->sin_len = sizeof *saddr; + saddr->sin_family = AF_INET; + saddr->sin_addr.s_addr = lph->lph_inetaddr; + saddr->sin_port = lph->lph_port; + so = lph->lph_slp->ns_so; + } else if (lph->lph_flag & LC_CLTP) { + nam2 = lph->lph_nam; + so = lph->lph_slp->ns_so; + } else if (lph->lph_slp->ns_flag & SLP_VALID) { + nam2 = (struct sockaddr *)0; + so = lph->lph_slp->ns_so; + } else + goto nextone; + sotype = so->so_type; + if (so->so_proto->pr_flags & PR_CONNREQUIRED) + solockp = &lph->lph_slp->ns_solock; + else + solockp = (int *)0; + nfsm_reqhead((struct vnode *)0, NQNFSPROC_EVICTED, + NFSX_V3FH + NFSX_UNSIGNED); + fhp = &nfh.fh_generic; + bzero((caddr_t)fhp, sizeof(nfh)); + fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; + VFS_VPTOFH(vp, &fhp->fh_fid); + nfsm_srvfhtom(fhp, 1); + m = mreq; + siz = 0; + while (m) { + siz += m->m_len; + m = m->m_next; + } + if (siz <= 0 || siz > NFS_MAXPACKET) { + printf("mbuf siz=%d\n",siz); + panic("Bad nfs svc reply"); + } + m = nfsm_rpchead(cred, (NFSMNT_NFSV3 | NFSMNT_NQNFS), + NQNFSPROC_EVICTED, + RPCAUTH_UNIX, 5 * NFSX_UNSIGNED, (char *)0, + 0, (char *)NULL, mreq, siz, &mheadend, &xid); + /* + * For stream protocols, prepend a Sun RPC + * Record Mark. 
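The record mark mentioned in the comment just below is the standard Sun RPC framing for stream transports: a 4-byte word whose high bit flags the last fragment and whose low 31 bits give the fragment length. In the code that follows, the stored length is m_pkthdr.len minus NFSX_UNSIGNED because the mark itself has already been prepended to the chain. A standalone illustration of the encoding, with an arbitrary message length:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint32_t msglen = 432;              /* RPC message length in bytes */
    /* High bit = last fragment, low 31 bits = fragment length. */
    uint32_t mark = htonl(0x80000000u | msglen);

    printf("record mark value: 0x%08x\n", ntohl(mark));
    printf("length recovered by the receiver: %u\n",
        ntohl(mark) & 0x7fffffffu);
    return (0);
}

The receiver strips the mark, uses the length to reassemble the fragment from the byte stream, and only then hands the RPC message to the decode path.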
+ */ + if (sotype == SOCK_STREAM) { + M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); + *mtod(m, u_int32_t *) = htonl(0x80000000 | + (m->m_pkthdr.len - NFSX_UNSIGNED)); + } + /* + * nfs_sndlock if PR_CONNREQUIRED XXX + */ + + if (((lph->lph_flag & (LC_UDP | LC_CLTP)) == 0 && + (lph->lph_slp->ns_flag & SLP_VALID) == 0) || + (nfs_slplock(lph->lph_slp, 0) == 0)) + m_freem(m); + else { + (void) nfs_send(so, nam2, m, + (struct nfsreq *)0); + if (solockp) + nfs_slpunlock(lph->lph_slp); + } + if (lph->lph_flag & LC_UDP) + FREE(nam2, M_SONAME); + } +nextone: + if (++i == len) { + if (lphnext) { + i = 0; + len = LC_MOREHOSTSIZ; + lph = lphnext->lpm_hosts; + lphnext = lphnext->lpm_next; + } else + ok = 0; + } else + lph++; + } +} + +/* + * Wait for the lease to expire. + * This will occur when all clients have sent "vacated" messages to + * this server OR when it expires do to timeout. + */ +static void +nqsrv_waitfor_expiry(lp) + register struct nqlease *lp; +{ + register struct nqhost *lph; + register int i; + struct nqm *lphnext; + int len, ok; + +tryagain: + if (time_second > lp->lc_expiry) + return; + lph = &lp->lc_host; + lphnext = lp->lc_morehosts; + len = 1; + i = 0; + ok = 1; + while (ok && (lph->lph_flag & LC_VALID)) { + if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) { + lp->lc_flag |= LC_EXPIREDWANTED; + (void) tsleep((caddr_t)&lp->lc_flag, PSOCK, + "nqexp", 0); + goto tryagain; + } + if (++i == len) { + if (lphnext) { + i = 0; + len = LC_MOREHOSTSIZ; + lph = lphnext->lpm_hosts; + lphnext = lphnext->lpm_next; + } else + ok = 0; + } else + lph++; + } +} + +/* + * Nqnfs server timer that maintains the server lease queue. + * Scan the lease queue for expired entries: + * - when one is found, wakeup anyone waiting for it + * else dequeue and free + */ +void +nqnfs_serverd() +{ + register struct nqlease *lp; + register struct nqhost *lph; + struct nqlease *nextlp; + struct nqm *lphnext, *olphnext; + int i, len, ok; + + for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead; + lp = nextlp) { + if (lp->lc_expiry >= time_second) + break; + nextlp = lp->lc_timer.cqe_next; + if (lp->lc_flag & LC_EXPIREDWANTED) { + lp->lc_flag &= ~LC_EXPIREDWANTED; + wakeup((caddr_t)&lp->lc_flag); + } else if ((lp->lc_flag & (LC_LOCKED | LC_WANTED)) == 0) { + /* + * Make a best effort at keeping a write caching lease long + * enough by not deleting it until it has been explicitly + * vacated or there have been no writes in the previous + * write_slack seconds since expiry and the nfsds are not + * all busy. The assumption is that if the nfsds are not + * all busy now (no queue of nfs requests), then the client + * would have been able to do at least one write to the + * file during the last write_slack seconds if it was still + * trying to push writes to the server. + */ + if ((lp->lc_flag & (LC_WRITE | LC_VACATED)) == LC_WRITE && + ((lp->lc_flag & LC_WRITTEN) || nfsd_waiting == 0)) { + lp->lc_flag &= ~LC_WRITTEN; + nqsrv_instimeq(lp, nqsrv_writeslack); + } else { + CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer); + LIST_REMOVE(lp, lc_hash); + /* + * This soft reference may no longer be valid, but + * no harm done. The worst case is if the vnode was + * recycled and has another valid lease reference, + * which is dereferenced prematurely. 
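The write-slack comment above amounts to a single retention test in nqnfs_serverd(): an expired write lease that has not been vacated is re-queued for another nqsrv_writeslack period if it was written recently or the nfsds are idle. Below is a restatement of just that test, with made-up flag values standing in for the LC_* definitions from nqnfs.h.

#include <stdio.h>

#define LC_WRITE   0x01                 /* hypothetical stand-in values */
#define LC_VACATED 0x02
#define LC_WRITTEN 0x04

/* Mirror of the retention test applied to an expired lease above. */
static int
keep_expired_lease(int flags, int nfsd_waiting)
{
    return ((flags & (LC_WRITE | LC_VACATED)) == LC_WRITE &&
        ((flags & LC_WRITTEN) || nfsd_waiting == 0));
}

int
main(void)
{
    printf("%d %d\n", keep_expired_lease(LC_WRITE | LC_WRITTEN, 4),
        keep_expired_lease(LC_WRITE | LC_VACATED, 0));
    return (0);
}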
+ */ + lp->lc_vp->v_lease = (struct nqlease *)0; + lph = &lp->lc_host; + lphnext = lp->lc_morehosts; + olphnext = (struct nqm *)0; + len = 1; + i = 0; + ok = 1; + while (ok && (lph->lph_flag & LC_VALID)) { + if (lph->lph_flag & LC_CLTP) + FREE(lph->lph_nam, M_SONAME); + if (lph->lph_flag & LC_SREF) + nfsrv_slpderef(lph->lph_slp); + if (++i == len) { + if (olphnext) { + free((caddr_t)olphnext, M_NQMHOST); + olphnext = (struct nqm *)0; + } + if (lphnext) { + olphnext = lphnext; + i = 0; + len = LC_MOREHOSTSIZ; + lph = lphnext->lpm_hosts; + lphnext = lphnext->lpm_next; + } else + ok = 0; + } else + lph++; + } + FREE((caddr_t)lp, M_NQLEASE); + if (olphnext) + free((caddr_t)olphnext, M_NQMHOST); + nfsstats.srvnqnfs_leases--; + } + } + } +} + +/* + * Called from nfssvc_nfsd() for a getlease rpc request. + * Do the from/to xdr translation and call nqsrv_getlease() to + * do the real work. + */ +int +nqnfsrv_getlease(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register struct nfs_fattr *fp; + struct vattr va; + register struct vattr *vap = &va; + struct vnode *vp; + nfsfh_t nfh; + fhandle_t *fhp; + register u_int32_t *tl; + register int32_t t1; + u_quad_t frev; + caddr_t bpos; + int error = 0; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + int flags, rdonly, cache; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + flags = fxdr_unsigned(int, *tl++); + nfsd->nd_duration = fxdr_unsigned(int, *tl); + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, + (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(0); + goto nfsmout; + } + if (rdonly && flags == ND_WRITE) { + error = EROFS; + vput(vp); + nfsm_reply(0); + } + (void) nqsrv_getlease(vp, &nfsd->nd_duration, flags, slp, procp, + nam, &cache, &frev, cred); + error = VOP_GETATTR(vp, vap, cred, procp); + vput(vp); + nfsm_reply(NFSX_V3FATTR + 4 * NFSX_UNSIGNED); + nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(cache); + *tl++ = txdr_unsigned(nfsd->nd_duration); + txdr_hyper(frev, tl); + nfsm_build(fp, struct nfs_fattr *, NFSX_V3FATTR); + nfsm_srvfillattr(vap, fp); + nfsm_srvdone; +} + +/* + * Called from nfssvc_nfsd() when a "vacated" message is received from a + * client. Find the entry and expire it. + */ +int +nqnfsrv_vacated(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + register struct nqlease *lp; + register struct nqhost *lph; + struct nqlease *tlp = (struct nqlease *)0; + nfsfh_t nfh; + fhandle_t *fhp; + register u_int32_t *tl; + register int32_t t1; + struct nqm *lphnext; + struct mbuf *mreq, *mb; + int error = 0, i, len, ok, gotit = 0, cache = 0; + char *cp2, *bpos; + u_quad_t frev; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + m_freem(mrep); + /* + * Find the lease by searching the hash list. 
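nqnfsrv_getlease() above answers with four XDR words (the cachable flag, the granted duration, and the 64-bit file revision) followed by a v3 fattr block, which is exactly what the client-side nqnfs_getlease() later dissects. The snippet encodes just that four-word header the way txdr_hyper() lays out a hyper (high word first), using example values.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint32_t words[4];
    uint64_t frev = 0x0000000100000002ULL;      /* example file revision */

    words[0] = htonl(1);                        /* cachable */
    words[1] = htonl(30);                       /* lease duration, seconds */
    words[2] = htonl((uint32_t)(frev >> 32));   /* hyper: high word first */
    words[3] = htonl((uint32_t)frev);           /* then the low word */
    printf("%zu-byte header precedes the fattr\n", sizeof(words));
    return (0);
}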
+ */ + for (lp = NQFHHASH(fhp->fh_fid.fid_data)->lh_first; lp != 0; + lp = lp->lc_hash.le_next) + if (fhp->fh_fsid.val[0] == lp->lc_fsid.val[0] && + fhp->fh_fsid.val[1] == lp->lc_fsid.val[1] && + !bcmp(fhp->fh_fid.fid_data, lp->lc_fiddata, + MAXFIDSZ)) { + /* Found it */ + tlp = lp; + break; + } + if (tlp != 0) { + lp = tlp; + len = 1; + i = 0; + lph = &lp->lc_host; + lphnext = lp->lc_morehosts; + ok = 1; + while (ok && (lph->lph_flag & LC_VALID)) { + if (nqsrv_cmpnam(slp, nam, lph)) { + lph->lph_flag |= LC_VACATED; + gotit++; + break; + } + if (++i == len) { + if (lphnext) { + len = LC_MOREHOSTSIZ; + i = 0; + lph = lphnext->lpm_hosts; + lphnext = lphnext->lpm_next; + } else + ok = 0; + } else + lph++; + } + if ((lp->lc_flag & LC_EXPIREDWANTED) && gotit) { + lp->lc_flag &= ~LC_EXPIREDWANTED; + wakeup((caddr_t)&lp->lc_flag); + } +nfsmout: + return (EPERM); + } + return (EPERM); +} + +#endif /* NFS_NOSERVER */ + +/* + * Client get lease rpc function. + */ +int +nqnfs_getlease(vp, rwflag, cred, p) + register struct vnode *vp; + int rwflag; + struct ucred *cred; + struct proc *p; +{ + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + register struct nfsnode *np; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + caddr_t bpos, dpos, cp2; + time_t reqtime; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + int cachable; + u_quad_t frev; + + nfsstats.rpccnt[NQNFSPROC_GETLEASE]++; + mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_V3FH+2*NFSX_UNSIGNED, + &bpos); + nfsm_fhtom(vp, 1); + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(rwflag); + *tl = txdr_unsigned(nmp->nm_leaseterm); + reqtime = time_second; + nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred); + np = VTONFS(vp); + nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + cachable = fxdr_unsigned(int, *tl++); + reqtime += fxdr_unsigned(int, *tl++); + if (reqtime > time_second) { + frev = fxdr_hyper(tl); + nqnfs_clientlease(nmp, np, rwflag, cachable, reqtime, frev); + nfsm_loadattr(vp, (struct vattr *)0); + } else + error = NQNFS_EXPIRED; + nfsm_reqdone; + return (error); +} + +#ifndef NFS_NOSERVER +/* + * Client vacated message function. 
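In nqnfs_getlease() above the client samples time_second into reqtime before sending the RPC and then adds the duration from the reply, so any network delay is charged against the lease and the client's idea of the expiry can never be later than the server's. A worked example with made-up timestamps:

#include <stdio.h>
#include <time.h>

int
main(void)
{
    time_t reqtime = 1000;      /* time_second sampled before the RPC */
    int duration = 30;          /* duration granted by the server */
    time_t now = 1002;          /* reply arrives two seconds later */

    reqtime += duration;        /* client-side expiry: 1030 */
    if (reqtime > now)
        printf("lease valid for %ld more seconds\n", (long)(reqtime - now));
    else
        printf("NQNFS_EXPIRED\n");
    return (0);
}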
+ */ +static int +nqnfs_vacated(vp, cred) + register struct vnode *vp; + struct ucred *cred; +{ + register caddr_t cp; + register int i; + register u_int32_t *tl; + register int32_t t2; + caddr_t bpos; + u_int32_t xid; + int error = 0; + struct mbuf *m, *mreq, *mb, *mb2, *mheadend; + struct nfsmount *nmp; + struct nfsreq myrep; + + nmp = VFSTONFS(vp->v_mount); + nfsstats.rpccnt[NQNFSPROC_VACATED]++; + nfsm_reqhead(vp, NQNFSPROC_VACATED, NFSX_FH(1)); + nfsm_fhtom(vp, 1); + m = mreq; + i = 0; + while (m) { + i += m->m_len; + m = m->m_next; + } + m = nfsm_rpchead(cred, nmp->nm_flag, NQNFSPROC_VACATED, + RPCAUTH_UNIX, 5 * NFSX_UNSIGNED, (char *)0, + 0, (char *)NULL, mreq, i, &mheadend, &xid); + if (nmp->nm_sotype == SOCK_STREAM) { + M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); + *mtod(m, u_int32_t *) = htonl(0x80000000 | (m->m_pkthdr.len - + NFSX_UNSIGNED)); + } + myrep.r_flags = 0; + myrep.r_nmp = nmp; + if (nmp->nm_soflags & PR_CONNREQUIRED) + (void) nfs_sndlock(&myrep); + (void) nfs_send(nmp->nm_so, nmp->nm_nam, m, &myrep); + if (nmp->nm_soflags & PR_CONNREQUIRED) + nfs_sndunlock(&myrep); +nfsmout: + return (error); +} + +/* + * Called for client side callbacks + */ +int +nqnfs_callback(nmp, mrep, md, dpos) + struct nfsmount *nmp; + struct mbuf *mrep, *md; + caddr_t dpos; +{ + register struct vnode *vp; + register u_int32_t *tl; + register int32_t t1; + nfsfh_t nfh; + fhandle_t *fhp; + struct nfsnode *np; + struct nfsd tnfsd; + struct nfssvc_sock *slp; + struct nfsrv_descript ndesc; + register struct nfsrv_descript *nfsd = &ndesc; + struct mbuf **mrq = (struct mbuf **)0, *mb, *mreq; + int error = 0, cache = 0; + char *cp2, *bpos; + u_quad_t frev; + +#ifndef nolint + slp = NULL; +#endif + nfsd->nd_mrep = mrep; + nfsd->nd_md = md; + nfsd->nd_dpos = dpos; + error = nfs_getreq(nfsd, &tnfsd, FALSE); + if (error) + return (error); + md = nfsd->nd_md; + dpos = nfsd->nd_dpos; + if (nfsd->nd_procnum != NQNFSPROC_EVICTED) { + m_freem(mrep); + return (EPERM); + } + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + m_freem(mrep); + error = nfs_nget(nmp->nm_mountp, (nfsfh_t *)fhp, NFSX_V3FH, &np); + if (error) + return (error); + vp = NFSTOV(np); + if (np->n_timer.cqe_next != 0) { + np->n_expiry = 0; + np->n_flag |= NQNFSEVICTED; + if (nmp->nm_timerhead.cqh_first != np) { + CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); + CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer); + } + } + vput(vp); + nfsm_srvdone; +} + + +/* + * Nqnfs client helper daemon. Runs once a second to expire leases. + * It also get authorization strings for "kerb" mounts. + * It must start at the beginning of the list again after any potential + * "sleep" since nfs_reclaim() called from vclean() can pull a node off + * the list asynchronously. + */ +int +nqnfs_clientd(nmp, cred, ncd, flag, argp, p) + register struct nfsmount *nmp; + struct ucred *cred; + struct nfsd_cargs *ncd; + int flag; + caddr_t argp; + struct proc *p; +{ + register struct nfsnode *np; + struct vnode *vp; + struct nfsreq myrep; + struct nfsuid *nuidp, *nnuidp; + int error = 0, vpid; + + /* + * First initialize some variables + */ + + /* + * If an authorization string is being passed in, get it. 
+ */ + if ((flag & NFSSVC_GOTAUTH) && + (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_DISMNT)) == 0) { + if (nmp->nm_state & NFSSTA_HASAUTH) + panic("cld kerb"); + if ((flag & NFSSVC_AUTHINFAIL) == 0) { + if (ncd->ncd_authlen <= nmp->nm_authlen && + ncd->ncd_verflen <= nmp->nm_verflen && + !copyin(ncd->ncd_authstr,nmp->nm_authstr,ncd->ncd_authlen)&& + !copyin(ncd->ncd_verfstr,nmp->nm_verfstr,ncd->ncd_verflen)){ + nmp->nm_authtype = ncd->ncd_authtype; + nmp->nm_authlen = ncd->ncd_authlen; + nmp->nm_verflen = ncd->ncd_verflen; +#ifdef NFSKERB + nmp->nm_key = ncd->ncd_key; +#endif + } else + nmp->nm_state |= NFSSTA_AUTHERR; + } else + nmp->nm_state |= NFSSTA_AUTHERR; + nmp->nm_state |= NFSSTA_HASAUTH; + wakeup((caddr_t)&nmp->nm_authlen); + } else + nmp->nm_state |= NFSSTA_WAITAUTH; + + /* + * Loop every second updating queue until there is a termination sig. + */ + while ((nmp->nm_state & NFSSTA_DISMNT) == 0) { + if (nmp->nm_flag & NFSMNT_NQNFS) { + /* + * If there are no outstanding requests (and therefore no + * processes in nfs_reply) and there is data in the receive + * queue, poke for callbacks. + */ + if (nfs_reqq.tqh_first == 0 && nmp->nm_so && + nmp->nm_so->so_rcv.sb_cc > 0) { + myrep.r_flags = R_GETONEREP; + myrep.r_nmp = nmp; + myrep.r_mrep = (struct mbuf *)0; + myrep.r_procp = (struct proc *)0; + (void) nfs_reply(&myrep); + } + + /* + * Loop through the leases, updating as required. + */ + np = nmp->nm_timerhead.cqh_first; + while (np != (void *)&nmp->nm_timerhead && + (nmp->nm_state & NFSSTA_DISMINPROG) == 0) { + vp = NFSTOV(np); + vpid = vp->v_id; + if (np->n_expiry < time_second) { + if (vget(vp, LK_EXCLUSIVE, p) == 0) { + nmp->nm_inprog = vp; + if (vpid == vp->v_id) { + CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); + np->n_timer.cqe_next = 0; + if (np->n_flag & (NMODIFIED | NQNFSEVICTED)) { + if (np->n_flag & NQNFSEVICTED) { + if (vp->v_type == VDIR) + nfs_invaldir(vp); + cache_purge(vp); + (void) nfs_vinvalbuf(vp, + V_SAVE, cred, p, 0); + np->n_flag &= ~NQNFSEVICTED; + (void) nqnfs_vacated(vp, cred); + } else if (vp->v_type == VREG) { + (void) VOP_FSYNC(vp, cred, + MNT_WAIT, p); + np->n_flag &= ~NMODIFIED; + } + } + } + vput(vp); + nmp->nm_inprog = NULLVP; + } + } else if ((np->n_expiry - NQ_RENEWAL) < time_second) { + if ((np->n_flag & (NQNFSWRITE | NQNFSNONCACHE)) + == NQNFSWRITE && + !TAILQ_EMPTY(&vp->v_dirtyblkhd) && + vget(vp, LK_EXCLUSIVE, p) == 0) { + nmp->nm_inprog = vp; + if (vpid == vp->v_id && + nqnfs_getlease(vp, ND_WRITE, cred, p)==0) + np->n_brev = np->n_lrev; + vput(vp); + nmp->nm_inprog = NULLVP; + } + } else + break; + if (np == nmp->nm_timerhead.cqh_first) + break; + np = nmp->nm_timerhead.cqh_first; + } + } + + /* + * Get an authorization string, if required. + */ + if ((nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_DISMNT | NFSSTA_HASAUTH)) == 0) { + ncd->ncd_authuid = nmp->nm_authuid; + if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs))) + nmp->nm_state |= NFSSTA_WAITAUTH; + else + return (ENEEDAUTH); + } + + /* + * Wait a bit (no pun) and do it again. + */ + if ((nmp->nm_state & NFSSTA_DISMNT) == 0 && + (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH))) { + error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH, + "nqnfstimr", hz / 3); + if (error == EINTR || error == ERESTART) + (void) dounmount(nmp->nm_mountp, 0, p); + } + } + + /* + * Finally, we can free up the mount structure. 
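The renewal branch of the nqnfs_clientd() loop above renews a write lease early, once it is within NQ_RENEWAL seconds of expiry, but only while the node is still in write-caching mode and has dirty buffers to protect. The helper below mirrors that test in isolation; the flag values and the NQ_RENEWAL constant are stand-ins for the definitions in nqnfs.h.

#include <stdio.h>

#define NQNFSWRITE    0x01      /* hypothetical stand-ins for the nfsnode flags */
#define NQNFSNONCACHE 0x02
#define NQ_RENEWAL    3         /* stand-in: renew this long before expiry */

/* Mirror of the early-renewal test in the lease loop above. */
static int
should_renew(long expiry, long now, int flags, int have_dirty_bufs)
{
    if (expiry < now)
        return (0);             /* already expired: handled separately */
    return ((expiry - NQ_RENEWAL) < now &&
        (flags & (NQNFSWRITE | NQNFSNONCACHE)) == NQNFSWRITE &&
        have_dirty_bufs);
}

int
main(void)
{
    printf("%d\n", should_renew(102, 100, NQNFSWRITE, 1));
    return (0);
}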
+ */ + for (nuidp = nmp->nm_uidlruhead.tqh_first; nuidp != 0; nuidp = nnuidp) { + nnuidp = nuidp->nu_lru.tqe_next; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); + free((caddr_t)nuidp, M_NFSUID); + } + zfree(nfsmount_zone, nmp); + if (error == EWOULDBLOCK) + error = 0; + return (error); +} + +#endif /* NFS_NOSERVER */ + +/* + * Adjust all timer queue expiry times when the time of day clock is changed. + * Called from the settimeofday() syscall. + */ +void +nqnfs_lease_updatetime(deltat) + register int deltat; +{ + struct proc *p = curproc; /* XXX */ + struct nqlease *lp; + struct nfsnode *np; + struct mount *mp, *nxtmp; + struct nfsmount *nmp; + int s; + + if (nqnfsstarttime != 0) + nqnfsstarttime += deltat; + s = splsoftclock(); + for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead; + lp = lp->lc_timer.cqe_next) + lp->lc_expiry += deltat; + splx(s); + + /* + * Search the mount list for all nqnfs mounts and do their timer + * queues. + */ + simple_lock(&mountlist_slock); + for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nxtmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nxtmp = TAILQ_NEXT(mp, mnt_list); + continue; + } + if (mp->mnt_stat.f_type == nfs_mount_type) { + nmp = VFSTONFS(mp); + if (nmp->nm_flag & NFSMNT_NQNFS) { + for (np = nmp->nm_timerhead.cqh_first; + np != (void *)&nmp->nm_timerhead; + np = np->n_timer.cqe_next) { + np->n_expiry += deltat; + } + } + } + simple_lock(&mountlist_slock); + nxtmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, p); + } + simple_unlock(&mountlist_slock); +} + +#ifndef NFS_NOSERVER +/* + * Lock a server lease. + */ +static void +nqsrv_locklease(lp) + struct nqlease *lp; +{ + + while (lp->lc_flag & LC_LOCKED) { + lp->lc_flag |= LC_WANTED; + (void) tsleep((caddr_t)lp, PSOCK, "nqlc", 0); + } + lp->lc_flag |= LC_LOCKED; + lp->lc_flag &= ~LC_WANTED; +} + +/* + * Unlock a server lease. + */ +static void +nqsrv_unlocklease(lp) + struct nqlease *lp; +{ + + lp->lc_flag &= ~LC_LOCKED; + if (lp->lc_flag & LC_WANTED) + wakeup((caddr_t)lp); +} +#endif /* NFS_NOSERVER */ + +/* + * Update a client lease. + */ +void +nqnfs_clientlease(nmp, np, rwflag, cachable, expiry, frev) + register struct nfsmount *nmp; + register struct nfsnode *np; + int rwflag, cachable; + time_t expiry; + u_quad_t frev; +{ + register struct nfsnode *tp; + + if (np->n_timer.cqe_next != 0) { + CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); + if (rwflag == ND_WRITE) + np->n_flag |= NQNFSWRITE; + } else if (rwflag == ND_READ) + np->n_flag &= ~NQNFSWRITE; + else + np->n_flag |= NQNFSWRITE; + if (cachable) + np->n_flag &= ~NQNFSNONCACHE; + else + np->n_flag |= NQNFSNONCACHE; + np->n_expiry = expiry; + np->n_lrev = frev; + tp = nmp->nm_timerhead.cqh_last; + while (tp != (void *)&nmp->nm_timerhead && tp->n_expiry > np->n_expiry) + tp = tp->n_timer.cqe_prev; + if (tp == (void *)&nmp->nm_timerhead) { + CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer); + } else { + CIRCLEQ_INSERT_AFTER(&nmp->nm_timerhead, tp, np, n_timer); + } +} diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c new file mode 100644 index 0000000..0d28670 --- /dev/null +++ b/sys/nfs/nfs_serv.c @@ -0,0 +1,4068 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_serv.c 8.8 (Berkeley) 7/31/95 + * $FreeBSD$ + */ + +/* + * nfs version 2 and 3 server calls to vnode ops + * - these routines generally have 3 phases + * 1 - break down and validate rpc request in mbuf list + * 2 - do the vnode ops for the request + * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c) + * 3 - build the rpc reply in an mbuf list + * nb: + * - do not mix the phases, since the nfsm_?? macros can return failures + * on a bad rpc or similar and do not do any vrele() or vput()'s + * + * - the nfsm_reply() macro generates an nfs rpc reply with the nfs + * error number iff error != 0 whereas + * returning an error from the server function implies a fatal error + * such as a badly constructed rpc request that should be dropped without + * a reply. + * For Version 3, nfsm_reply() does not return for the error case, since + * most version 3 rpcs return more than the status for error cases. + * + * Other notes: + * Warning: always pay careful attention to resource cleanup on return + * and note that nfsm_*() macros can terminate a procedure on certain + * errors. + * + * lookup() and namei() + * may return garbage in various structural fields/return elements + * if an error is returned, and may garbage up nd.ni_dvp even if no + * error is returned and you did not request LOCKPARENT or WANTPARENT. + * + * We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name + * buffer has been freed or not. 
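The three-phase structure described above (decode and validate the request, perform the vnode operation, build the reply) is easiest to see stripped of the mbuf and nfsm_* machinery. The toy routine below models it on flat buffers with invented status codes; note the distinction the comment draws between a protocol-level failure, which is reported inside the reply, and a mangled request, which is dropped without any reply (non-zero return here).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Toy model of the three phases, using a flat buffer instead of an mbuf
 * chain.  Returning non-zero means "drop the request"; a protocol-level
 * failure is reported inside the reply instead.
 */
static int
toy_getattr(const uint8_t *req, size_t reqlen, uint8_t *reply, size_t *replylen)
{
    uint32_t handle, status, attr;

    /* Phase 1: break down and validate the request. */
    if (reqlen < sizeof(handle))
        return (1);                     /* mangled RPC: drop, no reply */
    memcpy(&handle, req, sizeof(handle));

    /* Phase 2: do the "vnode op". */
    if (handle == 0) {
        status = 2;                     /* invented "stale handle" status */
        attr = 0;
    } else {
        status = 0;
        attr = 0755;                    /* pretend attribute */
    }

    /* Phase 3: build the reply, status first. */
    memcpy(reply, &status, sizeof(status));
    memcpy(reply + sizeof(status), &attr, sizeof(attr));
    *replylen = 2 * sizeof(uint32_t);
    return (0);
}

int
main(void)
{
    uint8_t req[4] = { 1, 0, 0, 0 }, reply[8];
    size_t rlen;

    printf("drop=%d\n", toy_getattr(req, sizeof(req), reply, &rlen));
    return (0);
}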
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/namei.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/dirent.h> +#include <sys/stat.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/buf.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_zone.h> +#include <vm/vm_object.h> + +#include <nfs/nfsproto.h> +#include <nfs/rpcv2.h> +#include <nfs/nfs.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nqnfs.h> + +#ifdef NFSRV_DEBUG +#define nfsdbprintf(info) printf info +#else +#define nfsdbprintf(info) +#endif + +#define MAX_COMMIT_COUNT (1024 * 1024) + +#define NUM_HEURISTIC 64 +#define NHUSE_INIT 64 +#define NHUSE_INC 16 +#define NHUSE_MAX 2048 + +static struct nfsheur { + struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ + off_t nh_nextr; /* next offset for sequential detection */ + int nh_use; /* use count for selection */ + int nh_seqcount; /* heuristic */ +} nfsheur[NUM_HEURISTIC]; + +nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, + NFFIFO, NFNON }; +#ifndef NFS_NOSERVER +nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, + NFCHR, NFNON }; +/* Global vars */ +extern u_int32_t nfs_xdrneg1; +extern u_int32_t nfs_false, nfs_true; +extern enum vtype nv3tov_type[8]; +extern struct nfsstats nfsstats; + +int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000; +int nfsrvw_procrastinate_v3 = 0; + +static struct timeval nfsver = { 0 }; + +SYSCTL_DECL(_vfs_nfs); + +static int nfs_async; +SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, ""); +static int nfs_commit_blks; +static int nfs_commit_miss; +SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, ""); +SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, ""); + +static int nfsrv_access __P((struct vnode *,int,struct ucred *,int, + struct proc *, int)); +static void nfsrvw_coalesce __P((struct nfsrv_descript *, + struct nfsrv_descript *)); + +/* + * Clear nameidata fields that are tested in nsfmout cleanup code prior + * to using first nfsm macro (that might jump to the cleanup code). 
+ */ + +static __inline +void +ndclear(struct nameidata *nd) +{ + nd->ni_cnd.cn_flags = 0; + nd->ni_vp = NULL; + nd->ni_dvp = NULL; + nd->ni_startdir = NULL; +} + +/* + * nfs v3 access service + */ +int +nfsrv3_access(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + struct vnode *vp = NULL; + nfsfh_t nfh; + fhandle_t *fhp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, cache, getret; + char *cp2; + struct mbuf *mb, *mreq, *mb2; + struct vattr vattr, *vap = &vattr; + u_long testmode, nfsmode; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + cache = 0; +#endif + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, + (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(1, (struct vattr *)0); + error = 0; + goto nfsmout; + } + nfsmode = fxdr_unsigned(u_int32_t, *tl); + if ((nfsmode & NFSV3ACCESS_READ) && + nfsrv_access(vp, VREAD, cred, rdonly, procp, 0)) + nfsmode &= ~NFSV3ACCESS_READ; + if (vp->v_type == VDIR) + testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | + NFSV3ACCESS_DELETE); + else + testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); + if ((nfsmode & testmode) && + nfsrv_access(vp, VWRITE, cred, rdonly, procp, 0)) + nfsmode &= ~testmode; + if (vp->v_type == VDIR) + testmode = NFSV3ACCESS_LOOKUP; + else + testmode = NFSV3ACCESS_EXECUTE; + if ((nfsmode & testmode) && + nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0)) + nfsmode &= ~testmode; + getret = VOP_GETATTR(vp, vap, cred, procp); + vput(vp); + vp = NULL; + nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, vap); + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(nfsmode); +nfsmout: + if (vp) + vput(vp); + return(error); +} + +/* + * nfs getattr service + */ +int +nfsrv_getattr(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register struct nfs_fattr *fp; + struct vattr va; + register struct vattr *vap = &va; + struct vnode *vp = NULL; + nfsfh_t nfh; + fhandle_t *fhp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, cache; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(0); + error = 0; + goto nfsmout; + } + nqsrv_getl(vp, ND_READ); + error = VOP_GETATTR(vp, vap, cred, procp); + vput(vp); + vp = NULL; + nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); + if (error) { + error = 0; + goto nfsmout; + } + nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); + nfsm_srvfillattr(vap, fp); + /* fall through */ + +nfsmout: + if (vp) + vput(vp); + return(error); +} + +/* + * nfs setattr service + */ +int +nfsrv_setattr(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct 
nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + struct vattr va, preat; + register struct vattr *vap = &va; + register struct nfsv2_sattr *sp; + register struct nfs_fattr *fp; + struct vnode *vp = NULL; + nfsfh_t nfh; + fhandle_t *fhp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, cache, preat_ret = 1, postat_ret = 1; + int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + u_quad_t frev; + struct timespec guard; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + VATTR_NULL(vap); + if (v3) { + nfsm_srvsattr(vap); + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + gcheck = fxdr_unsigned(int, *tl); + if (gcheck) { + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + fxdr_nfsv3time(tl, &guard); + } + } else { + nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); + /* + * Nah nah nah nah na nah + * There is a bug in the Sun client that puts 0xffff in the mode + * field of sattr when it should put in 0xffffffff. The u_short + * doesn't sign extend. + * --> check the low order 2 bytes for 0xffff + */ + if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) + vap->va_mode = nfstov_mode(sp->sa_mode); + if (sp->sa_uid != nfs_xdrneg1) + vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid); + if (sp->sa_gid != nfs_xdrneg1) + vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid); + if (sp->sa_size != nfs_xdrneg1) + vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size); + if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) { +#ifdef notyet + fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime); +#else + vap->va_atime.tv_sec = + fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec); + vap->va_atime.tv_nsec = 0; +#endif + } + if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1) + fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime); + + } + + /* + * Now that we have all the fields, lets do it. + */ + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, + (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); + error = 0; + goto nfsmout; + } + + /* + * vp now an active resource, pay careful attention to cleanup + */ + + nqsrv_getl(vp, ND_WRITE); + if (v3) { + error = preat_ret = VOP_GETATTR(vp, &preat, cred, procp); + if (!error && gcheck && + (preat.va_ctime.tv_sec != guard.tv_sec || + preat.va_ctime.tv_nsec != guard.tv_nsec)) + error = NFSERR_NOT_SYNC; + if (error) { + vput(vp); + vp = NULL; + nfsm_reply(NFSX_WCCDATA(v3)); + nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); + error = 0; + goto nfsmout; + } + } + + /* + * If the size is being changed write acces is required, otherwise + * just check for a read only file system. 
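Both the size test just below and the Sun-client workaround earlier in this routine depend on all-ones sentinels meaning "field not supplied". A small standalone illustration (hypothetical wire values, not code from the diff) of why only the low 16 bits of the mode can be trusted:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint16_t short_mode = (uint16_t)-1;   /* what the buggy client effectively sends */
	uint32_t wire_mode  = short_mode;     /* zero-extends to 0x0000ffff, not 0xffffffff */
	uint32_t wire_size  = 0xffffffff;     /* correctly encoded "unset" field */

	printf("widened mode = %#x\n", (unsigned)wire_mode);
	if ((wire_mode & 0xffff) != 0xffff)   /* mask test accepts both encodings */
		printf("mode was supplied: %#o\n", (unsigned)(wire_mode & 07777));
	else
		printf("mode left unchanged\n");
	if (wire_size != 0xffffffff)
		printf("size was supplied\n");
	else
		printf("size left unchanged\n");
	return (0);
}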
+ */ + if (vap->va_size == ((u_quad_t)((quad_t) -1))) { + if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { + error = EROFS; + goto out; + } + } else { + if (vp->v_type == VDIR) { + error = EISDIR; + goto out; + } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly, + procp, 0)) != 0) + goto out; + } + error = VOP_SETATTR(vp, vap, cred, procp); + postat_ret = VOP_GETATTR(vp, vap, cred, procp); + if (!error) + error = postat_ret; +out: + vput(vp); + vp = NULL; + nfsm_reply(NFSX_WCCORFATTR(v3)); + if (v3) { + nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); + error = 0; + goto nfsmout; + } else { + nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); + nfsm_srvfillattr(vap, fp); + } + /* fall through */ + +nfsmout: + if (vp) + vput(vp); + return(error); +} + +/* + * nfs lookup rpc + */ +int +nfsrv_lookup(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register struct nfs_fattr *fp; + struct nameidata nd, ind, *ndp = &nd; + struct vnode *vp, *dirp = NULL; + nfsfh_t nfh; + fhandle_t *fhp; + register caddr_t cp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, cache, len, dirattr_ret = 1; + int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vattr va, dirattr, *vap = &va; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + ndclear(&nd); + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvnamesiz(len); + + pubflag = nfs_ispublicfh(fhp); + + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; + error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, + &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), pubflag); + + /* + * namei failure, only dirp to cleanup. Clear out garbarge from + * structure in case macros jump to nfsmout. + */ + + if (error) { + if (dirp) { + if (v3) + dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, + procp); + vrele(dirp); + dirp = NULL; + } + nfsm_reply(NFSX_POSTOPATTR(v3)); + nfsm_srvpostop_attr(dirattr_ret, &dirattr); + error = 0; + goto nfsmout; + } + + /* + * Locate index file for public filehandle + * + * error is 0 on entry and 0 on exit from this block. + */ + + if (pubflag) { + if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) { + /* + * Setup call to lookup() to see if we can find + * the index file. Arguably, this doesn't belong + * in a kernel.. Ugh. If an error occurs, do not + * try to install an index file and then clear the + * error. + * + * When we replace nd with ind and redirect ndp, + * maintenance of ni_startdir and ni_vp shift to + * ind and we have to clean them up in the old nd. + * However, the cnd resource continues to be maintained + * via the original nd. Confused? You aren't alone! + */ + ind = nd; + VOP_UNLOCK(nd.ni_vp, 0, procp); + ind.ni_pathlen = strlen(nfs_pub.np_index); + ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf = + nfs_pub.np_index; + ind.ni_startdir = nd.ni_vp; + VREF(ind.ni_startdir); + + error = lookup(&ind); + ind.ni_dvp = NULL; + + if (error == 0) { + /* + * Found an index file. Get rid of + * the old references. 
transfer nd.ni_vp' + */ + if (dirp) + vrele(dirp); + dirp = nd.ni_vp; + nd.ni_vp = NULL; + vrele(nd.ni_startdir); + nd.ni_startdir = NULL; + ndp = &ind; + } + error = 0; + } + /* + * If the public filehandle was used, check that this lookup + * didn't result in a filehandle outside the publicly exported + * filesystem. We clear the poor vp here to avoid lockups due + * to NFS I/O. + */ + + if (ndp->ni_vp->v_mount != nfs_pub.np_mount) { + vput(nd.ni_vp); + nd.ni_vp = NULL; + error = EPERM; + } + } + + if (dirp) { + if (v3) + dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, + procp); + vrele(dirp); + dirp = NULL; + } + + /* + * Resources at this point: + * ndp->ni_vp may not be NULL + * + */ + + if (error) { + nfsm_reply(NFSX_POSTOPATTR(v3)); + nfsm_srvpostop_attr(dirattr_ret, &dirattr); + error = 0; + goto nfsmout; + } + + nqsrv_getl(ndp->ni_startdir, ND_READ); + + /* + * Clear out some resources prior to potentially blocking. This + * is not as critical as ni_dvp resources in other routines, but + * it helps. + */ + vrele(ndp->ni_startdir); + ndp->ni_startdir = NULL; + NDFREE(&nd, NDF_ONLY_PNBUF); + + /* + * Get underlying attribute, then release remaining resources ( for + * the same potential blocking reason ) and reply. + */ + vp = ndp->ni_vp; + bzero((caddr_t)fhp, sizeof(nfh)); + fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; + error = VFS_VPTOFH(vp, &fhp->fh_fid); + if (!error) + error = VOP_GETATTR(vp, vap, cred, procp); + + vput(vp); + ndp->ni_vp = NULL; + nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3)); + if (error) { + nfsm_srvpostop_attr(dirattr_ret, &dirattr); + error = 0; + goto nfsmout; + } + nfsm_srvfhtom(fhp, v3); + if (v3) { + nfsm_srvpostop_attr(0, vap); + nfsm_srvpostop_attr(dirattr_ret, &dirattr); + } else { + nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); + nfsm_srvfillattr(vap, fp); + } + +nfsmout: + if (dirp) + vrele(dirp); + NDFREE(&nd, NDF_ONLY_PNBUF); + if (ndp->ni_startdir) + vrele(ndp->ni_startdir); + if (ndp->ni_vp) + vput(ndp->ni_vp); + return (error); +} + +/* + * nfs readlink service + */ +int +nfsrv_readlink(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; + register struct iovec *ivp = iv; + register struct mbuf *mp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, cache, i, tlen, len, getret; + int v3 = (nfsd->nd_flag & ND_NFSV3); + char *cp2; + struct mbuf *mb, *mb2, *mp2, *mp3, *mreq; + struct vnode *vp = NULL; + struct vattr attr; + nfsfh_t nfh; + fhandle_t *fhp; + struct uio io, *uiop = &io; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + mp2 = (struct mbuf *)0; +#endif + mp3 = NULL; + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + len = 0; + i = 0; + while (len < NFS_MAXPATHLEN) { + MGET(mp, M_WAIT, MT_DATA); + MCLGET(mp, M_WAIT); + mp->m_len = NFSMSIZ(mp); + if (len == 0) + mp3 = mp2 = mp; + else { + mp2->m_next = mp; + mp2 = mp; + } + if ((len+mp->m_len) > NFS_MAXPATHLEN) { + mp->m_len = NFS_MAXPATHLEN-len; + len = NFS_MAXPATHLEN; + } else + len += mp->m_len; + ivp->iov_base = mtod(mp, caddr_t); + ivp->iov_len = mp->m_len; + i++; + ivp++; + } + uiop->uio_iov = iv; + uiop->uio_iovcnt = i; + uiop->uio_offset = 0; + uiop->uio_resid = len; + uiop->uio_rw = UIO_READ; 
+ uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_procp = (struct proc *)0; + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvpostop_attr(1, (struct vattr *)0); + error = 0; + goto nfsmout; + } + if (vp->v_type != VLNK) { + if (v3) + error = EINVAL; + else + error = ENXIO; + goto out; + } + nqsrv_getl(vp, ND_READ); + error = VOP_READLINK(vp, uiop, cred); +out: + getret = VOP_GETATTR(vp, &attr, cred, procp); + vput(vp); + vp = NULL; + nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED); + if (v3) { + nfsm_srvpostop_attr(getret, &attr); + if (error) { + error = 0; + goto nfsmout; + } + } + if (uiop->uio_resid > 0) { + len -= uiop->uio_resid; + tlen = nfsm_rndup(len); + nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len); + } + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(len); + mb->m_next = mp3; + mp3 = NULL; +nfsmout: + if (mp3) + m_freem(mp3); + if (vp) + vput(vp); + return(error); +} + +/* + * nfs read service + */ +int +nfsrv_read(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register struct iovec *iv; + struct iovec *iv2; + register struct mbuf *m; + register struct nfs_fattr *fp; + register u_int32_t *tl; + register int32_t t1; + register int i; + caddr_t bpos; + int error = 0, rdonly, cache, cnt, len, left, siz, tlen, getret; + int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct mbuf *m2; + struct vnode *vp = NULL; + nfsfh_t nfh; + fhandle_t *fhp; + struct uio io, *uiop = &io; + struct vattr va, *vap = &va; + struct nfsheur *nh; + off_t off; + int ioflag = 0; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + if (v3) { + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + off = fxdr_hyper(tl); + } else { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + off = (off_t)fxdr_unsigned(u_int32_t, *tl); + } + nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd)); + + /* + * Reference vp. If an error occurs, vp will be invalid, but we + * have to NULL it just in case. The macros might goto nfsmout + * as well. + */ + + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + vp = NULL; + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvpostop_attr(1, (struct vattr *)0); + error = 0; + goto nfsmout; + } + + if (vp->v_type != VREG) { + if (v3) + error = EINVAL; + else + error = (vp->v_type == VDIR) ? 
EISDIR : EACCES; + } + if (!error) { + nqsrv_getl(vp, ND_READ); + if ((error = nfsrv_access(vp, VREAD, cred, rdonly, procp, 1)) != 0) + error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 1); + } + getret = VOP_GETATTR(vp, vap, cred, procp); + if (!error) + error = getret; + if (error) { + vput(vp); + vp = NULL; + nfsm_reply(NFSX_POSTOPATTR(v3)); + nfsm_srvpostop_attr(getret, vap); + error = 0; + goto nfsmout; + } + + /* + * Calculate byte count to read + */ + + if (off >= vap->va_size) + cnt = 0; + else if ((off + reqlen) > vap->va_size) + cnt = vap->va_size - off; + else + cnt = reqlen; + + /* + * Calculate seqcount for heuristic + */ + + { + int hi; + int try = 4; + + /* + * Locate best candidate + */ + + hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) & (NUM_HEURISTIC - 1); + nh = &nfsheur[hi]; + + while (try--) { + if (nfsheur[hi].nh_vp == vp) { + nh = &nfsheur[hi]; + break; + } + if (nfsheur[hi].nh_use > 0) + --nfsheur[hi].nh_use; + hi = (hi + 1) & (NUM_HEURISTIC - 1); + if (nfsheur[hi].nh_use < nh->nh_use) + nh = &nfsheur[hi]; + } + + if (nh->nh_vp != vp) { + nh->nh_vp = vp; + nh->nh_nextr = off; + nh->nh_use = NHUSE_INIT; + if (off == 0) + nh->nh_seqcount = 4; + else + nh->nh_seqcount = 1; + } + + /* + * Calculate heuristic + */ + + if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) { + if (++nh->nh_seqcount > 127) + nh->nh_seqcount = 127; + } else if (nh->nh_seqcount > 1) { + nh->nh_seqcount = 1; + } else { + nh->nh_seqcount = 0; + } + nh->nh_use += NHUSE_INC; + if (nh->nh_use > NHUSE_MAX) + nh->nh_use = NHUSE_MAX; + ioflag |= nh->nh_seqcount << 16; + } + + nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt)); + if (v3) { + nfsm_build(tl, u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED); + *tl++ = nfs_true; + fp = (struct nfs_fattr *)tl; + tl += (NFSX_V3FATTR / sizeof (u_int32_t)); + } else { + nfsm_build(tl, u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED); + fp = (struct nfs_fattr *)tl; + tl += (NFSX_V2FATTR / sizeof (u_int32_t)); + } + len = left = nfsm_rndup(cnt); + if (cnt > 0) { + /* + * Generate the mbuf list with the uio_iov ref. to it. + */ + i = 0; + m = m2 = mb; + while (left > 0) { + siz = min(M_TRAILINGSPACE(m), left); + if (siz > 0) { + left -= siz; + i++; + } + if (left > 0) { + MGET(m, M_WAIT, MT_DATA); + MCLGET(m, M_WAIT); + m->m_len = 0; + m2->m_next = m; + m2 = m; + } + } + MALLOC(iv, struct iovec *, i * sizeof (struct iovec), + M_TEMP, M_WAITOK); + uiop->uio_iov = iv2 = iv; + m = mb; + left = len; + i = 0; + while (left > 0) { + if (m == NULL) + panic("nfsrv_read iov"); + siz = min(M_TRAILINGSPACE(m), left); + if (siz > 0) { + iv->iov_base = mtod(m, caddr_t) + m->m_len; + iv->iov_len = siz; + m->m_len += siz; + left -= siz; + iv++; + i++; + } + m = m->m_next; + } + uiop->uio_iovcnt = i; + uiop->uio_offset = off; + uiop->uio_resid = len; + uiop->uio_rw = UIO_READ; + uiop->uio_segflg = UIO_SYSSPACE; + error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); + off = uiop->uio_offset; + nh->nh_nextr = off; + FREE((caddr_t)iv2, M_TEMP); + if (error || (getret = VOP_GETATTR(vp, vap, cred, procp))) { + if (!error) + error = getret; + m_freem(mreq); + vput(vp); + vp = NULL; + nfsm_reply(NFSX_POSTOPATTR(v3)); + nfsm_srvpostop_attr(getret, vap); + error = 0; + goto nfsmout; + } + } else { + uiop->uio_resid = 0; + } + vput(vp); + vp = NULL; + nfsm_srvfillattr(vap, fp); + tlen = len - uiop->uio_resid; + cnt = cnt < tlen ? 
cnt : tlen; + tlen = nfsm_rndup(cnt); + if (len != tlen || tlen != cnt) + nfsm_adj(mb, len - tlen, tlen - cnt); + if (v3) { + *tl++ = txdr_unsigned(cnt); + if (len < reqlen) + *tl++ = nfs_true; + else + *tl++ = nfs_false; + } + *tl = txdr_unsigned(cnt); +nfsmout: + if (vp) + vput(vp); + return(error); +} + +/* + * nfs write service + */ +int +nfsrv_write(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register struct iovec *ivp; + register int i, cnt; + register struct mbuf *mp; + register struct nfs_fattr *fp; + struct iovec *iv; + struct vattr va, forat; + register struct vattr *vap = &va; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, cache, len, forat_ret = 1; + int ioflags, aftat_ret = 1, retlen, zeroing, adjust; + int stable = NFSV3WRITE_FILESYNC; + int v3 = (nfsd->nd_flag & ND_NFSV3); + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *vp = NULL; + nfsfh_t nfh; + fhandle_t *fhp; + struct uio io, *uiop = &io; + off_t off; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + if (mrep == NULL) { + *mrq = NULL; + error = 0; + goto nfsmout; + } + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + if (v3) { + nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED); + off = fxdr_hyper(tl); + tl += 3; + stable = fxdr_unsigned(int, *tl++); + } else { + nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + off = (off_t)fxdr_unsigned(u_int32_t, *++tl); + tl += 2; + if (nfs_async) + stable = NFSV3WRITE_UNSTABLE; + } + retlen = len = fxdr_unsigned(int32_t, *tl); + cnt = i = 0; + + /* + * For NFS Version 2, it is not obvious what a write of zero length + * should do, but I might as well be consistent with Version 3, + * which is to return ok so long as there are no permission problems. + */ + if (len > 0) { + zeroing = 1; + mp = mrep; + while (mp) { + if (mp == md) { + zeroing = 0; + adjust = dpos - mtod(mp, caddr_t); + mp->m_len -= adjust; + if (mp->m_len > 0 && adjust > 0) + NFSMADV(mp, adjust); + } + if (zeroing) + mp->m_len = 0; + else if (mp->m_len > 0) { + i += mp->m_len; + if (i > len) { + mp->m_len -= (i - len); + zeroing = 1; + } + if (mp->m_len > 0) + cnt++; + } + mp = mp->m_next; + } + } + if (len > NFS_MAXDATA || len < 0 || i < len) { + error = EIO; + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); + error = 0; + goto nfsmout; + } + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + vp = NULL; + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); + error = 0; + goto nfsmout; + } + if (v3) + forat_ret = VOP_GETATTR(vp, &forat, cred, procp); + if (vp->v_type != VREG) { + if (v3) + error = EINVAL; + else + error = (vp->v_type == VDIR) ? 
EISDIR : EACCES; + } + if (!error) { + nqsrv_getl(vp, ND_WRITE); + error = nfsrv_access(vp, VWRITE, cred, rdonly, procp, 1); + } + if (error) { + vput(vp); + vp = NULL; + nfsm_reply(NFSX_WCCDATA(v3)); + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); + error = 0; + goto nfsmout; + } + + if (len > 0) { + MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, + M_WAITOK); + uiop->uio_iov = iv = ivp; + uiop->uio_iovcnt = cnt; + mp = mrep; + while (mp) { + if (mp->m_len > 0) { + ivp->iov_base = mtod(mp, caddr_t); + ivp->iov_len = mp->m_len; + ivp++; + } + mp = mp->m_next; + } + + /* + * XXX + * The IO_METASYNC flag indicates that all metadata (and not just + * enough to ensure data integrity) mus be written to stable storage + * synchronously. + * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.) + */ + if (stable == NFSV3WRITE_UNSTABLE) + ioflags = IO_NODELOCKED; + else if (stable == NFSV3WRITE_DATASYNC) + ioflags = (IO_SYNC | IO_NODELOCKED); + else + ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); + uiop->uio_resid = len; + uiop->uio_rw = UIO_WRITE; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_procp = (struct proc *)0; + uiop->uio_offset = off; + error = VOP_WRITE(vp, uiop, ioflags, cred); + nfsstats.srvvop_writes++; + FREE((caddr_t)iv, M_TEMP); + } + aftat_ret = VOP_GETATTR(vp, vap, cred, procp); + vput(vp); + vp = NULL; + if (!error) + error = aftat_ret; + nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3)); + if (v3) { + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); + if (error) { + error = 0; + goto nfsmout; + } + nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(retlen); + /* + * If nfs_async is set, then pretend the write was FILESYNC. + */ + if (stable == NFSV3WRITE_UNSTABLE && !nfs_async) + *tl++ = txdr_unsigned(stable); + else + *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC); + /* + * Actually, there is no need to txdr these fields, + * but it may make the values more human readable, + * for debugging purposes. + */ + if (nfsver.tv_sec == 0) + nfsver = boottime; + *tl++ = txdr_unsigned(nfsver.tv_sec); + *tl = txdr_unsigned(nfsver.tv_usec); + } else { + nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); + nfsm_srvfillattr(vap, fp); + } +nfsmout: + if (vp) + vput(vp); + return(error); +} + +/* + * NFS write service with write gathering support. Called when + * nfsrvw_procrastinate > 0. + * See: Chet Juszczak, "Improving the Write Performance of an NFS Server", + * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco, + * Jan. 1994. 
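A much-simplified sketch of the coalescing idea, assuming a hypothetical struct wreq in place of the request descriptor and ignoring the mbuf, hash-queue and credential handling that the real code below performs:

#include <stdio.h>

/* Hypothetical, simplified stand-in for a queued write request. */
struct wreq {
	long off;	/* starting byte offset */
	long eoff;	/* one past the last byte */
};

/*
 * Merge 'req' into 'owp' when the two byte ranges touch or overlap.
 * Returns 1 if merged, 0 if the ranges are disjoint and must remain
 * separate writes.
 */
static int
coalesce(struct wreq *owp, const struct wreq *req)
{
	long overlap;

	if (req->off > owp->eoff || req->eoff < owp->off)
		return (0);		/* disjoint */
	overlap = owp->eoff - req->off;	/* >= 0: this much data is duplicated */
	(void)overlap;			/* the real code trims it off the mbuf chain */
	if (req->eoff > owp->eoff)
		owp->eoff = req->eoff;
	return (1);
}

int
main(void)
{
	struct wreq first = { 0, 8192 };
	struct wreq second = { 8192, 16384 };	/* exactly contiguous */

	if (coalesce(&first, &second))
		printf("single write: [%ld, %ld)\n", first.off, first.eoff);
	return (0);
}

Only the byte-range arithmetic is modelled here; the decisions in the real coalescing code are additionally gated on matching file handles and credentials.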
+ */ +int +nfsrv_writegather(ndp, slp, procp, mrq) + struct nfsrv_descript **ndp; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + register struct iovec *ivp; + register struct mbuf *mp; + register struct nfsrv_descript *wp, *nfsd, *owp, *swp; + register struct nfs_fattr *fp; + register int i; + struct iovec *iov; + struct nfsrvw_delayhash *wpp; + struct ucred *cred; + struct vattr va, forat; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos, dpos; + int error = 0, rdonly, cache, len, forat_ret = 1; + int ioflags, aftat_ret = 1, s, adjust, v3, zeroing; + char *cp2; + struct mbuf *mb, *mb2, *mreq, *mrep, *md; + struct vnode *vp = NULL; + struct uio io, *uiop = &io; + u_quad_t frev, cur_usec; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + i = 0; + len = 0; +#endif + *mrq = NULL; + if (*ndp) { + nfsd = *ndp; + *ndp = NULL; + mrep = nfsd->nd_mrep; + md = nfsd->nd_md; + dpos = nfsd->nd_dpos; + cred = &nfsd->nd_cr; + v3 = (nfsd->nd_flag & ND_NFSV3); + LIST_INIT(&nfsd->nd_coalesce); + nfsd->nd_mreq = NULL; + nfsd->nd_stable = NFSV3WRITE_FILESYNC; + cur_usec = nfs_curusec(); + nfsd->nd_time = cur_usec + + (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate); + + /* + * Now, get the write header.. + */ + nfsm_srvmtofh(&nfsd->nd_fh); + if (v3) { + nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED); + nfsd->nd_off = fxdr_hyper(tl); + tl += 3; + nfsd->nd_stable = fxdr_unsigned(int, *tl++); + } else { + nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl); + tl += 2; + if (nfs_async) + nfsd->nd_stable = NFSV3WRITE_UNSTABLE; + } + len = fxdr_unsigned(int32_t, *tl); + nfsd->nd_len = len; + nfsd->nd_eoff = nfsd->nd_off + len; + + /* + * Trim the header out of the mbuf list and trim off any trailing + * junk so that the mbuf list has only the write data. + */ + zeroing = 1; + i = 0; + mp = mrep; + while (mp) { + if (mp == md) { + zeroing = 0; + adjust = dpos - mtod(mp, caddr_t); + mp->m_len -= adjust; + if (mp->m_len > 0 && adjust > 0) + NFSMADV(mp, adjust); + } + if (zeroing) + mp->m_len = 0; + else { + i += mp->m_len; + if (i > len) { + mp->m_len -= (i - len); + zeroing = 1; + } + } + mp = mp->m_next; + } + if (len > NFS_MAXDATA || len < 0 || i < len) { +nfsmout: + m_freem(mrep); + error = EIO; + nfsm_writereply(2 * NFSX_UNSIGNED, v3); + if (v3) + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); + nfsd->nd_mreq = mreq; + nfsd->nd_mrep = NULL; + nfsd->nd_time = 0; + } + + /* + * Add this entry to the hash and time queues. + */ + s = splsoftclock(); + owp = NULL; + wp = slp->ns_tq.lh_first; + while (wp && wp->nd_time < nfsd->nd_time) { + owp = wp; + wp = wp->nd_tq.le_next; + } + NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff)); + if (owp) { + LIST_INSERT_AFTER(owp, nfsd, nd_tq); + } else { + LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); + } + if (nfsd->nd_mrep) { + wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data); + owp = NULL; + wp = wpp->lh_first; + while (wp && + bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { + owp = wp; + wp = wp->nd_hash.le_next; + } + while (wp && wp->nd_off < nfsd->nd_off && + !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { + owp = wp; + wp = wp->nd_hash.le_next; + } + if (owp) { + LIST_INSERT_AFTER(owp, nfsd, nd_hash); + + /* + * Search the hash list for overlapping entries and + * coalesce. 
+ */ + for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) { + wp = nfsd->nd_hash.le_next; + if (NFSW_SAMECRED(owp, nfsd)) + nfsrvw_coalesce(owp, nfsd); + } + } else { + LIST_INSERT_HEAD(wpp, nfsd, nd_hash); + } + } + splx(s); + } + + /* + * Now, do VOP_WRITE()s for any one(s) that need to be done now + * and generate the associated reply mbuf list(s). + */ +loop1: + cur_usec = nfs_curusec(); + s = splsoftclock(); + for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) { + owp = nfsd->nd_tq.le_next; + if (nfsd->nd_time > cur_usec) + break; + if (nfsd->nd_mreq) + continue; + NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff)); + LIST_REMOVE(nfsd, nd_tq); + LIST_REMOVE(nfsd, nd_hash); + splx(s); + mrep = nfsd->nd_mrep; + nfsd->nd_mrep = NULL; + cred = &nfsd->nd_cr; + v3 = (nfsd->nd_flag & ND_NFSV3); + forat_ret = aftat_ret = 1; + error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp, + nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (!error) { + if (v3) + forat_ret = VOP_GETATTR(vp, &forat, cred, procp); + if (vp->v_type != VREG) { + if (v3) + error = EINVAL; + else + error = (vp->v_type == VDIR) ? EISDIR : EACCES; + } + } else { + vp = NULL; + } + if (!error) { + nqsrv_getl(vp, ND_WRITE); + error = nfsrv_access(vp, VWRITE, cred, rdonly, procp, 1); + } + + if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE) + ioflags = IO_NODELOCKED; + else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC) + ioflags = (IO_SYNC | IO_NODELOCKED); + else + ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); + uiop->uio_rw = UIO_WRITE; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_procp = (struct proc *)0; + uiop->uio_offset = nfsd->nd_off; + uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off; + if (uiop->uio_resid > 0) { + mp = mrep; + i = 0; + while (mp) { + if (mp->m_len > 0) + i++; + mp = mp->m_next; + } + uiop->uio_iovcnt = i; + MALLOC(iov, struct iovec *, i * sizeof (struct iovec), + M_TEMP, M_WAITOK); + uiop->uio_iov = ivp = iov; + mp = mrep; + while (mp) { + if (mp->m_len > 0) { + ivp->iov_base = mtod(mp, caddr_t); + ivp->iov_len = mp->m_len; + ivp++; + } + mp = mp->m_next; + } + if (!error) { + error = VOP_WRITE(vp, uiop, ioflags, cred); + nfsstats.srvvop_writes++; + } + FREE((caddr_t)iov, M_TEMP); + } + m_freem(mrep); + if (vp) { + aftat_ret = VOP_GETATTR(vp, &va, cred, procp); + vput(vp); + vp = NULL; + } + + /* + * Loop around generating replies for all write rpcs that have + * now been completed. + */ + swp = nfsd; + do { + NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff)); + if (error) { + nfsm_writereply(NFSX_WCCDATA(v3), v3); + if (v3) { + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); + } + } else { + nfsm_writereply(NFSX_PREOPATTR(v3) + + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + + NFSX_WRITEVERF(v3), v3); + if (v3) { + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); + nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(nfsd->nd_len); + *tl++ = txdr_unsigned(swp->nd_stable); + /* + * Actually, there is no need to txdr these fields, + * but it may make the values more human readable, + * for debugging purposes. + */ + if (nfsver.tv_sec == 0) + nfsver = boottime; + *tl++ = txdr_unsigned(nfsver.tv_sec); + *tl = txdr_unsigned(nfsver.tv_usec); + } else { + nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); + nfsm_srvfillattr(&va, fp); + } + } + nfsd->nd_mreq = mreq; + if (nfsd->nd_mrep) + panic("nfsrv_write: nd_mrep not free"); + + /* + * Done. Put it at the head of the timer queue so that + * the final phase can return the reply. 
+ */ + s = splsoftclock(); + if (nfsd != swp) { + nfsd->nd_time = 0; + LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); + } + nfsd = swp->nd_coalesce.lh_first; + if (nfsd) { + LIST_REMOVE(nfsd, nd_tq); + } + splx(s); + } while (nfsd); + s = splsoftclock(); + swp->nd_time = 0; + LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq); + splx(s); + goto loop1; + } + splx(s); + + /* + * Search for a reply to return. + */ + s = splsoftclock(); + for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) + if (nfsd->nd_mreq) { + NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff)); + LIST_REMOVE(nfsd, nd_tq); + *mrq = nfsd->nd_mreq; + *ndp = nfsd; + break; + } + splx(s); + return (0); +} + +/* + * Coalesce the write request nfsd into owp. To do this we must: + * - remove nfsd from the queues + * - merge nfsd->nd_mrep into owp->nd_mrep + * - update the nd_eoff and nd_stable for owp + * - put nfsd on owp's nd_coalesce list + * NB: Must be called at splsoftclock(). + */ +static void +nfsrvw_coalesce(owp, nfsd) + register struct nfsrv_descript *owp; + register struct nfsrv_descript *nfsd; +{ + register int overlap; + register struct mbuf *mp; + struct nfsrv_descript *p; + + NFS_DPF(WG, ("C%03x-%03x", + nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff)); + LIST_REMOVE(nfsd, nd_hash); + LIST_REMOVE(nfsd, nd_tq); + if (owp->nd_eoff < nfsd->nd_eoff) { + overlap = owp->nd_eoff - nfsd->nd_off; + if (overlap < 0) + panic("nfsrv_coalesce: bad off"); + if (overlap > 0) + m_adj(nfsd->nd_mrep, overlap); + mp = owp->nd_mrep; + while (mp->m_next) + mp = mp->m_next; + mp->m_next = nfsd->nd_mrep; + owp->nd_eoff = nfsd->nd_eoff; + } else + m_freem(nfsd->nd_mrep); + nfsd->nd_mrep = NULL; + if (nfsd->nd_stable == NFSV3WRITE_FILESYNC) + owp->nd_stable = NFSV3WRITE_FILESYNC; + else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC && + owp->nd_stable == NFSV3WRITE_UNSTABLE) + owp->nd_stable = NFSV3WRITE_DATASYNC; + LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq); + + /* + * If nfsd had anything else coalesced into it, transfer them + * to owp, otherwise their replies will never get sent. + */ + for (p = nfsd->nd_coalesce.lh_first; p; + p = nfsd->nd_coalesce.lh_first) { + LIST_REMOVE(p, nd_tq); + LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq); + } +} + +/* + * nfs create service + * now does a truncate to 0 length via. 
setattr if it already exists + */ +int +nfsrv_create(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register struct nfs_fattr *fp; + struct vattr va, dirfor, diraft; + register struct vattr *vap = &va; + register struct nfsv2_sattr *sp; + register u_int32_t *tl; + struct nameidata nd; + register int32_t t1; + caddr_t bpos; + int error = 0, rdev, cache, len, tsize, dirfor_ret = 1, diraft_ret = 1; + int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0; + caddr_t cp; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *dirp = (struct vnode *)0; + nfsfh_t nfh; + fhandle_t *fhp; + u_quad_t frev, tempsize; + u_char cverf[NFSX_V3CREATEVERF]; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + rdev = 0; +#endif + ndclear(&nd); + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvnamesiz(len); + + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; + + /* + * Call namei and do initial cleanup to get a few things + * out of the way. If we get an initial error we cleanup + * and return here to avoid special-casing the invalid nd + * structure through the rest of the case. dirp may be + * set even if an error occurs, but the nd structure will not + * be valid at all if an error occurs so we have to invalidate it + * prior to calling nfsm_reply ( which might goto nfsmout ). + */ + error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, + &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + if (dirp) { + if (v3) { + dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, + procp); + } else { + vrele(dirp); + dirp = NULL; + } + } + if (error) { + nfsm_reply(NFSX_WCCDATA(v3)); + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + error = 0; + goto nfsmout; + } + + /* + * No error. Continue. State: + * + * startdir is valid ( we release this immediately ) + * dirp may be valid + * nd.ni_vp may be valid + * nd.ni_dvp is valid + * + * The error state is set through the code and we may also do some + * opportunistic releasing of vnodes to avoid holding locks through + * NFS I/O. The cleanup at the end is a catch-all + */ + + VATTR_NULL(vap); + if (v3) { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + how = fxdr_unsigned(int, *tl); + switch (how) { + case NFSV3CREATE_GUARDED: + if (nd.ni_vp) { + error = EEXIST; + break; + } + /* fall through */ + case NFSV3CREATE_UNCHECKED: + nfsm_srvsattr(vap); + break; + case NFSV3CREATE_EXCLUSIVE: + nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF); + bcopy(cp, cverf, NFSX_V3CREATEVERF); + exclusive_flag = 1; + if (nd.ni_vp == NULL) + vap->va_mode = 0; + break; + }; + vap->va_type = VREG; + } else { + nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); + vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode)); + if (vap->va_type == VNON) + vap->va_type = VREG; + vap->va_mode = nfstov_mode(sp->sa_mode); + switch (vap->va_type) { + case VREG: + tsize = fxdr_unsigned(int32_t, sp->sa_size); + if (tsize != -1) + vap->va_size = (u_quad_t)tsize; + break; + case VCHR: + case VBLK: + case VFIFO: + rdev = fxdr_unsigned(long, sp->sa_size); + break; + default: + break; + }; + } + + /* + * Iff doesn't exist, create it + * otherwise just truncate to 0 length + * should I set the mode too ? 
+ * + * The only possible error we can have at this point is EEXIST. + * nd.ni_vp will also be non-NULL in that case. + */ + if (nd.ni_vp == NULL) { + if (vap->va_type == VREG || vap->va_type == VSOCK) { + nqsrv_getl(nd.ni_dvp, ND_WRITE); + error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + if (error) + NDFREE(&nd, NDF_ONLY_PNBUF); + else { + nfsrv_object_create(nd.ni_vp); + if (exclusive_flag) { + exclusive_flag = 0; + VATTR_NULL(vap); + bcopy(cverf, (caddr_t)&vap->va_atime, + NFSX_V3CREATEVERF); + error = VOP_SETATTR(nd.ni_vp, vap, cred, + procp); + } + } + } else if ( + vap->va_type == VCHR || + vap->va_type == VBLK || + vap->va_type == VFIFO + ) { + /* + * Handle SysV FIFO node special cases. All other + * devices require super user to access. + */ + if (vap->va_type == VCHR && rdev == 0xffffffff) + vap->va_type = VFIFO; + if (vap->va_type != VFIFO && + (error = suser_xxx(cred, 0, 0))) { + goto nfsmreply0; + } + vap->va_rdev = rdev; + nqsrv_getl(nd.ni_dvp, ND_WRITE); + + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + if (error) { + NDFREE(&nd, NDF_ONLY_PNBUF); + goto nfsmreply0; + } + vput(nd.ni_vp); + nd.ni_vp = NULL; + + /* + * release dvp prior to lookup + */ + vput(nd.ni_dvp); + nd.ni_dvp = NULL; + + /* + * Setup for lookup. + * + * Even though LOCKPARENT was cleared, ni_dvp may + * be garbage. + */ + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags &= ~(LOCKPARENT); + nd.ni_cnd.cn_proc = procp; + nd.ni_cnd.cn_cred = cred; + + error = lookup(&nd); + nd.ni_dvp = NULL; + + if (error != 0) { + nfsm_reply(0); + /* fall through on certain errors */ + } + nfsrv_object_create(nd.ni_vp); + if (nd.ni_cnd.cn_flags & ISSYMLINK) { + error = EINVAL; + goto nfsmreply0; + } + } else { + error = ENXIO; + } + } else { + if (vap->va_size != -1) { + error = nfsrv_access(nd.ni_vp, VWRITE, cred, + (nd.ni_cnd.cn_flags & RDONLY), procp, 0); + if (!error) { + nqsrv_getl(nd.ni_vp, ND_WRITE); + tempsize = vap->va_size; + VATTR_NULL(vap); + vap->va_size = tempsize; + error = VOP_SETATTR(nd.ni_vp, vap, cred, + procp); + } + } + } + + if (!error) { + bzero((caddr_t)fhp, sizeof(nfh)); + fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid; + error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid); + if (!error) + error = VOP_GETATTR(nd.ni_vp, vap, cred, procp); + } + if (v3) { + if (exclusive_flag && !error && + bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF)) + error = EEXIST; + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); + vrele(dirp); + dirp = NULL; + } + nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); + if (v3) { + if (!error) { + nfsm_srvpostop_fh(fhp); + nfsm_srvpostop_attr(0, vap); + } + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + error = 0; + } else { + nfsm_srvfhtom(fhp, v3); + nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); + nfsm_srvfillattr(vap, fp); + } + goto nfsmout; + +nfsmreply0: + nfsm_reply(0); + error = 0; + /* fall through */ + +nfsmout: + if (nd.ni_startdir) { + vrele(nd.ni_startdir); + nd.ni_startdir = NULL; + } + if (dirp) + vrele(dirp); + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + return (error); +} + +/* + * nfs v3 mknod service + */ +int +nfsrv_mknod(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + 
struct ucred *cred = &nfsd->nd_cr; + struct vattr va, dirfor, diraft; + register struct vattr *vap = &va; + register u_int32_t *tl; + struct nameidata nd; + register int32_t t1; + caddr_t bpos; + int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; + u_int32_t major, minor; + enum vtype vtyp; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *vp, *dirp = (struct vnode *)0; + nfsfh_t nfh; + fhandle_t *fhp; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + ndclear(&nd); + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvnamesiz(len); + + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; + + /* + * Handle nfs_namei() call. If an error occurs, the nd structure + * is not valid. However, nfsm_*() routines may still jump to + * nfsmout. + */ + + error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, + &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + if (dirp) + dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); + if (error) { + nfsm_reply(NFSX_WCCDATA(1)); + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + error = 0; + goto nfsmout; + } + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + vtyp = nfsv3tov_type(*tl); + if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { + error = NFSERR_BADTYPE; + goto out; + } + VATTR_NULL(vap); + nfsm_srvsattr(vap); + if (vtyp == VCHR || vtyp == VBLK) { + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + major = fxdr_unsigned(u_int32_t, *tl++); + minor = fxdr_unsigned(u_int32_t, *tl); + vap->va_rdev = makeudev(major, minor); + } + + /* + * Iff doesn't exist, create it. + */ + if (nd.ni_vp) { + error = EEXIST; + goto out; + } + vap->va_type = vtyp; + if (vtyp == VSOCK) { + vrele(nd.ni_startdir); + nd.ni_startdir = NULL; + nqsrv_getl(nd.ni_dvp, ND_WRITE); + error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + if (error) + NDFREE(&nd, NDF_ONLY_PNBUF); + } else { + if (vtyp != VFIFO && (error = suser_xxx(cred, 0, 0))) + goto out; + nqsrv_getl(nd.ni_dvp, ND_WRITE); + + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + if (error) { + NDFREE(&nd, NDF_ONLY_PNBUF); + goto out; + } + vput(nd.ni_vp); + nd.ni_vp = NULL; + + /* + * Release dvp prior to lookup + */ + vput(nd.ni_dvp); + nd.ni_dvp = NULL; + + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags &= ~(LOCKPARENT); + nd.ni_cnd.cn_proc = procp; + nd.ni_cnd.cn_cred = procp->p_ucred; + + error = lookup(&nd); + nd.ni_dvp = NULL; + + if (error) + goto out; + if (nd.ni_cnd.cn_flags & ISSYMLINK) + error = EINVAL; + } + + /* + * send response, cleanup, return. 
+ */ +out: + if (nd.ni_startdir) { + vrele(nd.ni_startdir); + nd.ni_startdir = NULL; + } + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + nd.ni_dvp = NULL; + } + vp = nd.ni_vp; + if (!error) { + bzero((caddr_t)fhp, sizeof(nfh)); + fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; + error = VFS_VPTOFH(vp, &fhp->fh_fid); + if (!error) + error = VOP_GETATTR(vp, vap, cred, procp); + vput(vp); + vp = NULL; + nd.ni_vp = NULL; + } + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); + if (dirp) { + vrele(dirp); + dirp = NULL; + } + nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); + if (!error) { + nfsm_srvpostop_fh(fhp); + nfsm_srvpostop_attr(0, vap); + } + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + return (0); +nfsmout: + if (dirp) + vrele(dirp); + if (nd.ni_startdir) + vrele(nd.ni_startdir); + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + return (error); +} + +/* + * nfs remove service + */ +int +nfsrv_remove(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + struct nameidata nd; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; + int v3 = (nfsd->nd_flag & ND_NFSV3); + char *cp2; + struct mbuf *mb, *mreq; + struct vnode *dirp; + struct vattr dirfor, diraft; + nfsfh_t nfh; + fhandle_t *fhp; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + ndclear(&nd); + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvnamesiz(len); + + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = DELETE; + nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, + &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + if (dirp) { + if (v3) { + dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, + procp); + } else { + vrele(dirp); + dirp = NULL; + } + } + if (error == 0) { + if (nd.ni_vp->v_type == VDIR) { + error = EPERM; /* POSIX */ + goto out; + } + /* + * The root of a mounted filesystem cannot be deleted. 
+ */ + if (nd.ni_vp->v_flag & VROOT) { + error = EBUSY; + goto out; + } +out: + if (!error) { + nqsrv_getl(nd.ni_dvp, ND_WRITE); + nqsrv_getl(nd.ni_vp, ND_WRITE); + error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + NDFREE(&nd, NDF_ONLY_PNBUF); + } + } + if (dirp && v3) { + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); + vrele(dirp); + dirp = NULL; + } + nfsm_reply(NFSX_WCCDATA(v3)); + if (v3) { + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + error = 0; + } +nfsmout: + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + return(error); +} + +/* + * nfs rename service + */ +int +nfsrv_rename(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, cache, len, len2, fdirfor_ret = 1, fdiraft_ret = 1; + int tdirfor_ret = 1, tdiraft_ret = 1; + int v3 = (nfsd->nd_flag & ND_NFSV3); + char *cp2; + struct mbuf *mb, *mreq; + struct nameidata fromnd, tond; + struct vnode *fvp, *tvp, *tdvp, *fdirp = (struct vnode *)0; + struct vnode *tdirp = (struct vnode *)0; + struct vattr fdirfor, fdiraft, tdirfor, tdiraft; + nfsfh_t fnfh, tnfh; + fhandle_t *ffhp, *tfhp; + u_quad_t frev; + uid_t saved_uid; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + fvp = (struct vnode *)0; +#endif + ffhp = &fnfh.fh_generic; + tfhp = &tnfh.fh_generic; + + /* + * Clear fields incase goto nfsmout occurs from macro. + */ + + ndclear(&fromnd); + ndclear(&tond); + + nfsm_srvmtofh(ffhp); + nfsm_srvnamesiz(len); + /* + * Remember our original uid so that we can reset cr_uid before + * the second nfs_namei() call, in case it is remapped. 
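A toy illustration of that save-and-restore step, with a hypothetical resolve() standing in for nfs_namei() and its export-specific uid remapping:

#include <stdio.h>

/* Hypothetical credential; only the field that may be rewritten is shown. */
struct cred {
	unsigned uid;
};

/* Stand-in for a helper that may remap the caller's uid for an export. */
static int
resolve(struct cred *cr, const char *name)
{
	if (cr->uid == 0)
		cr->uid = 65534;	/* map root to an anonymous uid */
	printf("resolved %s as uid %u\n", name, cr->uid);
	return (0);
}

int
main(void)
{
	struct cred cr = { 0 };
	unsigned saved_uid = cr.uid;	/* remember the original identity */

	resolve(&cr, "from");
	cr.uid = saved_uid;		/* undo any remapping before the second lookup */
	resolve(&cr, "to");
	return (0);
}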
+ */ + saved_uid = cred->cr_uid; + fromnd.ni_cnd.cn_cred = cred; + fromnd.ni_cnd.cn_nameiop = DELETE; + fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; + error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, + &dpos, &fdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + if (fdirp) { + if (v3) { + fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred, + procp); + } else { + vrele(fdirp); + fdirp = NULL; + } + } + if (error) { + nfsm_reply(2 * NFSX_WCCDATA(v3)); + nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); + nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); + error = 0; + goto nfsmout; + } + fvp = fromnd.ni_vp; + nfsm_srvmtofh(tfhp); + nfsm_strsiz(len2, NFS_MAXNAMLEN); + cred->cr_uid = saved_uid; + tond.ni_cnd.cn_cred = cred; + tond.ni_cnd.cn_nameiop = RENAME; + tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; + error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, + &dpos, &tdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + if (tdirp) { + if (v3) { + tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred, + procp); + } else { + vrele(tdirp); + tdirp = NULL; + } + } + if (error) + goto out1; + + tdvp = tond.ni_dvp; + tvp = tond.ni_vp; + if (tvp != NULL) { + if (fvp->v_type == VDIR && tvp->v_type != VDIR) { + if (v3) + error = EEXIST; + else + error = EISDIR; + goto out; + } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { + if (v3) + error = EEXIST; + else + error = ENOTDIR; + goto out; + } + if (tvp->v_type == VDIR && tvp->v_mountedhere) { + if (v3) + error = EXDEV; + else + error = ENOTEMPTY; + goto out; + } + } + if (fvp->v_type == VDIR && fvp->v_mountedhere) { + if (v3) + error = EXDEV; + else + error = ENOTEMPTY; + goto out; + } + if (fvp->v_mount != tdvp->v_mount) { + if (v3) + error = EXDEV; + else + error = ENOTEMPTY; + goto out; + } + if (fvp == tdvp) { + if (v3) + error = EINVAL; + else + error = ENOTEMPTY; + } + /* + * If source is the same as the destination (that is the + * same vnode with the same name in the same directory), + * then there is nothing to do. + */ + if (fvp == tvp && fromnd.ni_dvp == tdvp && + fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && + !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, + fromnd.ni_cnd.cn_namelen)) + error = -1; +out: + if (!error) { + /* + * The VOP_RENAME function releases all vnode references & + * locks prior to returning so we need to clear the pointers + * to bypass cleanup code later on. 
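The same hand-off idiom in a self-contained sketch, using a hypothetical refcounted obj rather than vnodes: once the callee has consumed the references, the caller's copies are set to NULL so the common cleanup path cannot release them a second time.

#include <stdlib.h>

/* Hypothetical refcounted object standing in for a referenced vnode. */
struct obj {
	int refs;
};

static struct obj *
obj_get(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (o != NULL)
		o->refs = 1;
	return (o);
}

static void
obj_put(struct obj *o)
{
	if (--o->refs == 0)
		free(o);
}

/* Consumes both references, success or failure, much as the rename op does. */
static int
consume_pair(struct obj *a, struct obj *b)
{
	obj_put(a);
	obj_put(b);
	return (0);
}

int
main(void)
{
	struct obj *src = obj_get(), *dst = obj_get();
	int error = 1;

	if (src == NULL || dst == NULL)
		goto cleanup;
	error = consume_pair(src, dst);
	src = dst = NULL;		/* ownership transferred: cleanup must skip them */
cleanup:
	if (src)
		obj_put(src);
	if (dst)
		obj_put(dst);
	return (error ? EXIT_FAILURE : EXIT_SUCCESS);
}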
+ */ + nqsrv_getl(fromnd.ni_dvp, ND_WRITE); + nqsrv_getl(tdvp, ND_WRITE); + if (tvp) { + nqsrv_getl(tvp, ND_WRITE); + } + error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, + tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); + fromnd.ni_dvp = NULL; + fromnd.ni_vp = NULL; + tond.ni_dvp = NULL; + tond.ni_vp = NULL; + if (error) { + fromnd.ni_cnd.cn_flags &= ~HASBUF; + tond.ni_cnd.cn_flags &= ~HASBUF; + } + } else { + if (error == -1) + error = 0; + } + /* fall through */ + +out1: + if (fdirp) + fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, procp); + if (tdirp) + tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, procp); + nfsm_reply(2 * NFSX_WCCDATA(v3)); + if (v3) { + nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); + nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); + } + error = 0; + /* fall through */ + +nfsmout: + /* + * Clear out tond related fields + */ + if (tdirp) + vrele(tdirp); + if (tond.ni_startdir) + vrele(tond.ni_startdir); + NDFREE(&tond, NDF_ONLY_PNBUF); + if (tond.ni_dvp) { + if (tond.ni_dvp == tond.ni_vp) + vrele(tond.ni_dvp); + else + vput(tond.ni_dvp); + } + if (tond.ni_vp) + vput(tond.ni_vp); + + /* + * Clear out fromnd related fields + */ + if (fdirp) + vrele(fdirp); + if (fromnd.ni_startdir) + vrele(fromnd.ni_startdir); + NDFREE(&fromnd, NDF_ONLY_PNBUF); + if (fromnd.ni_dvp) + vrele(fromnd.ni_dvp); + if (fromnd.ni_vp) + vrele(fromnd.ni_vp); + + return (error); +} + +/* + * nfs link service + */ +int +nfsrv_link(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + struct nameidata nd; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, cache, len, dirfor_ret = 1, diraft_ret = 1; + int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3); + char *cp2; + struct mbuf *mb, *mreq; + struct vnode *vp = NULL, *xp, *dirp = (struct vnode *)0; + struct vattr dirfor, diraft, at; + nfsfh_t nfh, dnfh; + fhandle_t *fhp, *dfhp; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + ndclear(&nd); + + fhp = &nfh.fh_generic; + dfhp = &dnfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvmtofh(dfhp); + nfsm_srvnamesiz(len); + + error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); + nfsm_srvpostop_attr(getret, &at); + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + vp = NULL; + error = 0; + goto nfsmout; + } + if (vp->v_type == VDIR) { + error = EPERM; /* POSIX */ + goto out1; + } + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT; + error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, + &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + if (dirp) { + if (v3) { + dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, + procp); + } else { + vrele(dirp); + dirp = NULL; + } + } + if (error) + goto out1; + + xp = nd.ni_vp; + if (xp != NULL) { + error = EEXIST; + goto out; + } + xp = nd.ni_dvp; + if (vp->v_mount != xp->v_mount) + error = EXDEV; +out: + if (!error) { + nqsrv_getl(vp, ND_WRITE); + nqsrv_getl(xp, ND_WRITE); + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + NDFREE(&nd, NDF_ONLY_PNBUF); + } + /* fall through */ + +out1: + if (v3) + getret = VOP_GETATTR(vp, &at, cred, procp); + if (dirp) + diraft_ret = 
VOP_GETATTR(dirp, &diraft, cred, procp); + nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); + if (v3) { + nfsm_srvpostop_attr(getret, &at); + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + error = 0; + } + /* fall through */ + +nfsmout: + NDFREE(&nd, NDF_ONLY_PNBUF); + if (dirp) + vrele(dirp); + if (vp) + vrele(vp); + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vrele(nd.ni_vp); + return(error); +} + +/* + * nfs symbolic link service + */ +int +nfsrv_symlink(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + struct vattr va, dirfor, diraft; + struct nameidata nd; + register struct vattr *vap = &va; + register u_int32_t *tl; + register int32_t t1; + struct nfsv2_sattr *sp; + char *bpos, *pathcp = (char *)0, *cp2; + struct uio io; + struct iovec iv; + int error = 0, cache, len, len2, dirfor_ret = 1, diraft_ret = 1; + int v3 = (nfsd->nd_flag & ND_NFSV3); + struct mbuf *mb, *mreq, *mb2; + struct vnode *dirp = (struct vnode *)0; + nfsfh_t nfh; + fhandle_t *fhp; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + ndclear(&nd); + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvnamesiz(len); + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; + error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, + &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + if (dirp) { + if (v3) { + dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, + procp); + } else { + vrele(dirp); + dirp = NULL; + } + } + if (error) + goto out; + + VATTR_NULL(vap); + if (v3) + nfsm_srvsattr(vap); + nfsm_strsiz(len2, NFS_MAXPATHLEN); + MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK); + iv.iov_base = pathcp; + iv.iov_len = len2; + io.uio_resid = len2; + io.uio_offset = 0; + io.uio_iov = &iv; + io.uio_iovcnt = 1; + io.uio_segflg = UIO_SYSSPACE; + io.uio_rw = UIO_READ; + io.uio_procp = (struct proc *)0; + nfsm_mtouio(&io, len2); + if (!v3) { + nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); + vap->va_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); + } + *(pathcp + len2) = '\0'; + if (nd.ni_vp) { + error = EEXIST; + goto out; + } + + /* + * issue symlink op. SAVESTART is set so the underlying path component + * is only freed by the VOP if an error occurs. + */ + nqsrv_getl(nd.ni_dvp, ND_WRITE); + error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp); + if (error) + NDFREE(&nd, NDF_ONLY_PNBUF); + else { + vput(nd.ni_vp); + nd.ni_vp = NULL; + } + /* + * releases directory prior to potential lookup op. + */ + vput(nd.ni_dvp); + nd.ni_dvp = NULL; + + if (error == 0) { + if (v3) { + /* + * Issue lookup. Leave SAVESTART set so we can easily free + * the name buffer later on. + * + * since LOCKPARENT is not set, ni_dvp will be garbage on + * return whether an error occurs or not. 
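The create-then-lookup pattern used here can be sketched with plain POSIX calls (hypothetical scratch file name, not code from the diff): the creation call yields no attributes, so the name is looked up again to build the post-operation reply.

#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
	struct stat st;
	const char *path = "demo.symlink";	/* hypothetical scratch name */

	if (symlink("target-need-not-exist", path) == -1) {
		perror("symlink");
		return (1);
	}
	/* The create call itself returns no attributes; look the name up again. */
	if (lstat(path, &st) == -1) {
		perror("lstat");
		return (1);
	}
	printf("mode %#o size %lld\n", (unsigned)(st.st_mode & 07777),
	    (long long)st.st_size);
	(void)unlink(path);
	return (0);
}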
+ */ + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW); + nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF); + nd.ni_cnd.cn_proc = procp; + nd.ni_cnd.cn_cred = cred; + + error = lookup(&nd); + nd.ni_dvp = NULL; + + if (error == 0) { + bzero((caddr_t)fhp, sizeof(nfh)); + fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid; + error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid); + if (!error) + error = VOP_GETATTR(nd.ni_vp, vap, cred, + procp); + vput(nd.ni_vp); + nd.ni_vp = NULL; + } + } + } +out: + /* + * These releases aren't strictly required, does even doing them + * make any sense? XXX can nfsm_reply() block? + */ + if (pathcp) { + FREE(pathcp, M_TEMP); + pathcp = NULL; + } + if (dirp) { + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); + vrele(dirp); + dirp = NULL; + } + if (nd.ni_startdir) { + vrele(nd.ni_startdir); + nd.ni_startdir = NULL; + } + nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); + if (v3) { + if (!error) { + nfsm_srvpostop_fh(fhp); + nfsm_srvpostop_attr(0, vap); + } + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + } + error = 0; + /* fall through */ + +nfsmout: + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vrele(nd.ni_vp); + if (nd.ni_startdir) + vrele(nd.ni_startdir); + if (dirp) + vrele(dirp); + if (pathcp) + FREE(pathcp, M_TEMP); + + return (error); +} + +/* + * nfs mkdir service + */ +int +nfsrv_mkdir(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + struct vattr va, dirfor, diraft; + register struct vattr *vap = &va; + register struct nfs_fattr *fp; + struct nameidata nd; + register caddr_t cp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; + int v3 = (nfsd->nd_flag & ND_NFSV3); + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *dirp = NULL; + int vpexcl = 0; + nfsfh_t nfh; + fhandle_t *fhp; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + ndclear(&nd); + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvnamesiz(len); + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT; + + error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, + &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + if (dirp) { + if (v3) { + dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, + procp); + } else { + vrele(dirp); + dirp = NULL; + } + } + if (error) { + nfsm_reply(NFSX_WCCDATA(v3)); + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + error = 0; + goto nfsmout; + } + VATTR_NULL(vap); + if (v3) { + nfsm_srvsattr(vap); + } else { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + vap->va_mode = nfstov_mode(*tl++); + } + + /* + * At this point nd.ni_dvp is referenced and exclusively locked and + * nd.ni_vp, if it exists, is referenced but not locked. + */ + + vap->va_type = VDIR; + if (nd.ni_vp != NULL) { + NDFREE(&nd, NDF_ONLY_PNBUF); + error = EEXIST; + goto out; + } + + /* + * Issue mkdir op. Since SAVESTART is not set, the pathname + * component is freed by the VOP call. This will fill-in + * nd.ni_vp, reference, and exclusively lock it. 
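+ *
+ * On success the new directory vnode is then flattened into a file
+ * handle with VFS_VPTOFH() and its attributes fetched with
+ * VOP_GETATTR(), so the reply can carry them (post-op fh/attr for
+ * V3, a V2 fattr otherwise).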
+ */ + nqsrv_getl(nd.ni_dvp, ND_WRITE); + error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + NDFREE(&nd, NDF_ONLY_PNBUF); + vpexcl = 1; + + vput(nd.ni_dvp); + nd.ni_dvp = NULL; + + if (!error) { + bzero((caddr_t)fhp, sizeof(nfh)); + fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid; + error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid); + if (!error) + error = VOP_GETATTR(nd.ni_vp, vap, cred, procp); + } +out: + if (dirp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); + nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); + if (v3) { + if (!error) { + nfsm_srvpostop_fh(fhp); + nfsm_srvpostop_attr(0, vap); + } + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + } else { + nfsm_srvfhtom(fhp, v3); + nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); + nfsm_srvfillattr(vap, fp); + } + error = 0; + /* fall through */ + +nfsmout: + if (dirp) + vrele(dirp); + if (nd.ni_dvp) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == nd.ni_vp && vpexcl) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) { + if (vpexcl) + vput(nd.ni_vp); + else + vrele(nd.ni_vp); + } + return (error); +} + +/* + * nfs rmdir service + */ +int +nfsrv_rmdir(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; + int v3 = (nfsd->nd_flag & ND_NFSV3); + char *cp2; + struct mbuf *mb, *mreq; + struct vnode *vp, *dirp = (struct vnode *)0; + struct vattr dirfor, diraft; + nfsfh_t nfh; + fhandle_t *fhp; + struct nameidata nd; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + ndclear(&nd); + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvnamesiz(len); + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = DELETE; + nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, + &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + if (dirp) { + if (v3) { + dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, + procp); + } else { + vrele(dirp); + dirp = NULL; + } + } + if (error) { + nfsm_reply(NFSX_WCCDATA(v3)); + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + error = 0; + goto nfsmout; + } + vp = nd.ni_vp; + if (vp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + /* + * No rmdir "." please. + */ + if (nd.ni_dvp == vp) { + error = EINVAL; + goto out; + } + /* + * The root of a mounted filesystem cannot be deleted. + */ + if (vp->v_flag & VROOT) + error = EBUSY; +out: + /* + * Issue or abort op. Since SAVESTART is not set, path name + * component is freed by the VOP after either. 
+ */ + if (!error) { + nqsrv_getl(nd.ni_dvp, ND_WRITE); + nqsrv_getl(vp, ND_WRITE); + error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + + if (dirp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); + nfsm_reply(NFSX_WCCDATA(v3)); + if (v3) { + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + error = 0; + } + /* fall through */ + +nfsmout: + NDFREE(&nd, NDF_ONLY_PNBUF); + if (dirp) + vrele(dirp); + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + + return(error); +} + +/* + * nfs readdir service + * - mallocs what it thinks is enough to read + * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR + * - calls VOP_READDIR() + * - loops around building the reply + * if the output generated exceeds count break out of loop + * The nfsm_clget macro is used here so that the reply will be packed + * tightly in mbuf clusters. + * - it only knows that it has encountered eof when the VOP_READDIR() + * reads nothing + * - as such one readdir rpc will return eof false although you are there + * and then the next will return eof + * - it trims out records with d_fileno == 0 + * this doesn't matter for Unix clients, but they might confuse clients + * for other os'. + * NB: It is tempting to set eof to true if the VOP_READDIR() reads less + * than requested, but this may not apply to all filesystems. For + * example, client NFS does not { although it is never remote mounted + * anyhow } + * The alternate call nfsrv_readdirplus() does lookups as well. + * PS: The NFS protocol spec. does not clarify what the "count" byte + * argument is a count of.. just name strings and file id's or the + * entire reply rpc or ... + * I tried just file name and id sizes and it confused the Sun client, + * so I am using the full rpc size now. The "paranoia.." comment refers + * to including the status longwords that are not a part of the dir. + * "entry" structures, but are in the rpc. 
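+ *
+ * As a rough example (illustrative figures): a client count of 1000
+ * is rounded up to a 1024 byte read buffer (the next multiple of
+ * DIRBLKSIZ, typically 512) and clipped to NFS_SRVMAXDATA().  While
+ * building the reply, a 12 character name costs about
+ *	4 * NFSX_UNSIGNED + 12 = 28 bytes	(V2)
+ *	6 * NFSX_UNSIGNED + 12 = 36 bytes	(V3)
+ * and the loop below stops once the running total exceeds count.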
+ */ +struct flrep { + nfsuint64 fl_off; + u_int32_t fl_postopok; + u_int32_t fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)]; + u_int32_t fl_fhok; + u_int32_t fl_fhsize; + u_int32_t fl_nfh[NFSX_V3FH / sizeof (u_int32_t)]; +}; + +int +nfsrv_readdir(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register char *bp, *be; + register struct mbuf *mp; + register struct dirent *dp; + register caddr_t cp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + struct mbuf *mb, *mb2, *mreq, *mp2; + char *cpos, *cend, *cp2, *rbuf; + struct vnode *vp = NULL; + struct vattr at; + nfsfh_t nfh; + fhandle_t *fhp; + struct uio io; + struct iovec iv; + int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; + int siz, cnt, fullsiz, eofflag, rdonly, cache, ncookies; + int v3 = (nfsd->nd_flag & ND_NFSV3); + u_quad_t frev, off, toff, verf; + u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */ + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + if (v3) { + nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED); + toff = fxdr_hyper(tl); + tl += 2; + verf = fxdr_hyper(tl); + tl += 2; + } else { + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + toff = fxdr_unsigned(u_quad_t, *tl++); + verf = 0; /* shut up gcc */ + } + off = toff; + cnt = fxdr_unsigned(int, *tl); + siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); + xfer = NFS_SRVMAXDATA(nfsd); + if (siz > xfer) + siz = xfer; + fullsiz = siz; + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (!error && vp->v_type != VDIR) { + error = ENOTDIR; + vput(vp); + vp = NULL; + } + if (error) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + + /* + * Obtain lock on vnode for this section of the code + */ + + nqsrv_getl(vp, ND_READ); + if (v3) { + error = getret = VOP_GETATTR(vp, &at, cred, procp); +#if 0 + /* + * XXX This check may be too strict for Solaris 2.5 clients. + */ + if (!error && toff && verf && verf != at.va_filerev) + error = NFSERR_BAD_COOKIE; +#endif + } + if (!error) + error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0); + if (error) { + vput(vp); + vp = NULL; + nfsm_reply(NFSX_POSTOPATTR(v3)); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + VOP_UNLOCK(vp, 0, procp); + + /* + * end section. 
Allocate rbuf and continue + */ + MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); +again: + iv.iov_base = rbuf; + iv.iov_len = fullsiz; + io.uio_iov = &iv; + io.uio_iovcnt = 1; + io.uio_offset = (off_t)off; + io.uio_resid = fullsiz; + io.uio_segflg = UIO_SYSSPACE; + io.uio_rw = UIO_READ; + io.uio_procp = (struct proc *)0; + eofflag = 0; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); + if (cookies) { + free((caddr_t)cookies, M_TEMP); + cookies = NULL; + } + error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); + off = (off_t)io.uio_offset; + if (!cookies && !error) + error = NFSERR_PERM; + if (v3) { + getret = VOP_GETATTR(vp, &at, cred, procp); + if (!error) + error = getret; + } + VOP_UNLOCK(vp, 0, procp); + if (error) { + vrele(vp); + vp = NULL; + free((caddr_t)rbuf, M_TEMP); + if (cookies) + free((caddr_t)cookies, M_TEMP); + nfsm_reply(NFSX_POSTOPATTR(v3)); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + if (io.uio_resid) { + siz -= io.uio_resid; + + /* + * If nothing read, return eof + * rpc reply + */ + if (siz == 0) { + vrele(vp); + vp = NULL; + nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + + 2 * NFSX_UNSIGNED); + if (v3) { + nfsm_srvpostop_attr(getret, &at); + nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + txdr_hyper(at.va_filerev, tl); + tl += 2; + } else + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = nfs_false; + *tl = nfs_true; + FREE((caddr_t)rbuf, M_TEMP); + FREE((caddr_t)cookies, M_TEMP); + error = 0; + goto nfsmout; + } + } + + /* + * Check for degenerate cases of nothing useful read. + * If so go try again + */ + cpos = rbuf; + cend = rbuf + siz; + dp = (struct dirent *)cpos; + cookiep = cookies; + /* + * For some reason FreeBSD's ufs_readdir() chooses to back the + * directory offset up to a block boundary, so it is necessary to + * skip over the records that preceed the requested offset. This + * requires the assumption that file offset cookies monotonically + * increase. + */ + while (cpos < cend && ncookies > 0 && + (dp->d_fileno == 0 || dp->d_type == DT_WHT || + ((u_quad_t)(*cookiep)) <= toff)) { + cpos += dp->d_reclen; + dp = (struct dirent *)cpos; + cookiep++; + ncookies--; + } + if (cpos >= cend || ncookies == 0) { + toff = off; + siz = fullsiz; + goto again; + } + + len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */ + nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz); + if (v3) { + nfsm_srvpostop_attr(getret, &at); + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + txdr_hyper(at.va_filerev, tl); + } + mp = mp2 = mb; + bp = bpos; + be = bp + M_TRAILINGSPACE(mp); + + /* Loop through the records and build reply */ + while (cpos < cend && ncookies > 0) { + if (dp->d_fileno != 0 && dp->d_type != DT_WHT) { + nlen = dp->d_namlen; + rem = nfsm_rndup(nlen) - nlen; + len += (4 * NFSX_UNSIGNED + nlen + rem); + if (v3) + len += 2 * NFSX_UNSIGNED; + if (len > cnt) { + eofflag = 0; + break; + } + /* + * Build the directory record xdr from + * the dirent entry. 
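+ * Each record goes out on the wire roughly as:
+ *	value-follows (TRUE)
+ *	fileid		(preceded by a zero high word for V3)
+ *	name length, then the name padded to a 4 byte boundary
+ *	cookie		(preceded by a zero high word for V3)
+ * The list is terminated below with a FALSE value-follows word
+ * followed by the eof flag.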
+ */ + nfsm_clget; + *tl = nfs_true; + bp += NFSX_UNSIGNED; + if (v3) { + nfsm_clget; + *tl = 0; + bp += NFSX_UNSIGNED; + } + nfsm_clget; + *tl = txdr_unsigned(dp->d_fileno); + bp += NFSX_UNSIGNED; + nfsm_clget; + *tl = txdr_unsigned(nlen); + bp += NFSX_UNSIGNED; + + /* And loop around copying the name */ + xfer = nlen; + cp = dp->d_name; + while (xfer > 0) { + nfsm_clget; + if ((bp+xfer) > be) + tsiz = be-bp; + else + tsiz = xfer; + bcopy(cp, bp, tsiz); + bp += tsiz; + xfer -= tsiz; + if (xfer > 0) + cp += tsiz; + } + /* And null pad to a int32_t boundary */ + for (i = 0; i < rem; i++) + *bp++ = '\0'; + nfsm_clget; + + /* Finish off the record */ + if (v3) { + *tl = 0; + bp += NFSX_UNSIGNED; + nfsm_clget; + } + *tl = txdr_unsigned(*cookiep); + bp += NFSX_UNSIGNED; + } + cpos += dp->d_reclen; + dp = (struct dirent *)cpos; + cookiep++; + ncookies--; + } + vrele(vp); + vp = NULL; + nfsm_clget; + *tl = nfs_false; + bp += NFSX_UNSIGNED; + nfsm_clget; + if (eofflag) + *tl = nfs_true; + else + *tl = nfs_false; + bp += NFSX_UNSIGNED; + if (mp != mb) { + if (bp < be) + mp->m_len = bp - mtod(mp, caddr_t); + } else + mp->m_len += bp - bpos; + FREE((caddr_t)rbuf, M_TEMP); + FREE((caddr_t)cookies, M_TEMP); + +nfsmout: + if (vp) + vrele(vp); + return(error); +} + +int +nfsrv_readdirplus(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register char *bp, *be; + register struct mbuf *mp; + register struct dirent *dp; + register caddr_t cp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + struct mbuf *mb, *mb2, *mreq, *mp2; + char *cpos, *cend, *cp2, *rbuf; + struct vnode *vp = NULL, *nvp; + struct flrep fl; + nfsfh_t nfh; + fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh; + struct uio io; + struct iovec iv; + struct vattr va, at, *vap = &va; + struct nfs_fattr *fp; + int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; + int siz, cnt, fullsiz, eofflag, rdonly, cache, dirlen, ncookies; + u_quad_t frev, off, toff, verf; + u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */ + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED); + toff = fxdr_hyper(tl); + tl += 2; + verf = fxdr_hyper(tl); + tl += 2; + siz = fxdr_unsigned(int, *tl++); + cnt = fxdr_unsigned(int, *tl); + off = toff; + siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); + xfer = NFS_SRVMAXDATA(nfsd); + if (siz > xfer) + siz = xfer; + fullsiz = siz; + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (!error && vp->v_type != VDIR) { + error = ENOTDIR; + vput(vp); + vp = NULL; + } + if (error) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + error = getret = VOP_GETATTR(vp, &at, cred, procp); +#if 0 + /* + * XXX This check may be too strict for Solaris 2.5 clients. 
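+ * The cookie verifier handed out below is simply va_filerev, so a
+ * mismatch here would mean the directory has presumably changed
+ * since the client obtained its cookies, and the cookies can no
+ * longer be trusted (NFSERR_BAD_COOKIE).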
+ */ + if (!error && toff && verf && verf != at.va_filerev) + error = NFSERR_BAD_COOKIE; +#endif + if (!error) { + nqsrv_getl(vp, ND_READ); + error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0); + } + if (error) { + vput(vp); + vp = NULL; + nfsm_reply(NFSX_V3POSTOPATTR); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + VOP_UNLOCK(vp, 0, procp); + MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); +again: + iv.iov_base = rbuf; + iv.iov_len = fullsiz; + io.uio_iov = &iv; + io.uio_iovcnt = 1; + io.uio_offset = (off_t)off; + io.uio_resid = fullsiz; + io.uio_segflg = UIO_SYSSPACE; + io.uio_rw = UIO_READ; + io.uio_procp = (struct proc *)0; + eofflag = 0; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); + if (cookies) { + free((caddr_t)cookies, M_TEMP); + cookies = NULL; + } + error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); + off = (u_quad_t)io.uio_offset; + getret = VOP_GETATTR(vp, &at, cred, procp); + VOP_UNLOCK(vp, 0, procp); + if (!cookies && !error) + error = NFSERR_PERM; + if (!error) + error = getret; + if (error) { + vrele(vp); + vp = NULL; + if (cookies) + free((caddr_t)cookies, M_TEMP); + free((caddr_t)rbuf, M_TEMP); + nfsm_reply(NFSX_V3POSTOPATTR); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + if (io.uio_resid) { + siz -= io.uio_resid; + + /* + * If nothing read, return eof + * rpc reply + */ + if (siz == 0) { + vrele(vp); + vp = NULL; + nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + + 2 * NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + txdr_hyper(at.va_filerev, tl); + tl += 2; + *tl++ = nfs_false; + *tl = nfs_true; + FREE((caddr_t)cookies, M_TEMP); + FREE((caddr_t)rbuf, M_TEMP); + error = 0; + goto nfsmout; + } + } + + /* + * Check for degenerate cases of nothing useful read. + * If so go try again + */ + cpos = rbuf; + cend = rbuf + siz; + dp = (struct dirent *)cpos; + cookiep = cookies; + /* + * For some reason FreeBSD's ufs_readdir() chooses to back the + * directory offset up to a block boundary, so it is necessary to + * skip over the records that preceed the requested offset. This + * requires the assumption that file offset cookies monotonically + * increase. + */ + while (cpos < cend && ncookies > 0 && + (dp->d_fileno == 0 || dp->d_type == DT_WHT || + ((u_quad_t)(*cookiep)) <= toff)) { + cpos += dp->d_reclen; + dp = (struct dirent *)cpos; + cookiep++; + ncookies--; + } + if (cpos >= cend || ncookies == 0) { + toff = off; + siz = fullsiz; + goto again; + } + + /* + * Probe one of the directory entries to see if the filesystem + * supports VGET. + */ + if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp) == EOPNOTSUPP) { + error = NFSERR_NOTSUPP; + vrele(vp); + vp = NULL; + free((caddr_t)cookies, M_TEMP); + free((caddr_t)rbuf, M_TEMP); + nfsm_reply(NFSX_V3POSTOPATTR); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + vput(nvp); + nvp = NULL; + + dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; + nfsm_reply(cnt); + nfsm_srvpostop_attr(getret, &at); + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + txdr_hyper(at.va_filerev, tl); + mp = mp2 = mb; + bp = bpos; + be = bp + M_TRAILINGSPACE(mp); + + /* Loop through the records and build reply */ + while (cpos < cend && ncookies > 0) { + if (dp->d_fileno != 0 && dp->d_type != DT_WHT) { + nlen = dp->d_namlen; + rem = nfsm_rndup(nlen)-nlen; + + /* + * For readdir_and_lookup get the vnode using + * the file number. 
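+ * Each surviving entry therefore costs a VFS_VGET(), a VFS_VPTOFH()
+ * and a VOP_GETATTR(); if any of them fails the entry is simply
+ * skipped (the "invalid" label below).  The handle and attributes
+ * are packed into a struct flrep and copied out after the name.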
+ */ + if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp)) + goto invalid; + bzero((caddr_t)nfhp, NFSX_V3FH); + nfhp->fh_fsid = + nvp->v_mount->mnt_stat.f_fsid; + if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) { + vput(nvp); + nvp = NULL; + goto invalid; + } + if (VOP_GETATTR(nvp, vap, cred, procp)) { + vput(nvp); + nvp = NULL; + goto invalid; + } + vput(nvp); + nvp = NULL; + + /* + * If either the dircount or maxcount will be + * exceeded, get out now. Both of these lengths + * are calculated conservatively, including all + * XDR overheads. + */ + len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH + + NFSX_V3POSTOPATTR); + dirlen += (6 * NFSX_UNSIGNED + nlen + rem); + if (len > cnt || dirlen > fullsiz) { + eofflag = 0; + break; + } + + /* + * Build the directory record xdr from + * the dirent entry. + */ + fp = (struct nfs_fattr *)&fl.fl_fattr; + nfsm_srvfillattr(vap, fp); + fl.fl_fhsize = txdr_unsigned(NFSX_V3FH); + fl.fl_fhok = nfs_true; + fl.fl_postopok = nfs_true; + fl.fl_off.nfsuquad[0] = 0; + fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep); + + nfsm_clget; + *tl = nfs_true; + bp += NFSX_UNSIGNED; + nfsm_clget; + *tl = 0; + bp += NFSX_UNSIGNED; + nfsm_clget; + *tl = txdr_unsigned(dp->d_fileno); + bp += NFSX_UNSIGNED; + nfsm_clget; + *tl = txdr_unsigned(nlen); + bp += NFSX_UNSIGNED; + + /* And loop around copying the name */ + xfer = nlen; + cp = dp->d_name; + while (xfer > 0) { + nfsm_clget; + if ((bp + xfer) > be) + tsiz = be - bp; + else + tsiz = xfer; + bcopy(cp, bp, tsiz); + bp += tsiz; + xfer -= tsiz; + if (xfer > 0) + cp += tsiz; + } + /* And null pad to a int32_t boundary */ + for (i = 0; i < rem; i++) + *bp++ = '\0'; + + /* + * Now copy the flrep structure out. + */ + xfer = sizeof (struct flrep); + cp = (caddr_t)&fl; + while (xfer > 0) { + nfsm_clget; + if ((bp + xfer) > be) + tsiz = be - bp; + else + tsiz = xfer; + bcopy(cp, bp, tsiz); + bp += tsiz; + xfer -= tsiz; + if (xfer > 0) + cp += tsiz; + } + } +invalid: + cpos += dp->d_reclen; + dp = (struct dirent *)cpos; + cookiep++; + ncookies--; + } + vrele(vp); + vp = NULL; + nfsm_clget; + *tl = nfs_false; + bp += NFSX_UNSIGNED; + nfsm_clget; + if (eofflag) + *tl = nfs_true; + else + *tl = nfs_false; + bp += NFSX_UNSIGNED; + if (mp != mb) { + if (bp < be) + mp->m_len = bp - mtod(mp, caddr_t); + } else + mp->m_len += bp - bpos; + FREE((caddr_t)cookies, M_TEMP); + FREE((caddr_t)rbuf, M_TEMP); +nfsmout: + if (vp) + vrele(vp); + return(error); +} + +/* + * nfs commit service + */ +int +nfsrv_commit(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + struct vattr bfor, aft; + struct vnode *vp = NULL; + nfsfh_t nfh; + fhandle_t *fhp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt, cache; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + u_quad_t frev, off; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + cache = 0; +#endif + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + + /* + * XXX At this time VOP_FSYNC() does not accept offset and byte + * count parameters, so these arguments are useless (someday maybe). 
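+ *
+ * Requests no larger than MAX_COMMIT_COUNT are narrowed to the byte
+ * range below; anything bigger just gets a full VOP_FSYNC().  As an
+ * illustration (assuming f_iosize 8192 and 4k pages), a commit of
+ * 1000 bytes at offset 12345 is widened to
+ *	off = 8192, cnt = 5153
+ * so only the dirty buffer at logical block 1 needs to be written.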
+ */ + off = fxdr_hyper(tl); + tl += 2; + cnt = fxdr_unsigned(int, *tl); + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); + error = 0; + goto nfsmout; + } + for_ret = VOP_GETATTR(vp, &bfor, cred, procp); + + if (cnt > MAX_COMMIT_COUNT) { + /* + * Give up and do the whole thing + */ + if (vp->v_object && + (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { + vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); + } + error = VOP_FSYNC(vp, cred, MNT_WAIT, procp); + } else { + /* + * Locate and synchronously write any buffers that fall + * into the requested range. Note: we are assuming that + * f_iosize is a power of 2. + */ + int iosize = vp->v_mount->mnt_stat.f_iosize; + int iomask = iosize - 1; + int s; + daddr_t lblkno; + + /* + * Align to iosize boundry, super-align to page boundry. + */ + if (off & iomask) { + cnt += off & iomask; + off &= ~(u_quad_t)iomask; + } + if (off & PAGE_MASK) { + cnt += off & PAGE_MASK; + off &= ~(u_quad_t)PAGE_MASK; + } + lblkno = off / iosize; + + if (vp->v_object && + (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { + vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC); + } + + s = splbio(); + while (cnt > 0) { + struct buf *bp; + + /* + * If we have a buffer and it is marked B_DELWRI we + * have to lock and write it. Otherwise the prior + * write is assumed to have already been committed. + */ + if ((bp = gbincore(vp, lblkno)) != NULL && (bp->b_flags & B_DELWRI)) { + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { + BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL); + continue; /* retry */ + } + bremfree(bp); + bp->b_flags &= ~B_ASYNC; + VOP_BWRITE(bp->b_vp, bp); + ++nfs_commit_miss; + } + ++nfs_commit_blks; + if (cnt < iosize) + break; + cnt -= iosize; + ++lblkno; + } + splx(s); + } + + aft_ret = VOP_GETATTR(vp, &aft, cred, procp); + vput(vp); + vp = NULL; + nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF); + nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); + if (!error) { + nfsm_build(tl, u_int32_t *, NFSX_V3WRITEVERF); + if (nfsver.tv_sec == 0) + nfsver = boottime; + *tl++ = txdr_unsigned(nfsver.tv_sec); + *tl = txdr_unsigned(nfsver.tv_usec); + } else { + error = 0; + } +nfsmout: + if (vp) + vput(vp); + return(error); +} + +/* + * nfs statfs service + */ +int +nfsrv_statfs(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register struct statfs *sf; + register struct nfs_statfs *sfp; + register u_int32_t *tl; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, cache, getret = 1; + int v3 = (nfsd->nd_flag & ND_NFSV3); + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *vp = NULL; + struct vattr at; + nfsfh_t nfh; + fhandle_t *fhp; + struct statfs statfs; + u_quad_t frev, tval; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + cache = 0; +#endif + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + sf = &statfs; + error = VFS_STATFS(vp->v_mount, sf, procp); + getret = VOP_GETATTR(vp, &at, cred, procp); + 
vput(vp); + vp = NULL; + nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3)); + if (v3) + nfsm_srvpostop_attr(getret, &at); + if (error) { + error = 0; + goto nfsmout; + } + nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); + if (v3) { + tval = (u_quad_t)sf->f_blocks; + tval *= (u_quad_t)sf->f_bsize; + txdr_hyper(tval, &sfp->sf_tbytes); + tval = (u_quad_t)sf->f_bfree; + tval *= (u_quad_t)sf->f_bsize; + txdr_hyper(tval, &sfp->sf_fbytes); + tval = (u_quad_t)sf->f_bavail; + tval *= (u_quad_t)sf->f_bsize; + txdr_hyper(tval, &sfp->sf_abytes); + sfp->sf_tfiles.nfsuquad[0] = 0; + sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files); + sfp->sf_ffiles.nfsuquad[0] = 0; + sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); + sfp->sf_afiles.nfsuquad[0] = 0; + sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); + sfp->sf_invarsec = 0; + } else { + sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA); + sfp->sf_bsize = txdr_unsigned(sf->f_bsize); + sfp->sf_blocks = txdr_unsigned(sf->f_blocks); + sfp->sf_bfree = txdr_unsigned(sf->f_bfree); + sfp->sf_bavail = txdr_unsigned(sf->f_bavail); + } +nfsmout: + if (vp) + vput(vp); + return(error); +} + +/* + * nfs fsinfo service + */ +int +nfsrv_fsinfo(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register u_int32_t *tl; + register struct nfsv3_fsinfo *sip; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, cache, getret = 1, pref; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *vp = NULL; + struct vattr at; + nfsfh_t nfh; + fhandle_t *fhp; + u_quad_t frev, maxfsize; + struct statfs sb; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + cache = 0; +#endif + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + + /* XXX Try to make a guess on the max file size. */ + VFS_STATFS(vp->v_mount, &sb, procp); + maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1; + + getret = VOP_GETATTR(vp, &at, cred, procp); + vput(vp); + vp = NULL; + nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO); + nfsm_srvpostop_attr(getret, &at); + nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO); + + /* + * XXX + * There should be file system VFS OP(s) to get this information. + * For now, assume ufs. 
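+ * The guess above advertises a limit of 2^31 blocks, e.g. roughly
+ * 16TB - 1 for an 8k-block filesystem.  The preferred transfer size
+ * below is NFS_MAXDGRAMDATA for datagram (UDP) sockets and
+ * NFS_MAXDATA for stream (TCP) sockets.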
+ */ + if (slp->ns_so->so_type == SOCK_DGRAM) + pref = NFS_MAXDGRAMDATA; + else + pref = NFS_MAXDATA; + sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA); + sip->fs_rtpref = txdr_unsigned(pref); + sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE); + sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA); + sip->fs_wtpref = txdr_unsigned(pref); + sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE); + sip->fs_dtpref = txdr_unsigned(pref); + txdr_hyper(maxfsize, &sip->fs_maxfilesize); + sip->fs_timedelta.nfsv3_sec = 0; + sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1); + sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK | + NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | + NFSV3FSINFO_CANSETTIME); +nfsmout: + if (vp) + vput(vp); + return(error); +} + +/* + * nfs pathconf service + */ +int +nfsrv_pathconf(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; + struct sockaddr *nam = nfsd->nd_nam; + caddr_t dpos = nfsd->nd_dpos; + struct ucred *cred = &nfsd->nd_cr; + register u_int32_t *tl; + register struct nfsv3_pathconf *pc; + register int32_t t1; + caddr_t bpos; + int error = 0, rdonly, cache, getret = 1; + register_t linkmax, namemax, chownres, notrunc; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *vp = NULL; + struct vattr at; + nfsfh_t nfh; + fhandle_t *fhp; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + cache = 0; +#endif + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, + &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + if (error) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + error = 0; + goto nfsmout; + } + error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax); + if (!error) + error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax); + if (!error) + error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres); + if (!error) + error = VOP_PATHCONF(vp, _PC_NO_TRUNC, ¬runc); + getret = VOP_GETATTR(vp, &at, cred, procp); + vput(vp); + vp = NULL; + nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF); + nfsm_srvpostop_attr(getret, &at); + if (error) { + error = 0; + goto nfsmout; + } + nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); + + pc->pc_linkmax = txdr_unsigned(linkmax); + pc->pc_namemax = txdr_unsigned(namemax); + pc->pc_notrunc = txdr_unsigned(notrunc); + pc->pc_chownrestricted = txdr_unsigned(chownres); + + /* + * These should probably be supported by VOP_PATHCONF(), but + * until msdosfs is exportable (why would you want to?), the + * Unix defaults should be ok. 
+ */ + pc->pc_caseinsensitive = nfs_false; + pc->pc_casepreserving = nfs_true; +nfsmout: + if (vp) + vput(vp); + return(error); +} + +/* + * Null operation, used by clients to ping server + */ +/* ARGSUSED */ +int +nfsrv_null(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep; + caddr_t bpos; + int error = NFSERR_RETVOID, cache; + struct mbuf *mb, *mreq; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + cache = 0; +#endif + nfsm_reply(0); + nfsm_srvdone; +} + +/* + * No operation, used for obsolete procedures + */ +/* ARGSUSED */ +int +nfsrv_noop(nfsd, slp, procp, mrq) + struct nfsrv_descript *nfsd; + struct nfssvc_sock *slp; + struct proc *procp; + struct mbuf **mrq; +{ + struct mbuf *mrep = nfsd->nd_mrep; + caddr_t bpos; + int error, cache; + struct mbuf *mb, *mreq; + u_quad_t frev; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); +#ifndef nolint + cache = 0; +#endif + if (nfsd->nd_repstat) + error = nfsd->nd_repstat; + else + error = EPROCUNAVAIL; + nfsm_reply(0); + nfsm_srvdone; +} + +/* + * Perform access checking for vnodes obtained from file handles that would + * refer to files already opened by a Unix client. You cannot just use + * vn_writechk() and VOP_ACCESS() for two reasons. + * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case + * 2 - The owner is to be given access irrespective of mode bits for some + * operations, so that processes that chmod after opening a file don't + * break. I don't like this because it opens a security hole, but since + * the nfs server opens a security hole the size of a barn door anyhow, + * what the heck. + * + * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS() + * will return EPERM instead of EACCESS. EPERM is always an error. + */ +static int +nfsrv_access(vp, flags, cred, rdonly, p, override) + register struct vnode *vp; + int flags; + register struct ucred *cred; + int rdonly; + struct proc *p; + int override; +{ + struct vattr vattr; + int error; + + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); + if (flags & VWRITE) { + /* Just vn_writechk() changed to check rdonly */ + /* + * Disallow write attempts on read-only file systems; + * unless the file is a socket or a block or character + * device resident on the file system. + */ + if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { + switch (vp->v_type) { + case VREG: + case VDIR: + case VLNK: + return (EROFS); + default: + break; + } + } + /* + * If there's shared text associated with + * the inode, we can't allow writing. + */ + if (vp->v_flag & VTEXT) + return (ETXTBSY); + } + error = VOP_GETATTR(vp, &vattr, cred, p); + if (error) + return (error); + error = VOP_ACCESS(vp, flags, cred, p); + /* + * Allow certain operations for the owner (reads and writes + * on files that are already open). + */ + if (override && error == EACCES && cred->cr_uid == vattr.va_uid) + error = 0; + return error; +} +#endif /* NFS_NOSERVER */ + diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c new file mode 100644 index 0000000..41fd303 --- /dev/null +++ b/sys/nfs/nfs_socket.c @@ -0,0 +1,2284 @@ +/* + * Copyright (c) 1989, 1991, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + +/* + * Socket operations for use by nfs + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/vnode.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/syslog.h> +#include <sys/tprintf.h> +#include <sys/sysctl.h> +#include <sys/signalvar.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nfsmount.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsrtt.h> +#include <nfs/nqnfs.h> + +#define TRUE 1 +#define FALSE 0 + +/* + * Estimate rto for an nfs rpc sent via. an unreliable datagram. + * Use the mean and mean deviation of rtt for the appropriate type of rpc + * for the frequent rpcs and a default for the others. + * The justification for doing "other" this way is that these rpcs + * happen so infrequently that timer est. would probably be stale. + * Also, since many of these rpcs are + * non-idempotent, a conservative timeout is desired. + * getattr, lookup - A+2D + * read, write - A+4D + * other - nm_timeo + */ +#define NFS_RTO(n, t) \ + ((t) == 0 ? (n)->nm_timeo : \ + ((t) < 3 ? 
\ + (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ + ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) +#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] +#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] +/* + * External data, mostly RPC constants in XDR form + */ +extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, + rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr, + rpc_auth_kerb; +extern u_int32_t nfs_prog, nqnfs_prog; +extern time_t nqnfsstarttime; +extern struct nfsstats nfsstats; +extern int nfsv3_procid[NFS_NPROCS]; +extern int nfs_ticks; + +/* + * Defines which timer to use for the procnum. + * 0 - default + * 1 - getattr + * 2 - lookup + * 3 - read + * 4 - write + */ +static int proct[NFS_NPROCS] = { + 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, + 0, 0, 0, +}; + +static int nfs_realign_test; +static int nfs_realign_count; + +SYSCTL_DECL(_vfs_nfs); + +SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RD, &nfs_realign_test, 0, ""); +SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RD, &nfs_realign_count, 0, ""); + + +/* + * There is a congestion window for outstanding rpcs maintained per mount + * point. The cwnd size is adjusted in roughly the way that: + * Van Jacobson, Congestion avoidance and Control, In "Proceedings of + * SIGCOMM '88". ACM, August 1988. + * describes for TCP. The cwnd size is chopped in half on a retransmit timeout + * and incremented by 1/cwnd when each rpc reply is received and a full cwnd + * of rpcs is in progress. + * (The sent count and cwnd are scaled for integer arith.) + * Variants of "slow start" were tried and were found to be too much of a + * performance hit (ave. rtt 3 times larger), + * I suspect due to the large rtt that nfs rpcs have. + */ +#define NFS_CWNDSCALE 256 +#define NFS_MAXCWND (NFS_CWNDSCALE * 32) +static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; +int nfsrtton = 0; +struct nfsrtt nfsrtt; +struct callout_handle nfs_timer_handle; + +static int nfs_msg __P((struct proc *,char *,char *)); +static int nfs_rcvlock __P((struct nfsreq *)); +static void nfs_rcvunlock __P((struct nfsreq *)); +static void nfs_realign __P((struct mbuf **pm, int hsiz)); +static int nfs_receive __P((struct nfsreq *rep, struct sockaddr **aname, + struct mbuf **mp)); +static void nfs_softterm __P((struct nfsreq *rep)); +static int nfs_reconnect __P((struct nfsreq *rep)); +#ifndef NFS_NOSERVER +static int nfsrv_getstream __P((struct nfssvc_sock *,int)); + +int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, + struct nfssvc_sock *slp, + struct proc *procp, + struct mbuf **mreqp)) = { + nfsrv_null, + nfsrv_getattr, + nfsrv_setattr, + nfsrv_lookup, + nfsrv3_access, + nfsrv_readlink, + nfsrv_read, + nfsrv_write, + nfsrv_create, + nfsrv_mkdir, + nfsrv_symlink, + nfsrv_mknod, + nfsrv_remove, + nfsrv_rmdir, + nfsrv_rename, + nfsrv_link, + nfsrv_readdir, + nfsrv_readdirplus, + nfsrv_statfs, + nfsrv_fsinfo, + nfsrv_pathconf, + nfsrv_commit, + nqnfsrv_getlease, + nqnfsrv_vacated, + nfsrv_noop, + nfsrv_noop +}; +#endif /* NFS_NOSERVER */ + +/* + * Initialize sockets and congestion for a new NFS connection. + * We do not free the sockaddr if error. 
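+ *
+ * When NFSMNT_RESVPORT is set, the reserved port is obtained by
+ * temporarily switching the socket to IP_PORTRANGE_LOW, binding to
+ * INADDR_ANY with port 0 so the system picks a port from the low
+ * (privileged) range, and then restoring IP_PORTRANGE_DEFAULT.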
+ */ +int +nfs_connect(nmp, rep) + register struct nfsmount *nmp; + struct nfsreq *rep; +{ + register struct socket *so; + int s, error, rcvreserve, sndreserve; + struct sockaddr *saddr; + struct sockaddr_in *sin; + struct proc *p = &proc0; /* only used for socreate and sobind */ + + nmp->nm_so = (struct socket *)0; + saddr = nmp->nm_nam; + error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype, + nmp->nm_soproto, p); + if (error) + goto bad; + so = nmp->nm_so; + nmp->nm_soflags = so->so_proto->pr_flags; + + /* + * Some servers require that the client port be a reserved port number. + */ + if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { + struct sockopt sopt; + int ip; + struct sockaddr_in ssin; + + bzero(&sopt, sizeof sopt); + ip = IP_PORTRANGE_LOW; + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_IP; + sopt.sopt_name = IP_PORTRANGE; + sopt.sopt_val = (void *)&ip; + sopt.sopt_valsize = sizeof(ip); + sopt.sopt_p = NULL; + error = sosetopt(so, &sopt); + if (error) + goto bad; + bzero(&ssin, sizeof ssin); + sin = &ssin; + sin->sin_len = sizeof (struct sockaddr_in); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + sin->sin_port = htons(0); + error = sobind(so, (struct sockaddr *)sin, p); + if (error) + goto bad; + bzero(&sopt, sizeof sopt); + ip = IP_PORTRANGE_DEFAULT; + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_IP; + sopt.sopt_name = IP_PORTRANGE; + sopt.sopt_val = (void *)&ip; + sopt.sopt_valsize = sizeof(ip); + sopt.sopt_p = NULL; + error = sosetopt(so, &sopt); + if (error) + goto bad; + } + + /* + * Protocols that do not require connections may be optionally left + * unconnected for servers that reply from a port other than NFS_PORT. + */ + if (nmp->nm_flag & NFSMNT_NOCONN) { + if (nmp->nm_soflags & PR_CONNREQUIRED) { + error = ENOTCONN; + goto bad; + } + } else { + error = soconnect(so, nmp->nm_nam, p); + if (error) + goto bad; + + /* + * Wait for the connection to complete. Cribbed from the + * connect system call but with the wait timing out so + * that interruptible mounts don't hang here for a long time. 
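+ * The sleep below is bounded at 2 * hz so that nfs_sigintr() is
+ * polled regularly; a pending signal on an interruptible mount
+ * aborts the wait (typically with EINTR) instead of hanging until
+ * the connection completes.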
+ */ + s = splnet(); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + (void) tsleep((caddr_t)&so->so_timeo, PSOCK, + "nfscon", 2 * hz); + if ((so->so_state & SS_ISCONNECTING) && + so->so_error == 0 && rep && + (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){ + so->so_state &= ~SS_ISCONNECTING; + splx(s); + goto bad; + } + } + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + splx(s); + goto bad; + } + splx(s); + } + if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { + so->so_rcv.sb_timeo = (5 * hz); + so->so_snd.sb_timeo = (5 * hz); + } else { + so->so_rcv.sb_timeo = 0; + so->so_snd.sb_timeo = 0; + } + if (nmp->nm_sotype == SOCK_DGRAM) { + sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; + rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + + NFS_MAXPKTHDR) * 2; + } else if (nmp->nm_sotype == SOCK_SEQPACKET) { + sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; + rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + + NFS_MAXPKTHDR) * 2; + } else { + if (nmp->nm_sotype != SOCK_STREAM) + panic("nfscon sotype"); + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + struct sockopt sopt; + int val; + + bzero(&sopt, sizeof sopt); + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_KEEPALIVE; + sopt.sopt_val = &val; + sopt.sopt_valsize = sizeof val; + val = 1; + sosetopt(so, &sopt); + } + if (so->so_proto->pr_protocol == IPPROTO_TCP) { + struct sockopt sopt; + int val; + + bzero(&sopt, sizeof sopt); + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; + sopt.sopt_val = &val; + sopt.sopt_valsize = sizeof val; + val = 1; + sosetopt(so, &sopt); + } + sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + + sizeof (u_int32_t)) * 2; + rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + + sizeof (u_int32_t)) * 2; + } + error = soreserve(so, sndreserve, rcvreserve); + if (error) + goto bad; + so->so_rcv.sb_flags |= SB_NOINTR; + so->so_snd.sb_flags |= SB_NOINTR; + + /* Initialize other non-zero congestion variables */ + nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = + nmp->nm_srtt[3] = (NFS_TIMEO << 3); + nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = + nmp->nm_sdrtt[3] = 0; + nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ + nmp->nm_sent = 0; + nmp->nm_timeouts = 0; + return (0); + +bad: + nfs_disconnect(nmp); + return (error); +} + +/* + * Reconnect routine: + * Called when a connection is broken on a reliable protocol. + * - clean up the old socket + * - nfs_connect() again + * - set R_MUSTRESEND for all outstanding requests on mount point + * If this fails the mount point is DEAD! + * nb: Must be called with the nfs_sndlock() set on the mount point. + */ +static int +nfs_reconnect(rep) + register struct nfsreq *rep; +{ + register struct nfsreq *rp; + register struct nfsmount *nmp = rep->r_nmp; + int error; + + nfs_disconnect(nmp); + while ((error = nfs_connect(nmp, rep)) != 0) { + if (error == EINTR || error == ERESTART) + return (EINTR); + (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); + } + + /* + * Loop through outstanding request list and fix up all requests + * on old socket. + */ + for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) { + if (rp->r_nmp == nmp) + rp->r_flags |= R_MUSTRESEND; + } + return (0); +} + +/* + * NFS disconnect. Clean up and unlink. 
+ */ +void +nfs_disconnect(nmp) + register struct nfsmount *nmp; +{ + register struct socket *so; + + if (nmp->nm_so) { + so = nmp->nm_so; + nmp->nm_so = (struct socket *)0; + soshutdown(so, 2); + soclose(so); + } +} + +void +nfs_safedisconnect(nmp) + struct nfsmount *nmp; +{ + struct nfsreq dummyreq; + + bzero(&dummyreq, sizeof(dummyreq)); + dummyreq.r_nmp = nmp; + nfs_rcvlock(&dummyreq); + nfs_disconnect(nmp); + nfs_rcvunlock(&dummyreq); +} + +/* + * This is the nfs send routine. For connection based socket types, it + * must be called with an nfs_sndlock() on the socket. + * "rep == NULL" indicates that it has been called from a server. + * For the client side: + * - return EINTR if the RPC is terminated, 0 otherwise + * - set R_MUSTRESEND if the send fails for any reason + * - do any cleanup required by recoverable socket errors (?) + * For the server side: + * - return EINTR or ERESTART if interrupted by a signal + * - return EPIPE if a connection is lost for connection based sockets (TCP...) + * - do any cleanup required by recoverable socket errors (?) + */ +int +nfs_send(so, nam, top, rep) + register struct socket *so; + struct sockaddr *nam; + register struct mbuf *top; + struct nfsreq *rep; +{ + struct sockaddr *sendnam; + int error, soflags, flags; + + if (rep) { + if (rep->r_flags & R_SOFTTERM) { + m_freem(top); + return (EINTR); + } + if ((so = rep->r_nmp->nm_so) == NULL) { + rep->r_flags |= R_MUSTRESEND; + m_freem(top); + return (0); + } + rep->r_flags &= ~R_MUSTRESEND; + soflags = rep->r_nmp->nm_soflags; + } else + soflags = so->so_proto->pr_flags; + if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) + sendnam = (struct sockaddr *)0; + else + sendnam = nam; + if (so->so_type == SOCK_SEQPACKET) + flags = MSG_EOR; + else + flags = 0; + + error = so->so_proto->pr_usrreqs->pru_sosend(so, sendnam, 0, top, 0, + flags, curproc /*XXX*/); + /* + * ENOBUFS for dgram sockets is transient and non fatal. + * No need to log, and no need to break a soft mount. + */ + if (error == ENOBUFS && so->so_type == SOCK_DGRAM) { + error = 0; + if (rep) /* do backoff retransmit on client */ + rep->r_flags |= R_MUSTRESEND; + } + + if (error) { + if (rep) { + log(LOG_INFO, "nfs send error %d for server %s\n",error, + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + /* + * Deal with errors for the client side. + */ + if (rep->r_flags & R_SOFTTERM) + error = EINTR; + else + rep->r_flags |= R_MUSTRESEND; + } else + log(LOG_INFO, "nfsd send error %d\n", error); + + /* + * Handle any recoverable (soft) socket errors here. (?) + */ + if (error != EINTR && error != ERESTART && + error != EWOULDBLOCK && error != EPIPE) + error = 0; + } + return (error); +} + +/* + * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all + * done by soreceive(), but for SOCK_STREAM we must deal with the Record + * Mark and consolidate the data into a new mbuf list. + * nb: Sometimes TCP passes the data up to soreceive() in long lists of + * small mbufs. + * For SOCK_STREAM we must be very careful to read an entire record once + * we have read any of it, even if the system call has been interrupted. 
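+ * The Record Mark is a single 32 bit word: the high bit flags the
+ * last fragment of a record and the low 31 bits give the fragment
+ * length, recovered below with
+ *	len = ntohl(len) & ~0x80000000;
+ * For example a mark of 0x800001c8 announces a final fragment of
+ * 456 bytes; anything over NFS_MAXPACKET is treated as loss of sync
+ * and forces a disconnect/reconnect (EFBIG).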
+ */ +static int +nfs_receive(rep, aname, mp) + register struct nfsreq *rep; + struct sockaddr **aname; + struct mbuf **mp; +{ + register struct socket *so; + struct uio auio; + struct iovec aio; + register struct mbuf *m; + struct mbuf *control; + u_int32_t len; + struct sockaddr **getnam; + int error, sotype, rcvflg; + struct proc *p = curproc; /* XXX */ + + /* + * Set up arguments for soreceive() + */ + *mp = (struct mbuf *)0; + *aname = (struct sockaddr *)0; + sotype = rep->r_nmp->nm_sotype; + + /* + * For reliable protocols, lock against other senders/receivers + * in case a reconnect is necessary. + * For SOCK_STREAM, first get the Record Mark to find out how much + * more there is to get. + * We must lock the socket against other receivers + * until we have an entire rpc request/reply. + */ + if (sotype != SOCK_DGRAM) { + error = nfs_sndlock(rep); + if (error) + return (error); +tryagain: + /* + * Check for fatal errors and resending request. + */ + /* + * Ugh: If a reconnect attempt just happened, nm_so + * would have changed. NULL indicates a failed + * attempt that has essentially shut down this + * mount point. + */ + if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { + nfs_sndunlock(rep); + return (EINTR); + } + so = rep->r_nmp->nm_so; + if (!so) { + error = nfs_reconnect(rep); + if (error) { + nfs_sndunlock(rep); + return (error); + } + goto tryagain; + } + while (rep->r_flags & R_MUSTRESEND) { + m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); + nfsstats.rpcretries++; + error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); + if (error) { + if (error == EINTR || error == ERESTART || + (error = nfs_reconnect(rep)) != 0) { + nfs_sndunlock(rep); + return (error); + } + goto tryagain; + } + } + nfs_sndunlock(rep); + if (sotype == SOCK_STREAM) { + aio.iov_base = (caddr_t) &len; + aio.iov_len = sizeof(u_int32_t); + auio.uio_iov = &aio; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_offset = 0; + auio.uio_resid = sizeof(u_int32_t); + auio.uio_procp = p; + do { + rcvflg = MSG_WAITALL; + error = so->so_proto->pr_usrreqs->pru_soreceive + (so, (struct sockaddr **)0, &auio, + (struct mbuf **)0, (struct mbuf **)0, + &rcvflg); + if (error == EWOULDBLOCK && rep) { + if (rep->r_flags & R_SOFTTERM) + return (EINTR); + } + } while (error == EWOULDBLOCK); + if (!error && auio.uio_resid > 0) { + /* + * Don't log a 0 byte receive; it means + * that the socket has been closed, and + * can happen during normal operation + * (forcible unmount or Solaris server). + */ + if (auio.uio_resid != sizeof (u_int32_t)) + log(LOG_INFO, + "short receive (%d/%d) from nfs server %s\n", + (int)(sizeof(u_int32_t) - auio.uio_resid), + (int)sizeof(u_int32_t), + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + error = EPIPE; + } + if (error) + goto errout; + len = ntohl(len) & ~0x80000000; + /* + * This is SERIOUS! We are out of sync with the sender + * and forcing a disconnect/reconnect is all I can do. 
+ */ + if (len > NFS_MAXPACKET) { + log(LOG_ERR, "%s (%d) from nfs server %s\n", + "impossible packet length", + len, + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + error = EFBIG; + goto errout; + } + auio.uio_resid = len; + do { + rcvflg = MSG_WAITALL; + error = so->so_proto->pr_usrreqs->pru_soreceive + (so, (struct sockaddr **)0, + &auio, mp, (struct mbuf **)0, &rcvflg); + } while (error == EWOULDBLOCK || error == EINTR || + error == ERESTART); + if (!error && auio.uio_resid > 0) { + if (len != auio.uio_resid) + log(LOG_INFO, + "short receive (%d/%d) from nfs server %s\n", + len - auio.uio_resid, len, + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + error = EPIPE; + } + } else { + /* + * NB: Since uio_resid is big, MSG_WAITALL is ignored + * and soreceive() will return when it has either a + * control msg or a data msg. + * We have no use for control msg., but must grab them + * and then throw them away so we know what is going + * on. + */ + auio.uio_resid = len = 100000000; /* Anything Big */ + auio.uio_procp = p; + do { + rcvflg = 0; + error = so->so_proto->pr_usrreqs->pru_soreceive + (so, (struct sockaddr **)0, + &auio, mp, &control, &rcvflg); + if (control) + m_freem(control); + if (error == EWOULDBLOCK && rep) { + if (rep->r_flags & R_SOFTTERM) + return (EINTR); + } + } while (error == EWOULDBLOCK || + (!error && *mp == NULL && control)); + if ((rcvflg & MSG_EOR) == 0) + printf("Egad!!\n"); + if (!error && *mp == NULL) + error = EPIPE; + len -= auio.uio_resid; + } +errout: + if (error && error != EINTR && error != ERESTART) { + m_freem(*mp); + *mp = (struct mbuf *)0; + if (error != EPIPE) + log(LOG_INFO, + "receive error %d from nfs server %s\n", + error, + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + error = nfs_sndlock(rep); + if (!error) + error = nfs_reconnect(rep); + if (!error) + goto tryagain; + else + nfs_sndunlock(rep); + } + } else { + if ((so = rep->r_nmp->nm_so) == NULL) + return (EACCES); + if (so->so_state & SS_ISCONNECTED) + getnam = (struct sockaddr **)0; + else + getnam = aname; + auio.uio_resid = len = 1000000; + auio.uio_procp = p; + do { + rcvflg = 0; + error = so->so_proto->pr_usrreqs->pru_soreceive + (so, getnam, &auio, mp, + (struct mbuf **)0, &rcvflg); + if (error == EWOULDBLOCK && + (rep->r_flags & R_SOFTTERM)) + return (EINTR); + } while (error == EWOULDBLOCK); + len -= auio.uio_resid; + } + if (error) { + m_freem(*mp); + *mp = (struct mbuf *)0; + } + /* + * Search for any mbufs that are not a multiple of 4 bytes long + * or with m_data not longword aligned. + * These could cause pointer alignment problems, so copy them to + * well aligned mbufs. + */ + nfs_realign(mp, 5 * NFSX_UNSIGNED); + return (error); +} + +/* + * Implement receipt of reply on a socket. + * We must search through the list of received datagrams matching them + * with outstanding requests using the xid, until ours is found. + */ +/* ARGSUSED */ +int +nfs_reply(myrep) + struct nfsreq *myrep; +{ + register struct nfsreq *rep; + register struct nfsmount *nmp = myrep->r_nmp; + register int32_t t1; + struct mbuf *mrep, *md; + struct sockaddr *nam; + u_int32_t rxid, *tl; + caddr_t dpos, cp2; + int error; + + /* + * Loop around until we get our own reply + */ + for (;;) { + /* + * Lock against other receivers so that I don't get stuck in + * sbwait() after someone else has received my reply for me. + * Also necessary for connection based protocols to avoid + * race conditions during a reconnect. 
+		 * If nfs_rcvlock() returns EALREADY, that means that
+		 * the reply has already been received by another
+		 * process and we can return immediately.  In this
+		 * case, the lock is not taken to avoid races with
+		 * other processes.
+		 */
+		error = nfs_rcvlock(myrep);
+		if (error == EALREADY)
+			return (0);
+		if (error)
+			return (error);
+		/*
+		 * Get the next Rpc reply off the socket
+		 */
+		error = nfs_receive(myrep, &nam, &mrep);
+		nfs_rcvunlock(myrep);
+		if (error) {
+
+			/*
+			 * Ignore routing errors on connectionless protocols??
+			 */
+			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+				nmp->nm_so->so_error = 0;
+				if (myrep->r_flags & R_GETONEREP)
+					return (0);
+				continue;
+			}
+			return (error);
+		}
+		if (nam)
+			FREE(nam, M_SONAME);
+
+		/*
+		 * Get the xid and check that it is an rpc reply
+		 */
+		md = mrep;
+		dpos = mtod(md, caddr_t);
+		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
+		rxid = *tl++;
+		if (*tl != rpc_reply) {
+#ifndef NFS_NOSERVER
+			if (nmp->nm_flag & NFSMNT_NQNFS) {
+				if (nqnfs_callback(nmp, mrep, md, dpos))
+					nfsstats.rpcinvalid++;
+			} else {
+				nfsstats.rpcinvalid++;
+				m_freem(mrep);
+			}
+#else
+			nfsstats.rpcinvalid++;
+			m_freem(mrep);
+#endif
+nfsmout:
+			if (myrep->r_flags & R_GETONEREP)
+				return (0);
+			continue;
+		}
+
+		/*
+		 * Loop through the request list to match up the reply
+		 * Iff no match, just drop the datagram
+		 */
+		for (rep = nfs_reqq.tqh_first; rep != 0;
+		    rep = rep->r_chain.tqe_next) {
+			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+				/* Found it.. */
+				rep->r_mrep = mrep;
+				rep->r_md = md;
+				rep->r_dpos = dpos;
+				if (nfsrtton) {
+					struct rttl *rt;
+
+					rt = &nfsrtt.rttl[nfsrtt.pos];
+					rt->proc = rep->r_procnum;
+					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
+					rt->sent = nmp->nm_sent;
+					rt->cwnd = nmp->nm_cwnd;
+					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
+					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
+					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+					getmicrotime(&rt->tstamp);
+					if (rep->r_flags & R_TIMING)
+						rt->rtt = rep->r_rtt;
+					else
+						rt->rtt = 1000000;
+					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
+				}
+				/*
+				 * Update congestion window.
+				 * Do the additive increase of
+				 * one rpc/rtt.
+				 */
+				if (nmp->nm_cwnd <= nmp->nm_sent) {
+					nmp->nm_cwnd +=
+					   (NFS_CWNDSCALE * NFS_CWNDSCALE +
+					   (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+					if (nmp->nm_cwnd > NFS_MAXCWND)
+						nmp->nm_cwnd = NFS_MAXCWND;
+				}
+				if (rep->r_flags & R_SENT) {
+					rep->r_flags &= ~R_SENT;
+					nmp->nm_sent -= NFS_CWNDSCALE;
+				}
+				/*
+				 * Update rtt using a gain of 0.125 on the mean
+				 * and a gain of 0.25 on the deviation.
+				 */
+				if (rep->r_flags & R_TIMING) {
+					/*
+					 * Since the timer resolution of
+					 * NFS_HZ is so coarse, it can often
+					 * result in r_rtt == 0. Since
+					 * r_rtt == N means that the actual
+					 * rtt is between N+dt and N+2-dt ticks,
+					 * add 1.
+					 */
+					t1 = rep->r_rtt + 1;
+					t1 -= (NFS_SRTT(rep) >> 3);
+					NFS_SRTT(rep) += t1;
+					if (t1 < 0)
+						t1 = -t1;
+					t1 -= (NFS_SDRTT(rep) >> 2);
+					NFS_SDRTT(rep) += t1;
+				}
+				nmp->nm_timeouts = 0;
+				break;
+			}
+		}
+		/*
+		 * If not matched to a request, drop it.
+		 * If it's mine, get out.
+ */ + if (rep == 0) { + nfsstats.rpcunexpected++; + m_freem(mrep); + } else if (rep == myrep) { + if (rep->r_mrep == NULL) + panic("nfsreply nil"); + return (0); + } + if (myrep->r_flags & R_GETONEREP) + return (0); + } +} + +/* + * nfs_request - goes something like this + * - fill in request struct + * - links it into list + * - calls nfs_send() for first transmit + * - calls nfs_receive() to get reply + * - break down rpc header and return with nfs reply pointed to + * by mrep or error + * nb: always frees up mreq mbuf list + */ +int +nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) + struct vnode *vp; + struct mbuf *mrest; + int procnum; + struct proc *procp; + struct ucred *cred; + struct mbuf **mrp; + struct mbuf **mdp; + caddr_t *dposp; +{ + register struct mbuf *mrep, *m2; + register struct nfsreq *rep; + register u_int32_t *tl; + register int i; + struct nfsmount *nmp; + struct mbuf *m, *md, *mheadend; + struct nfsnode *np; + char nickv[RPCX_NICKVERF]; + time_t reqtime, waituntil; + caddr_t dpos, cp2; + int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; + int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; + int verf_len, verf_type; + u_int32_t xid; + u_quad_t frev; + char *auth_str, *verf_str; + NFSKERBKEY_T key; /* save session key */ + + nmp = VFSTONFS(vp->v_mount); + MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); + rep->r_nmp = nmp; + rep->r_vp = vp; + rep->r_procp = procp; + rep->r_procnum = procnum; + i = 0; + m = mrest; + while (m) { + i += m->m_len; + m = m->m_next; + } + mrest_len = i; + + /* + * Get the RPC header with authorization. + */ +kerbauth: + verf_str = auth_str = (char *)0; + if (nmp->nm_flag & NFSMNT_KERB) { + verf_str = nickv; + verf_len = sizeof (nickv); + auth_type = RPCAUTH_KERB4; + bzero((caddr_t)key, sizeof (key)); + if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str, + &auth_len, verf_str, verf_len)) { + error = nfs_getauth(nmp, rep, cred, &auth_str, + &auth_len, verf_str, &verf_len, key); + if (error) { + free((caddr_t)rep, M_NFSREQ); + m_freem(mrest); + return (error); + } + } + } else { + auth_type = RPCAUTH_UNIX; + if (cred->cr_ngroups < 1) + panic("nfsreq nogrps"); + auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? + nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + + 5 * NFSX_UNSIGNED; + } + m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, + auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); + if (auth_str) + free(auth_str, M_TEMP); + + /* + * For stream protocols, insert a Sun RPC Record Mark. + */ + if (nmp->nm_sotype == SOCK_STREAM) { + M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); + *mtod(m, u_int32_t *) = htonl(0x80000000 | + (m->m_pkthdr.len - NFSX_UNSIGNED)); + } + rep->r_mreq = m; + rep->r_xid = xid; +tryagain: + if (nmp->nm_flag & NFSMNT_SOFT) + rep->r_retry = nmp->nm_retry; + else + rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ + rep->r_rtt = rep->r_rexmit = 0; + if (proct[procnum] > 0) + rep->r_flags = R_TIMING; + else + rep->r_flags = 0; + rep->r_mrep = NULL; + + /* + * Do the client side RPC. + */ + nfsstats.rpcrequests++; + /* + * Chain request into list of outstanding requests. Be sure + * to put it LAST so timer finds oldest requests first. + */ + s = splsoftclock(); + TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); + + /* Get send time for nqnfs */ + reqtime = time_second; + + /* + * If backing off another request or avoiding congestion, don't + * send this one now but let timer do it. 
If not timing a request, + * do it now. + */ + if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || + (nmp->nm_flag & NFSMNT_DUMBTIMR) || + nmp->nm_sent < nmp->nm_cwnd)) { + splx(s); + if (nmp->nm_soflags & PR_CONNREQUIRED) + error = nfs_sndlock(rep); + if (!error) { + m2 = m_copym(m, 0, M_COPYALL, M_WAIT); + error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep); + if (nmp->nm_soflags & PR_CONNREQUIRED) + nfs_sndunlock(rep); + } + if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { + nmp->nm_sent += NFS_CWNDSCALE; + rep->r_flags |= R_SENT; + } + } else { + splx(s); + rep->r_rtt = -1; + } + + /* + * Wait for the reply from our send or the timer's. + */ + if (!error || error == EPIPE) + error = nfs_reply(rep); + + /* + * RPC done, unlink the request. + */ + s = splsoftclock(); + TAILQ_REMOVE(&nfs_reqq, rep, r_chain); + splx(s); + + /* + * Decrement the outstanding request count. + */ + if (rep->r_flags & R_SENT) { + rep->r_flags &= ~R_SENT; /* paranoia */ + nmp->nm_sent -= NFS_CWNDSCALE; + } + + /* + * If there was a successful reply and a tprintf msg. + * tprintf a response. + */ + if (!error && (rep->r_flags & R_TPRINTFMSG)) + nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, + "is alive again"); + mrep = rep->r_mrep; + md = rep->r_md; + dpos = rep->r_dpos; + if (error) { + m_freem(rep->r_mreq); + free((caddr_t)rep, M_NFSREQ); + return (error); + } + + /* + * break down the rpc header and check if ok + */ + nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + if (*tl++ == rpc_msgdenied) { + if (*tl == rpc_mismatch) + error = EOPNOTSUPP; + else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { + if (!failed_auth) { + failed_auth++; + mheadend->m_next = (struct mbuf *)0; + m_freem(mrep); + m_freem(rep->r_mreq); + goto kerbauth; + } else + error = EAUTH; + } else + error = EACCES; + m_freem(mrep); + m_freem(rep->r_mreq); + free((caddr_t)rep, M_NFSREQ); + return (error); + } + + /* + * Grab any Kerberos verifier, otherwise just throw it away. + */ + verf_type = fxdr_unsigned(int, *tl++); + i = fxdr_unsigned(int32_t, *tl); + if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) { + error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep); + if (error) + goto nfsmout; + } else if (i > 0) + nfsm_adv(nfsm_rndup(i)); + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + /* 0 == ok */ + if (*tl == 0) { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + if (*tl != 0) { + error = fxdr_unsigned(int, *tl); + if ((nmp->nm_flag & NFSMNT_NFSV3) && + error == NFSERR_TRYLATER) { + m_freem(mrep); + error = 0; + waituntil = time_second + trylater_delay; + while (time_second < waituntil) + (void) tsleep((caddr_t)&lbolt, + PSOCK, "nqnfstry", 0); + trylater_delay *= nfs_backoff[trylater_cnt]; + if (trylater_cnt < 7) + trylater_cnt++; + goto tryagain; + } + + /* + * If the File Handle was stale, invalidate the + * lookup cache, just in case. 
+ */ + if (error == ESTALE) + cache_purge(vp); + if (nmp->nm_flag & NFSMNT_NFSV3) { + *mrp = mrep; + *mdp = md; + *dposp = dpos; + error |= NFSERR_RETERR; + } else + m_freem(mrep); + m_freem(rep->r_mreq); + free((caddr_t)rep, M_NFSREQ); + return (error); + } + + /* + * For nqnfs, get any lease in reply + */ + if (nmp->nm_flag & NFSMNT_NQNFS) { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + if (*tl) { + np = VTONFS(vp); + nqlflag = fxdr_unsigned(int, *tl); + nfsm_dissect(tl, u_int32_t *, 4*NFSX_UNSIGNED); + cachable = fxdr_unsigned(int, *tl++); + reqtime += fxdr_unsigned(int, *tl++); + if (reqtime > time_second) { + frev = fxdr_hyper(tl); + nqnfs_clientlease(nmp, np, nqlflag, + cachable, reqtime, frev); + } + } + } + *mrp = mrep; + *mdp = md; + *dposp = dpos; + m_freem(rep->r_mreq); + FREE((caddr_t)rep, M_NFSREQ); + return (0); + } + m_freem(mrep); + error = EPROTONOSUPPORT; +nfsmout: + m_freem(rep->r_mreq); + free((caddr_t)rep, M_NFSREQ); + return (error); +} + +#ifndef NFS_NOSERVER +/* + * Generate the rpc reply header + * siz arg. is used to decide if adding a cluster is worthwhile + */ +int +nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp) + int siz; + struct nfsrv_descript *nd; + struct nfssvc_sock *slp; + int err; + int cache; + u_quad_t *frev; + struct mbuf **mrq; + struct mbuf **mbp; + caddr_t *bposp; +{ + register u_int32_t *tl; + register struct mbuf *mreq; + caddr_t bpos; + struct mbuf *mb, *mb2; + + MGETHDR(mreq, M_WAIT, MT_DATA); + mb = mreq; + /* + * If this is a big reply, use a cluster else + * try and leave leading space for the lower level headers. + */ + siz += RPC_REPLYSIZ; + if (siz >= MINCLSIZE) { + MCLGET(mreq, M_WAIT); + } else + mreq->m_data += max_hdr; + tl = mtod(mreq, u_int32_t *); + mreq->m_len = 6 * NFSX_UNSIGNED; + bpos = ((caddr_t)tl) + mreq->m_len; + *tl++ = txdr_unsigned(nd->nd_retxid); + *tl++ = rpc_reply; + if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) { + *tl++ = rpc_msgdenied; + if (err & NFSERR_AUTHERR) { + *tl++ = rpc_autherr; + *tl = txdr_unsigned(err & ~NFSERR_AUTHERR); + mreq->m_len -= NFSX_UNSIGNED; + bpos -= NFSX_UNSIGNED; + } else { + *tl++ = rpc_mismatch; + *tl++ = txdr_unsigned(RPC_VER2); + *tl = txdr_unsigned(RPC_VER2); + } + } else { + *tl++ = rpc_msgaccepted; + + /* + * For Kerberos authentication, we must send the nickname + * verifier back, otherwise just RPCAUTH_NULL. + */ + if (nd->nd_flag & ND_KERBFULL) { + register struct nfsuid *nuidp; + struct timeval ktvin, ktvout; + + for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first; + nuidp != 0; nuidp = nuidp->nu_hash.le_next) { + if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid && + (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp), + &nuidp->nu_haddr, nd->nd_nam2))) + break; + } + if (nuidp) { + ktvin.tv_sec = + txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1); + ktvin.tv_usec = + txdr_unsigned(nuidp->nu_timestamp.tv_usec); + + /* + * Encrypt the timestamp in ecb mode using the + * session key. 
+			 */
+#ifdef NFSKERB
+			XXX
+#endif
+
+			*tl++ = rpc_auth_kerb;
+			*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
+			*tl = ktvout.tv_sec;
+			nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+			*tl++ = ktvout.tv_usec;
+			*tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
+		    } else {
+			*tl++ = 0;
+			*tl++ = 0;
+		    }
+		} else {
+			*tl++ = 0;
+			*tl++ = 0;
+		}
+		switch (err) {
+		case EPROGUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
+			break;
+		case EPROGMISMATCH:
+			*tl = txdr_unsigned(RPC_PROGMISMATCH);
+			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+			if (nd->nd_flag & ND_NQNFS) {
+				*tl++ = txdr_unsigned(3);
+				*tl = txdr_unsigned(3);
+			} else {
+				*tl++ = txdr_unsigned(2);
+				*tl = txdr_unsigned(3);
+			}
+			break;
+		case EPROCUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
+			break;
+		case EBADRPC:
+			*tl = txdr_unsigned(RPC_GARBAGE);
+			break;
+		default:
+			*tl = 0;
+			if (err != NFSERR_RETVOID) {
+				nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+				if (err)
+					*tl = txdr_unsigned(nfsrv_errmap(nd, err));
+				else
+					*tl = 0;
+			}
+			break;
+		};
+	}
+
+	/*
+	 * For nqnfs, piggyback lease as requested.
+	 */
+	if ((nd->nd_flag & ND_NQNFS) && err == 0) {
+		if (nd->nd_flag & ND_LEASE) {
+			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
+			*tl++ = txdr_unsigned(cache);
+			*tl++ = txdr_unsigned(nd->nd_duration);
+			txdr_hyper(*frev, tl);
+		} else {
+			nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+			*tl = 0;
+		}
+	}
+	if (mrq != NULL)
+		*mrq = mreq;
+	*mbp = mb;
+	*bposp = bpos;
+	if (err != 0 && err != NFSERR_RETVOID)
+		nfsstats.srvrpc_errs++;
+	return (0);
+}
+
+
+#endif /* NFS_NOSERVER */
+/*
+ * Nfs timer routine
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ */
+void
+nfs_timer(arg)
+	void *arg;	/* never used */
+{
+	register struct nfsreq *rep;
+	register struct mbuf *m;
+	register struct socket *so;
+	register struct nfsmount *nmp;
+	register int timeo;
+	int s, error;
+#ifndef NFS_NOSERVER
+	static long lasttime = 0;
+	register struct nfssvc_sock *slp;
+	u_quad_t cur_usec;
+#endif /* NFS_NOSERVER */
+	struct proc *p = &proc0; /* XXX for credentials, will break if sleep */
+
+	s = splnet();
+	for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
+		nmp = rep->r_nmp;
+		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
+			continue;
+		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+			nfs_softterm(rep);
+			continue;
+		}
+		if (rep->r_rtt >= 0) {
+			rep->r_rtt++;
+			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+				timeo = nmp->nm_timeo;
+			else
+				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+			if (nmp->nm_timeouts > 0)
+				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+			if (rep->r_rtt <= timeo)
+				continue;
+			if (nmp->nm_timeouts < 8)
+				nmp->nm_timeouts++;
+		}
+		/*
+		 * Check for server not responding
+		 */
+		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+		     rep->r_rexmit > nmp->nm_deadthresh) {
+			nfs_msg(rep->r_procp,
+			    nmp->nm_mountp->mnt_stat.f_mntfromname,
+			    "not responding");
+			rep->r_flags |= R_TPRINTFMSG;
+		}
+		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
+			nfsstats.rpctimeouts++;
+			nfs_softterm(rep);
+			continue;
+		}
+		if (nmp->nm_sotype != SOCK_DGRAM) {
+			if (++rep->r_rexmit > NFS_MAXREXMIT)
+				rep->r_rexmit = NFS_MAXREXMIT;
+			continue;
+		}
+		if ((so = nmp->nm_so) == NULL)
+			continue;
+
+		/*
+		 * If there is enough space and the window allows..
+		 *	Resend it
+		 * Set r_rtt to -1 in case we fail to send it now.
+ */ + rep->r_rtt = -1; + if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && + ((nmp->nm_flag & NFSMNT_DUMBTIMR) || + (rep->r_flags & R_SENT) || + nmp->nm_sent < nmp->nm_cwnd) && + (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ + if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, 0, m, (struct sockaddr *)0, + (struct mbuf *)0, p); + else + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, 0, m, nmp->nm_nam, (struct mbuf *)0, + p); + if (error) { + if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) + so->so_error = 0; + } else { + /* + * Iff first send, start timing + * else turn timing off, backoff timer + * and divide congestion window by 2. + */ + if (rep->r_flags & R_SENT) { + rep->r_flags &= ~R_TIMING; + if (++rep->r_rexmit > NFS_MAXREXMIT) + rep->r_rexmit = NFS_MAXREXMIT; + nmp->nm_cwnd >>= 1; + if (nmp->nm_cwnd < NFS_CWNDSCALE) + nmp->nm_cwnd = NFS_CWNDSCALE; + nfsstats.rpcretries++; + } else { + rep->r_flags |= R_SENT; + nmp->nm_sent += NFS_CWNDSCALE; + } + rep->r_rtt = 0; + } + } + } +#ifndef NFS_NOSERVER + /* + * Call the nqnfs server timer once a second to handle leases. + */ + if (lasttime != time_second) { + lasttime = time_second; + nqnfs_serverd(); + } + + /* + * Scan the write gathering queues for writes that need to be + * completed now. + */ + cur_usec = nfs_curusec(); + for (slp = nfssvc_sockhead.tqh_first; slp != 0; + slp = slp->ns_chain.tqe_next) { + if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec) + nfsrv_wakenfsd(slp); + } +#endif /* NFS_NOSERVER */ + splx(s); + nfs_timer_handle = timeout(nfs_timer, (void *)0, nfs_ticks); +} + +/* + * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT). + * The nm_send count is decremented now to avoid deadlocks when the process in + * soreceive() hasn't yet managed to send its own request. + */ + +static void +nfs_softterm(rep) + struct nfsreq *rep; +{ + rep->r_flags |= R_SOFTTERM; + + if (rep->r_flags & R_SENT) { + rep->r_nmp->nm_sent -= NFS_CWNDSCALE; + rep->r_flags &= ~R_SENT; + } +} + +/* + * Test for a termination condition pending on the process. + * This is used for NFSMNT_INT mounts. + */ +int +nfs_sigintr(nmp, rep, p) + struct nfsmount *nmp; + struct nfsreq *rep; + register struct proc *p; +{ + sigset_t tmpset; + + if (rep && (rep->r_flags & R_SOFTTERM)) + return (EINTR); + if (!(nmp->nm_flag & NFSMNT_INT)) + return (0); + if (p == NULL) + return (0); + + tmpset = p->p_siglist; + SIGSETNAND(tmpset, p->p_sigmask); + SIGSETNAND(tmpset, p->p_sigignore); + if (SIGNOTEMPTY(p->p_siglist) && NFSINT_SIGMASK(tmpset)) + return (EINTR); + + return (0); +} + +/* + * Lock a socket against others. + * Necessary for STREAM sockets to ensure you get an entire rpc request/reply + * and also to avoid race conditions between the processes with nfs requests + * in progress when a reconnect is necessary. + */ +int +nfs_sndlock(rep) + struct nfsreq *rep; +{ + register int *statep = &rep->r_nmp->nm_state; + struct proc *p; + int slpflag = 0, slptimeo = 0; + + if (rep) { + p = rep->r_procp; + if (rep->r_nmp->nm_flag & NFSMNT_INT) + slpflag = PCATCH; + } else + p = (struct proc *)0; + while (*statep & NFSSTA_SNDLOCK) { + if (nfs_sigintr(rep->r_nmp, rep, p)) + return (EINTR); + *statep |= NFSSTA_WANTSND; + (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1), + "nfsndlck", slptimeo); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + *statep |= NFSSTA_SNDLOCK; + return (0); +} + +/* + * Unlock the stream socket for others. 
+ */
+void
+nfs_sndunlock(rep)
+	struct nfsreq *rep;
+{
+	register int *statep = &rep->r_nmp->nm_state;
+
+	if ((*statep & NFSSTA_SNDLOCK) == 0)
+		panic("nfs sndunlock");
+	*statep &= ~NFSSTA_SNDLOCK;
+	if (*statep & NFSSTA_WANTSND) {
+		*statep &= ~NFSSTA_WANTSND;
+		wakeup((caddr_t)statep);
+	}
+}
+
+static int
+nfs_rcvlock(rep)
+	register struct nfsreq *rep;
+{
+	register int *statep = &rep->r_nmp->nm_state;
+	int slpflag, slptimeo = 0;
+
+	if (rep->r_nmp->nm_flag & NFSMNT_INT)
+		slpflag = PCATCH;
+	else
+		slpflag = 0;
+	while (*statep & NFSSTA_RCVLOCK) {
+		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
+			return (EINTR);
+		*statep |= NFSSTA_WANTRCV;
+		(void) tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsrcvlk",
+			slptimeo);
+		/*
+		 * If our reply was received while we were sleeping,
+		 * then just return without taking the lock to avoid a
+		 * situation where a single iod could 'capture' the
+		 * receive lock.
+		 */
+		if (rep->r_mrep != NULL)
+			return (EALREADY);
+		if (slpflag == PCATCH) {
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*statep |= NFSSTA_RCVLOCK;
+	return (0);
+}
+
+/*
+ * Unlock the stream socket for others.
+ */
+static void
+nfs_rcvunlock(rep)
+	register struct nfsreq *rep;
+{
+	register int *statep = &rep->r_nmp->nm_state;
+
+	if ((*statep & NFSSTA_RCVLOCK) == 0)
+		panic("nfs rcvunlock");
+	*statep &= ~NFSSTA_RCVLOCK;
+	if (*statep & NFSSTA_WANTRCV) {
+		*statep &= ~NFSSTA_WANTRCV;
+		wakeup((caddr_t)statep);
+	}
+}
+
+/*
+ * nfs_realign:
+ *
+ * Check for badly aligned mbuf data and realign by copying the unaligned
+ * portion of the data into a new mbuf chain and freeing the portions
+ * of the old chain that were replaced.
+ *
+ * We cannot simply realign the data within the existing mbuf chain
+ * because the underlying buffers may contain other rpc commands and
+ * we cannot afford to overwrite them.
+ *
+ * We would prefer to avoid this situation entirely.  The situation does
+ * not occur with NFS/UDP and is supposed to only occasionally occur
+ * with TCP.  Use vfs.nfs.realign_count and realign_test to check this.
+ */
+static void
+nfs_realign(pm, hsiz)
+	register struct mbuf **pm;
+	int hsiz;
+{
+	struct mbuf *m;
+	struct mbuf *n = NULL;
+	int off = 0;
+
+	++nfs_realign_test;
+
+	while ((m = *pm) != NULL) {
+		if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
+			MGET(n, M_WAIT, MT_DATA);
+			if (m->m_len >= MINCLSIZE) {
+				MCLGET(n, M_WAIT);
+			}
+			n->m_len = 0;
+			break;
+		}
+		pm = &m->m_next;
+	}
+
+	/*
+	 * If n is non-NULL, loop on m copying data, then replace the
+	 * portion of the chain that had to be realigned.
+	 */
+	if (n != NULL) {
+		++nfs_realign_count;
+		while (m) {
+			m_copyback(n, off, m->m_len, mtod(m, caddr_t));
+			off += m->m_len;
+			m = m->m_next;
+		}
+		m_freem(*pm);
+		*pm = n;
+	}
+}
+
+#ifndef NFS_NOSERVER
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ */ +int +nfs_getreq(nd, nfsd, has_header) + register struct nfsrv_descript *nd; + struct nfsd *nfsd; + int has_header; +{ + register int len, i; + register u_int32_t *tl; + register int32_t t1; + struct uio uio; + struct iovec iov; + caddr_t dpos, cp2, cp; + u_int32_t nfsvers, auth_type; + uid_t nickuid; + int error = 0, nqnfs = 0, ticklen; + struct mbuf *mrep, *md; + register struct nfsuid *nuidp; + struct timeval tvin, tvout; +#if 0 /* until encrypted keys are implemented */ + NFSKERBKEYSCHED_T keys; /* stores key schedule */ +#endif + + mrep = nd->nd_mrep; + md = nd->nd_md; + dpos = nd->nd_dpos; + if (has_header) { + nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED); + nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++); + if (*tl++ != rpc_call) { + m_freem(mrep); + return (EBADRPC); + } + } else + nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED); + nd->nd_repstat = 0; + nd->nd_flag = 0; + if (*tl++ != rpc_vers) { + nd->nd_repstat = ERPCMISMATCH; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + if (*tl != nfs_prog) { + if (*tl == nqnfs_prog) + nqnfs++; + else { + nd->nd_repstat = EPROGUNAVAIL; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + } + tl++; + nfsvers = fxdr_unsigned(u_int32_t, *tl++); + if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) || + (nfsvers != NQNFS_VER3 && nqnfs)) { + nd->nd_repstat = EPROGMISMATCH; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + if (nqnfs) + nd->nd_flag = (ND_NFSV3 | ND_NQNFS); + else if (nfsvers == NFS_VER3) + nd->nd_flag = ND_NFSV3; + nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++); + if (nd->nd_procnum == NFSPROC_NULL) + return (0); + if (nd->nd_procnum >= NFS_NPROCS || + (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) || + (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) { + nd->nd_repstat = EPROCUNAVAIL; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + if ((nd->nd_flag & ND_NFSV3) == 0) + nd->nd_procnum = nfsv3_procid[nd->nd_procnum]; + auth_type = *tl++; + len = fxdr_unsigned(int, *tl++); + if (len < 0 || len > RPCAUTH_MAXSIZ) { + m_freem(mrep); + return (EBADRPC); + } + + nd->nd_flag &= ~ND_KERBAUTH; + /* + * Handle auth_unix or auth_kerb. + */ + if (auth_type == rpc_auth_unix) { + len = fxdr_unsigned(int, *++tl); + if (len < 0 || len > NFS_MAXNAMLEN) { + m_freem(mrep); + return (EBADRPC); + } + nfsm_adv(nfsm_rndup(len)); + nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred)); + nd->nd_cr.cr_ref = 1; + nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); + nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++); + len = fxdr_unsigned(int, *tl); + if (len < 0 || len > RPCAUTH_UNIXGIDS) { + m_freem(mrep); + return (EBADRPC); + } + nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED); + for (i = 1; i <= len; i++) + if (i < NGROUPS) + nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); + else + tl++; + nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? 
NGROUPS : (len + 1); + if (nd->nd_cr.cr_ngroups > 1) + nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups); + len = fxdr_unsigned(int, *++tl); + if (len < 0 || len > RPCAUTH_MAXSIZ) { + m_freem(mrep); + return (EBADRPC); + } + if (len > 0) + nfsm_adv(nfsm_rndup(len)); + } else if (auth_type == rpc_auth_kerb) { + switch (fxdr_unsigned(int, *tl++)) { + case RPCAKN_FULLNAME: + ticklen = fxdr_unsigned(int, *tl); + *((u_int32_t *)nfsd->nfsd_authstr) = *tl; + uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED; + nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED; + if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) { + m_freem(mrep); + return (EBADRPC); + } + uio.uio_offset = 0; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4]; + iov.iov_len = RPCAUTH_MAXSIZ - 4; + nfsm_mtouio(&uio, uio.uio_resid); + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + if (*tl++ != rpc_auth_kerb || + fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) { + printf("Bad kerb verifier\n"); + nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF); + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED); + tl = (u_int32_t *)cp; + if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) { + printf("Not fullname kerb verifier\n"); + nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF); + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + cp += NFSX_UNSIGNED; + bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED); + nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED; + nd->nd_flag |= ND_KERBFULL; + nfsd->nfsd_flag |= NFSD_NEEDAUTH; + break; + case RPCAKN_NICKNAME: + if (len != 2 * NFSX_UNSIGNED) { + printf("Kerb nickname short\n"); + nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED); + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + nickuid = fxdr_unsigned(uid_t, *tl); + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + if (*tl++ != rpc_auth_kerb || + fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) { + printf("Kerb nick verifier bad\n"); + nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF); + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + tvin.tv_sec = *tl++; + tvin.tv_usec = *tl; + + for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first; + nuidp != 0; nuidp = nuidp->nu_hash.le_next) { + if (nuidp->nu_cr.cr_uid == nickuid && + (!nd->nd_nam2 || + netaddr_match(NU_NETFAM(nuidp), + &nuidp->nu_haddr, nd->nd_nam2))) + break; + } + if (!nuidp) { + nd->nd_repstat = + (NFSERR_AUTHERR|AUTH_REJECTCRED); + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + + /* + * Now, decrypt the timestamp using the session key + * and validate it. + */ +#ifdef NFSKERB + XXX +#endif + + tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec); + tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec); + if (nuidp->nu_expire < time_second || + nuidp->nu_timestamp.tv_sec > tvout.tv_sec || + (nuidp->nu_timestamp.tv_sec == tvout.tv_sec && + nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) { + nuidp->nu_expire = 0; + nd->nd_repstat = + (NFSERR_AUTHERR|AUTH_REJECTVERF); + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr); + nd->nd_flag |= ND_KERBNICK; + }; + } else { + nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED); + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + + /* + * For nqnfs, get piggybacked lease request. 
+ */ + if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + nd->nd_flag |= fxdr_unsigned(int, *tl); + if (nd->nd_flag & ND_LEASE) { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + nd->nd_duration = fxdr_unsigned(int32_t, *tl); + } else + nd->nd_duration = NQ_MINLEASE; + } else + nd->nd_duration = NQ_MINLEASE; + nd->nd_md = md; + nd->nd_dpos = dpos; + return (0); +nfsmout: + return (error); +} + +#endif + +static int +nfs_msg(p, server, msg) + struct proc *p; + char *server, *msg; +{ + tpr_t tpr; + + if (p) + tpr = tprintf_open(p); + else + tpr = NULL; + tprintf(tpr, "nfs server %s: %s\n", server, msg); + tprintf_close(tpr); + return (0); +} + +#ifndef NFS_NOSERVER +/* + * Socket upcall routine for the nfsd sockets. + * The caddr_t arg is a pointer to the "struct nfssvc_sock". + * Essentially do as much as possible non-blocking, else punt and it will + * be called with M_WAIT from an nfsd. + */ +void +nfsrv_rcv(so, arg, waitflag) + struct socket *so; + void *arg; + int waitflag; +{ + register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; + register struct mbuf *m; + struct mbuf *mp; + struct sockaddr *nam; + struct uio auio; + int flags, error; + + if ((slp->ns_flag & SLP_VALID) == 0) + return; +#ifdef notdef + /* + * Define this to test for nfsds handling this under heavy load. + */ + if (waitflag == M_DONTWAIT) { + slp->ns_flag |= SLP_NEEDQ; goto dorecs; + } +#endif + auio.uio_procp = NULL; + if (so->so_type == SOCK_STREAM) { + /* + * If there are already records on the queue, defer soreceive() + * to an nfsd so that there is feedback to the TCP layer that + * the nfs servers are heavily loaded. + */ + if (STAILQ_FIRST(&slp->ns_rec) && waitflag == M_DONTWAIT) { + slp->ns_flag |= SLP_NEEDQ; + goto dorecs; + } + + /* + * Do soreceive(). + */ + auio.uio_resid = 1000000000; + flags = MSG_DONTWAIT; + error = so->so_proto->pr_usrreqs->pru_soreceive + (so, &nam, &auio, &mp, (struct mbuf **)0, &flags); + if (error || mp == (struct mbuf *)0) { + if (error == EWOULDBLOCK) + slp->ns_flag |= SLP_NEEDQ; + else + slp->ns_flag |= SLP_DISCONN; + goto dorecs; + } + m = mp; + if (slp->ns_rawend) { + slp->ns_rawend->m_next = m; + slp->ns_cc += 1000000000 - auio.uio_resid; + } else { + slp->ns_raw = m; + slp->ns_cc = 1000000000 - auio.uio_resid; + } + while (m->m_next) + m = m->m_next; + slp->ns_rawend = m; + + /* + * Now try and parse record(s) out of the raw stream data. + */ + error = nfsrv_getstream(slp, waitflag); + if (error) { + if (error == EPERM) + slp->ns_flag |= SLP_DISCONN; + else + slp->ns_flag |= SLP_NEEDQ; + } + } else { + do { + auio.uio_resid = 1000000000; + flags = MSG_DONTWAIT; + error = so->so_proto->pr_usrreqs->pru_soreceive + (so, &nam, &auio, &mp, + (struct mbuf **)0, &flags); + if (mp) { + struct nfsrv_rec *rec; + rec = malloc(sizeof(struct nfsrv_rec), + M_NFSRVDESC, waitflag); + if (!rec) { + if (nam) + FREE(nam, M_SONAME); + m_freem(mp); + continue; + } + nfs_realign(&mp, 10 * NFSX_UNSIGNED); + rec->nr_address = nam; + rec->nr_packet = mp; + STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link); + } + if (error) { + if ((so->so_proto->pr_flags & PR_CONNREQUIRED) + && error != EWOULDBLOCK) { + slp->ns_flag |= SLP_DISCONN; + goto dorecs; + } + } + } while (mp); + } + + /* + * Now try and process the request records, non-blocking. 
+ */ +dorecs: + if (waitflag == M_DONTWAIT && + (STAILQ_FIRST(&slp->ns_rec) + || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) + nfsrv_wakenfsd(slp); +} + +/* + * Try and extract an RPC request from the mbuf data list received on a + * stream socket. The "waitflag" argument indicates whether or not it + * can sleep. + */ +static int +nfsrv_getstream(slp, waitflag) + register struct nfssvc_sock *slp; + int waitflag; +{ + register struct mbuf *m, **mpp; + register char *cp1, *cp2; + register int len; + struct mbuf *om, *m2, *recm = NULL; + u_int32_t recmark; + + if (slp->ns_flag & SLP_GETSTREAM) + panic("nfs getstream"); + slp->ns_flag |= SLP_GETSTREAM; + for (;;) { + if (slp->ns_reclen == 0) { + if (slp->ns_cc < NFSX_UNSIGNED) { + slp->ns_flag &= ~SLP_GETSTREAM; + return (0); + } + m = slp->ns_raw; + if (m->m_len >= NFSX_UNSIGNED) { + bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED); + m->m_data += NFSX_UNSIGNED; + m->m_len -= NFSX_UNSIGNED; + } else { + cp1 = (caddr_t)&recmark; + cp2 = mtod(m, caddr_t); + while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { + while (m->m_len == 0) { + m = m->m_next; + cp2 = mtod(m, caddr_t); + } + *cp1++ = *cp2++; + m->m_data++; + m->m_len--; + } + } + slp->ns_cc -= NFSX_UNSIGNED; + recmark = ntohl(recmark); + slp->ns_reclen = recmark & ~0x80000000; + if (recmark & 0x80000000) + slp->ns_flag |= SLP_LASTFRAG; + else + slp->ns_flag &= ~SLP_LASTFRAG; + if (slp->ns_reclen > NFS_MAXPACKET) { + slp->ns_flag &= ~SLP_GETSTREAM; + return (EPERM); + } + } + + /* + * Now get the record part. + */ + if (slp->ns_cc == slp->ns_reclen) { + recm = slp->ns_raw; + slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; + slp->ns_cc = slp->ns_reclen = 0; + } else if (slp->ns_cc > slp->ns_reclen) { + len = 0; + m = slp->ns_raw; + om = (struct mbuf *)0; + while (len < slp->ns_reclen) { + if ((len + m->m_len) > slp->ns_reclen) { + m2 = m_copym(m, 0, slp->ns_reclen - len, + waitflag); + if (m2) { + if (om) { + om->m_next = m2; + recm = slp->ns_raw; + } else + recm = m2; + m->m_data += slp->ns_reclen - len; + m->m_len -= slp->ns_reclen - len; + len = slp->ns_reclen; + } else { + slp->ns_flag &= ~SLP_GETSTREAM; + return (EWOULDBLOCK); + } + } else if ((len + m->m_len) == slp->ns_reclen) { + om = m; + len += m->m_len; + m = m->m_next; + recm = slp->ns_raw; + om->m_next = (struct mbuf *)0; + } else { + om = m; + len += m->m_len; + m = m->m_next; + } + } + slp->ns_raw = m; + slp->ns_cc -= len; + slp->ns_reclen = 0; + } else { + slp->ns_flag &= ~SLP_GETSTREAM; + return (0); + } + + /* + * Accumulate the fragments into a record. + */ + mpp = &slp->ns_frag; + while (*mpp) + mpp = &((*mpp)->m_next); + *mpp = recm; + if (slp->ns_flag & SLP_LASTFRAG) { + struct nfsrv_rec *rec; + rec = malloc(sizeof(struct nfsrv_rec), M_NFSRVDESC, waitflag); + if (!rec) { + m_freem(slp->ns_frag); + } else { + nfs_realign(&slp->ns_frag, 10 * NFSX_UNSIGNED); + rec->nr_address = (struct sockaddr *)0; + rec->nr_packet = slp->ns_frag; + STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link); + } + slp->ns_frag = (struct mbuf *)0; + } + } +} + +/* + * Parse an RPC header. 
+ */ +int +nfsrv_dorec(slp, nfsd, ndp) + register struct nfssvc_sock *slp; + struct nfsd *nfsd; + struct nfsrv_descript **ndp; +{ + struct nfsrv_rec *rec; + register struct mbuf *m; + struct sockaddr *nam; + register struct nfsrv_descript *nd; + int error; + + *ndp = NULL; + if ((slp->ns_flag & SLP_VALID) == 0 || !STAILQ_FIRST(&slp->ns_rec)) + return (ENOBUFS); + rec = STAILQ_FIRST(&slp->ns_rec); + STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link); + nam = rec->nr_address; + m = rec->nr_packet; + free(rec, M_NFSRVDESC); + MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript), + M_NFSRVDESC, M_WAITOK); + nd->nd_md = nd->nd_mrep = m; + nd->nd_nam2 = nam; + nd->nd_dpos = mtod(m, caddr_t); + error = nfs_getreq(nd, nfsd, TRUE); + if (error) { + if (nam) { + FREE(nam, M_SONAME); + } + free((caddr_t)nd, M_NFSRVDESC); + return (error); + } + *ndp = nd; + nfsd->nfsd_nd = nd; + return (0); +} + +/* + * Search for a sleeping nfsd and wake it up. + * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the + * running nfsds will go look for the work in the nfssvc_sock list. + */ +void +nfsrv_wakenfsd(slp) + struct nfssvc_sock *slp; +{ + register struct nfsd *nd; + + if ((slp->ns_flag & SLP_VALID) == 0) + return; + for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) { + if (nd->nfsd_flag & NFSD_WAITING) { + nd->nfsd_flag &= ~NFSD_WAITING; + if (nd->nfsd_slp) + panic("nfsd wakeup"); + slp->ns_sref++; + nd->nfsd_slp = slp; + wakeup((caddr_t)nd); + return; + } + } + slp->ns_flag |= SLP_DOREC; + nfsd_head_flag |= NFSD_CHECKSLP; +} +#endif /* NFS_NOSERVER */ diff --git a/sys/nfs/nfs_srvcache.c b/sys/nfs/nfs_srvcache.c new file mode 100644 index 0000000..6f9d42f --- /dev/null +++ b/sys/nfs/nfs_srvcache.c @@ -0,0 +1,356 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_srvcache.c 8.3 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + +/* + * Reference: Chet Juszczak, "Improving the Performance and Correctness + * of an NFS Server", in Proc. Winter 1989 USENIX Conference, + * pages 53-63. San Diego, February 1989. + */ +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> /* for dup_sockaddr */ + +#include <netinet/in.h> +#ifdef ISO +#include <netiso/iso.h> +#endif +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsrvcache.h> + +#ifndef NFS_NOSERVER +extern struct nfsstats nfsstats; +extern int nfsv2_procid[NFS_NPROCS]; +static long numnfsrvcache; +static long desirednfsrvcache = NFSRVCACHESIZ; + +#define NFSRCHASH(xid) \ + (&nfsrvhashtbl[((xid) + ((xid) >> 24)) & nfsrvhash]) +static LIST_HEAD(nfsrvhash, nfsrvcache) *nfsrvhashtbl; +static TAILQ_HEAD(nfsrvlru, nfsrvcache) nfsrvlruhead; +static u_long nfsrvhash; + +#define TRUE 1 +#define FALSE 0 + +#define NETFAMILY(rp) \ + (((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO) + +/* + * Static array that defines which nfs rpc's are nonidempotent + */ +static int nonidempotent[NFS_NPROCS] = { + FALSE, + FALSE, + TRUE, + FALSE, + FALSE, + FALSE, + FALSE, + TRUE, + TRUE, + TRUE, + TRUE, + TRUE, + TRUE, + TRUE, + TRUE, + TRUE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, +}; + +/* True iff the rpc reply is an nfs status ONLY! */ +static int nfsv2_repstat[NFS_NPROCS] = { + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + TRUE, + TRUE, + TRUE, + TRUE, + FALSE, + TRUE, + FALSE, + FALSE, +}; + +/* + * Initialize the server request cache list + */ +void +nfsrv_initcache() +{ + + nfsrvhashtbl = hashinit(desirednfsrvcache, M_NFSD, &nfsrvhash); + TAILQ_INIT(&nfsrvlruhead); +} + +/* + * Look for the request in the cache + * If found then + * return action and optionally reply + * else + * insert it in the cache + * + * The rules are as follows: + * - if in progress, return DROP request + * - if completed within DELAY of the current time, return DROP it + * - if completed a longer time ago return REPLY if the reply was cached or + * return DOIT + * Update/add new request at end of lru list + */ +int +nfsrv_getcache(nd, slp, repp) + register struct nfsrv_descript *nd; + struct nfssvc_sock *slp; + struct mbuf **repp; +{ + register struct nfsrvcache *rp; + struct mbuf *mb; + struct sockaddr_in *saddr; + caddr_t bpos; + int ret; + + /* + * Don't cache recent requests for reliable transport protocols. + * (Maybe we should for the case of a reconnect, but..) 
+ */ + if (!nd->nd_nam2) + return (RC_DOIT); +loop: + for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0; + rp = rp->rc_hash.le_next) { + if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc && + netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { + NFS_DPF(RC, ("H%03x", rp->rc_xid & 0xfff)); + if ((rp->rc_flag & RC_LOCKED) != 0) { + rp->rc_flag |= RC_WANTED; + (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0); + goto loop; + } + rp->rc_flag |= RC_LOCKED; + /* If not at end of LRU chain, move it there */ + if (rp->rc_lru.tqe_next) { + TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru); + TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru); + } + if (rp->rc_state == RC_UNUSED) + panic("nfsrv cache"); + if (rp->rc_state == RC_INPROG) { + nfsstats.srvcache_inproghits++; + ret = RC_DROPIT; + } else if (rp->rc_flag & RC_REPSTATUS) { + nfsstats.srvcache_nonidemdonehits++; + nfs_rephead(0, nd, slp, rp->rc_status, + 0, (u_quad_t *)0, repp, &mb, &bpos); + ret = RC_REPLY; + } else if (rp->rc_flag & RC_REPMBUF) { + nfsstats.srvcache_nonidemdonehits++; + *repp = m_copym(rp->rc_reply, 0, M_COPYALL, + M_WAIT); + ret = RC_REPLY; + } else { + nfsstats.srvcache_idemdonehits++; + rp->rc_state = RC_INPROG; + ret = RC_DOIT; + } + rp->rc_flag &= ~RC_LOCKED; + if (rp->rc_flag & RC_WANTED) { + rp->rc_flag &= ~RC_WANTED; + wakeup((caddr_t)rp); + } + return (ret); + } + } + nfsstats.srvcache_misses++; + NFS_DPF(RC, ("M%03x", nd->nd_retxid & 0xfff)); + if (numnfsrvcache < desirednfsrvcache) { + rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp, + M_NFSD, M_WAITOK); + bzero((char *)rp, sizeof *rp); + numnfsrvcache++; + rp->rc_flag = RC_LOCKED; + } else { + rp = nfsrvlruhead.tqh_first; + while ((rp->rc_flag & RC_LOCKED) != 0) { + rp->rc_flag |= RC_WANTED; + (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0); + rp = nfsrvlruhead.tqh_first; + } + rp->rc_flag |= RC_LOCKED; + LIST_REMOVE(rp, rc_hash); + TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru); + if (rp->rc_flag & RC_REPMBUF) + m_freem(rp->rc_reply); + if (rp->rc_flag & RC_NAM) + FREE(rp->rc_nam, M_SONAME); + rp->rc_flag &= (RC_LOCKED | RC_WANTED); + } + TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru); + rp->rc_state = RC_INPROG; + rp->rc_xid = nd->nd_retxid; + saddr = (struct sockaddr_in *)nd->nd_nam; + switch (saddr->sin_family) { + case AF_INET: + rp->rc_flag |= RC_INETADDR; + rp->rc_inetaddr = saddr->sin_addr.s_addr; + break; + case AF_ISO: + default: + rp->rc_flag |= RC_NAM; + rp->rc_nam = dup_sockaddr(nd->nd_nam, 1); + break; + }; + rp->rc_proc = nd->nd_procnum; + LIST_INSERT_HEAD(NFSRCHASH(nd->nd_retxid), rp, rc_hash); + rp->rc_flag &= ~RC_LOCKED; + if (rp->rc_flag & RC_WANTED) { + rp->rc_flag &= ~RC_WANTED; + wakeup((caddr_t)rp); + } + return (RC_DOIT); +} + +/* + * Update a request cache entry after the rpc has been done + */ +void +nfsrv_updatecache(nd, repvalid, repmbuf) + register struct nfsrv_descript *nd; + int repvalid; + struct mbuf *repmbuf; +{ + register struct nfsrvcache *rp; + + if (!nd->nd_nam2) + return; +loop: + for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0; + rp = rp->rc_hash.le_next) { + if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc && + netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { + NFS_DPF(RC, ("U%03x", rp->rc_xid & 0xfff)); + if ((rp->rc_flag & RC_LOCKED) != 0) { + rp->rc_flag |= RC_WANTED; + (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0); + goto loop; + } + rp->rc_flag |= RC_LOCKED; + if (rp->rc_state == RC_DONE) { + /* + * This can occur if the cache is too small. 
+				 * Retransmits of the same request aren't
+				 * dropped so we may see the operation
+				 * complete more than once.
+				 */
+				if (rp->rc_flag & RC_REPMBUF) {
+					m_freem(rp->rc_reply);
+					rp->rc_flag &= ~RC_REPMBUF;
+				}
+			}
+			rp->rc_state = RC_DONE;
+			/*
+			 * If we have a valid reply update status and save
+			 * the reply for non-idempotent rpc's.
+			 */
+			if (repvalid && nonidempotent[nd->nd_procnum]) {
+				if ((nd->nd_flag & ND_NFSV3) == 0 &&
+				  nfsv2_repstat[nfsv2_procid[nd->nd_procnum]]) {
+					rp->rc_status = nd->nd_repstat;
+					rp->rc_flag |= RC_REPSTATUS;
+				} else {
+					rp->rc_reply = m_copym(repmbuf,
+						0, M_COPYALL, M_WAIT);
+					rp->rc_flag |= RC_REPMBUF;
+				}
+			}
+			rp->rc_flag &= ~RC_LOCKED;
+			if (rp->rc_flag & RC_WANTED) {
+				rp->rc_flag &= ~RC_WANTED;
+				wakeup((caddr_t)rp);
+			}
+			return;
+		}
+	}
+	NFS_DPF(RC, ("L%03x", nd->nd_retxid & 0xfff));
+}
+
+/*
+ * Clean out the cache. Called when the last nfsd terminates.
+ */
+void
+nfsrv_cleancache()
+{
+	register struct nfsrvcache *rp, *nextrp;
+
+	for (rp = nfsrvlruhead.tqh_first; rp != 0; rp = nextrp) {
+		nextrp = rp->rc_lru.tqe_next;
+		LIST_REMOVE(rp, rc_hash);
+		TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
+		if (rp->rc_flag & RC_REPMBUF)
+			m_freem(rp->rc_reply);
+		if (rp->rc_flag & RC_NAM)
+			free(rp->rc_nam, M_SONAME);
+		free(rp, M_NFSD);
+	}
+	numnfsrvcache = 0;
+}
+
+#endif /* NFS_NOSERVER */
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
new file mode 100644
index 0000000..e63ba63
--- /dev/null
+++ b/sys/nfs/nfs_subs.c
@@ -0,0 +1,2272 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * + * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 + * $FreeBSD$ + */ + +/* + * These functions support the macros and help fiddle mbuf chains for + * the nfs op functions. They do things like create the rpc header and + * copy data between mbuf chains and uio lists. + */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/malloc.h> +#include <sys/sysent.h> +#include <sys/syscall.h> +#include <sys/conf.h> + +#include <vm/vm.h> +#include <vm/vm_object.h> +#include <vm/vm_extern.h> +#include <vm/vm_zone.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsnode.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nfsmount.h> +#include <nfs/nqnfs.h> +#include <nfs/nfsrtt.h> + +#include <netinet/in.h> +#ifdef ISO +#include <netiso/iso.h> +#endif + +/* + * Data items converted to xdr at startup, since they are constant + * This is kinda hokey, but may save a little time doing byte swaps + */ +u_int32_t nfs_xdrneg1; +u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, + rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, + rpc_auth_kerb; +u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false; + +/* And other global data */ +static u_int32_t nfs_xid = 0; +static enum vtype nv2tov_type[8]= { + VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON +}; +enum vtype nv3tov_type[8]= { + VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO +}; + +int nfs_ticks; +int nfs_pbuf_freecnt = -1; /* start out unlimited */ + +struct nfs_reqq nfs_reqq; +struct nfssvc_sockhead nfssvc_sockhead; +int nfssvc_sockhead_flag; +struct nfsd_head nfsd_head; +int nfsd_head_flag; +struct nfs_bufq nfs_bufq; +struct nqtimerhead nqtimerhead; +struct nqfhhashhead *nqfhhashtbl; +u_long nqfhhash; + +static void (*nfs_prev_lease_updatetime) __P((int)); +static int nfs_prev_nfssvc_sy_narg; +static sy_call_t *nfs_prev_nfssvc_sy_call; + +#ifndef NFS_NOSERVER + +static vop_t *nfs_prev_vop_lease_check; +static int nfs_prev_getfh_sy_narg; +static sy_call_t *nfs_prev_getfh_sy_call; + +/* + * Mapping of old NFS Version 2 RPC numbers to generic numbers. + */ +int nfsv3_procid[NFS_NPROCS] = { + NFSPROC_NULL, + NFSPROC_GETATTR, + NFSPROC_SETATTR, + NFSPROC_NOOP, + NFSPROC_LOOKUP, + NFSPROC_READLINK, + NFSPROC_READ, + NFSPROC_NOOP, + NFSPROC_WRITE, + NFSPROC_CREATE, + NFSPROC_REMOVE, + NFSPROC_RENAME, + NFSPROC_LINK, + NFSPROC_SYMLINK, + NFSPROC_MKDIR, + NFSPROC_RMDIR, + NFSPROC_READDIR, + NFSPROC_FSSTAT, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP, + NFSPROC_NOOP +}; + +#endif /* NFS_NOSERVER */ +/* + * and the reverse mapping from generic to Version 2 procedure numbers + */ +int nfsv2_procid[NFS_NPROCS] = { + NFSV2PROC_NULL, + NFSV2PROC_GETATTR, + NFSV2PROC_SETATTR, + NFSV2PROC_LOOKUP, + NFSV2PROC_NOOP, + NFSV2PROC_READLINK, + NFSV2PROC_READ, + NFSV2PROC_WRITE, + NFSV2PROC_CREATE, + NFSV2PROC_MKDIR, + NFSV2PROC_SYMLINK, + NFSV2PROC_CREATE, + NFSV2PROC_REMOVE, + NFSV2PROC_RMDIR, + NFSV2PROC_RENAME, + NFSV2PROC_LINK, + NFSV2PROC_READDIR, + NFSV2PROC_NOOP, + NFSV2PROC_STATFS, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, +}; + +#ifndef NFS_NOSERVER +/* + * Maps errno values to nfs error numbers. 
+ * Use NFSERR_IO as the catch all for ones not specifically defined in + * RFC 1094. + */ +static u_char nfsrv_v2errmap[ELAST] = { + NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, + NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, + NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO /* << Last is 86 */ +}; + +/* + * Maps errno values to nfs error numbers. + * Although it is not obvious whether or not NFS clients really care if + * a returned error value is in the specified list for the procedure, the + * safest thing to do is filter them appropriately. For Version 2, the + * X/Open XNFS document is the only specification that defines error values + * for each RPC (The RFC simply lists all possible error values for all RPCs), + * so I have decided to not do this for Version 2. + * The first entry is the default error return and the rest are the valid + * errors for that RPC in increasing numeric order. 
+ */ +static short nfsv3err_null[] = { + 0, + 0, +}; + +static short nfsv3err_getattr[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_setattr[] = { + NFSERR_IO, + NFSERR_PERM, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOT_SYNC, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_lookup[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_NAMETOL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_access[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_readlink[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_read[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_NXIO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_write[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_FBIG, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_create[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_mkdir[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_symlink[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_mknod[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_BADTYPE, + 0, +}; + +static short nfsv3err_remove[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_rmdir[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_INVAL, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_NOTEMPTY, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_rename[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_XDEV, + NFSERR_NOTDIR, + NFSERR_ISDIR, + NFSERR_INVAL, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_MLINK, + NFSERR_NAMETOL, + NFSERR_NOTEMPTY, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_link[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_XDEV, + NFSERR_NOTDIR, + NFSERR_INVAL, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_MLINK, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + 
NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_readdir[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_BAD_COOKIE, + NFSERR_TOOSMALL, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_readdirplus[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_BAD_COOKIE, + NFSERR_NOTSUPP, + NFSERR_TOOSMALL, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_fsstat[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_fsinfo[] = { + NFSERR_STALE, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_pathconf[] = { + NFSERR_STALE, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short nfsv3err_commit[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + 0, +}; + +static short *nfsrv_v3errmap[] = { + nfsv3err_null, + nfsv3err_getattr, + nfsv3err_setattr, + nfsv3err_lookup, + nfsv3err_access, + nfsv3err_readlink, + nfsv3err_read, + nfsv3err_write, + nfsv3err_create, + nfsv3err_mkdir, + nfsv3err_symlink, + nfsv3err_mknod, + nfsv3err_remove, + nfsv3err_rmdir, + nfsv3err_rename, + nfsv3err_link, + nfsv3err_readdir, + nfsv3err_readdirplus, + nfsv3err_fsstat, + nfsv3err_fsinfo, + nfsv3err_pathconf, + nfsv3err_commit, +}; + +#endif /* NFS_NOSERVER */ + +extern struct nfsrtt nfsrtt; +extern time_t nqnfsstarttime; +extern int nqsrv_clockskew; +extern int nqsrv_writeslack; +extern int nqsrv_maxlease; +extern struct nfsstats nfsstats; +extern int nqnfs_piggy[NFS_NPROCS]; +extern nfstype nfsv2_type[9]; +extern nfstype nfsv3_type[9]; +extern struct nfsnodehashhead *nfsnodehashtbl; +extern u_long nfsnodehash; + +struct getfh_args; +extern int getfh(struct proc *, struct getfh_args *, int *); +struct nfssvc_args; +extern int nfssvc(struct proc *, struct nfssvc_args *, int *); + +LIST_HEAD(nfsnodehashhead, nfsnode); + +int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *)); + +u_quad_t +nfs_curusec() +{ + struct timeval tv; + + getmicrotime(&tv); + return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec); +} + +/* + * Create the header for an rpc request packet + * The hsiz is the size of the rest of the nfs request header. + * (just used to decide if a cluster is a good idea) + */ +struct mbuf * +nfsm_reqh(vp, procid, hsiz, bposp) + struct vnode *vp; + u_long procid; + int hsiz; + caddr_t *bposp; +{ + register struct mbuf *mb; + register u_int32_t *tl; + register caddr_t bpos; + struct mbuf *mb2; + struct nfsmount *nmp; + int nqflag; + + MGET(mb, M_WAIT, MT_DATA); + if (hsiz >= MINCLSIZE) + MCLGET(mb, M_WAIT); + mb->m_len = 0; + bpos = mtod(mb, caddr_t); + + /* + * For NQNFS, add lease request. + */ + if (vp) { + nmp = VFSTONFS(vp->v_mount); + if (nmp->nm_flag & NFSMNT_NQNFS) { + nqflag = NQNFS_NEEDLEASE(vp, procid); + if (nqflag) { + nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(nqflag); + *tl = txdr_unsigned(nmp->nm_leaseterm); + } else { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = 0; + } + } + } + /* Finally, return values */ + *bposp = bpos; + return (mb); +} + +/* + * Build the RPC header and fill in the authorization info. + * The authorization string argument is only used when the credentials + * come from outside of the kernel. + * Returns the head of the mbuf list. 
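[Editor's aside, not part of the patch] Each nfsv3err_* list above begins with the default reply status, followed by the statuses legal for that procedure in ascending order, terminated by 0. The sketch below shows how such a list is consulted (the same walk nfsrv_errmap() performs further down); the table contents here are invented for the example.

#include <stdio.h>

/* first entry = default status, then the legal statuses ascending, 0-terminated
 * (values made up for illustration) */
static const short example_errlist[] = { 5, 1, 5, 13, 28, 70, 0 };

static int
filter_error(const short *tab, int error)
{
        const short *errp = tab;

        while (*++errp) {               /* skip the default slot, walk the list */
                if (*errp == error)
                        return (error); /* status is legal for this procedure */
                if (*errp > error)
                        break;          /* ascending list: no later entry matches */
        }
        return ((int)tab[0]);           /* not legal: substitute the default */
}

int
main(void)
{
        printf("13 -> %d, 22 -> %d\n",
            filter_error(example_errlist, 13),
            filter_error(example_errlist, 22));
        return (0);
}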
+ */ +struct mbuf * +nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, + verf_str, mrest, mrest_len, mbp, xidp) + register struct ucred *cr; + int nmflag; + int procid; + int auth_type; + int auth_len; + char *auth_str; + int verf_len; + char *verf_str; + struct mbuf *mrest; + int mrest_len; + struct mbuf **mbp; + u_int32_t *xidp; +{ + register struct mbuf *mb; + register u_int32_t *tl; + register caddr_t bpos; + register int i; + struct mbuf *mreq, *mb2; + int siz, grpsiz, authsiz; + + authsiz = nfsm_rndup(auth_len); + MGETHDR(mb, M_WAIT, MT_DATA); + if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { + MCLGET(mb, M_WAIT); + } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { + MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); + } else { + MH_ALIGN(mb, 8 * NFSX_UNSIGNED); + } + mb->m_len = 0; + mreq = mb; + bpos = mtod(mb, caddr_t); + + /* + * First the RPC header. + */ + nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED); + + /* Get a pretty random xid to start with */ + if (!nfs_xid) + nfs_xid = random(); + /* + * Skip zero xid if it should ever happen. + */ + if (++nfs_xid == 0) + nfs_xid++; + + *tl++ = *xidp = txdr_unsigned(nfs_xid); + *tl++ = rpc_call; + *tl++ = rpc_vers; + if (nmflag & NFSMNT_NQNFS) { + *tl++ = txdr_unsigned(NQNFS_PROG); + *tl++ = txdr_unsigned(NQNFS_VER3); + } else { + *tl++ = txdr_unsigned(NFS_PROG); + if (nmflag & NFSMNT_NFSV3) + *tl++ = txdr_unsigned(NFS_VER3); + else + *tl++ = txdr_unsigned(NFS_VER2); + } + if (nmflag & NFSMNT_NFSV3) + *tl++ = txdr_unsigned(procid); + else + *tl++ = txdr_unsigned(nfsv2_procid[procid]); + + /* + * And then the authorization cred. + */ + *tl++ = txdr_unsigned(auth_type); + *tl = txdr_unsigned(authsiz); + switch (auth_type) { + case RPCAUTH_UNIX: + nfsm_build(tl, u_int32_t *, auth_len); + *tl++ = 0; /* stamp ?? */ + *tl++ = 0; /* NULL hostname */ + *tl++ = txdr_unsigned(cr->cr_uid); + *tl++ = txdr_unsigned(cr->cr_groups[0]); + grpsiz = (auth_len >> 2) - 5; + *tl++ = txdr_unsigned(grpsiz); + for (i = 1; i <= grpsiz; i++) + *tl++ = txdr_unsigned(cr->cr_groups[i]); + break; + case RPCAUTH_KERB4: + siz = auth_len; + while (siz > 0) { + if (M_TRAILINGSPACE(mb) == 0) { + MGET(mb2, M_WAIT, MT_DATA); + if (siz >= MINCLSIZE) + MCLGET(mb2, M_WAIT); + mb->m_next = mb2; + mb = mb2; + mb->m_len = 0; + bpos = mtod(mb, caddr_t); + } + i = min(siz, M_TRAILINGSPACE(mb)); + bcopy(auth_str, bpos, i); + mb->m_len += i; + auth_str += i; + bpos += i; + siz -= i; + } + if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { + for (i = 0; i < siz; i++) + *bpos++ = '\0'; + mb->m_len += siz; + } + break; + }; + + /* + * And the verifier... 
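[Editor's aside, not part of the patch] nfsm_rpchead() above lays down the standard ONC RPC call header one 32-bit XDR word at a time (xid, CALL, RPC version 2, program, version, procedure) before the credential and verifier. A hedged userland sketch of just that fixed prefix, using htonl() where the kernel uses txdr_unsigned():

#include <stdint.h>
#include <arpa/inet.h>

#define RPC_MSG_CALL    0       /* standard ONC RPC message type */
#define RPC_VERSION     2

/* Fill the six fixed words of an RPC call header into buf; the caller then
 * appends the credential and verifier, as nfsm_rpchead() does above. */
static void
rpc_call_header(uint32_t buf[6], uint32_t xid, uint32_t prog, uint32_t vers,
    uint32_t proc)
{
        buf[0] = htonl(xid);            /* transaction id, echoed in the reply */
        buf[1] = htonl(RPC_MSG_CALL);
        buf[2] = htonl(RPC_VERSION);
        buf[3] = htonl(prog);           /* e.g. the NFS program number */
        buf[4] = htonl(vers);           /* protocol version within that program */
        buf[5] = htonl(proc);           /* procedure number */
}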
+ */ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + if (verf_str) { + *tl++ = txdr_unsigned(RPCAUTH_KERB4); + *tl = txdr_unsigned(verf_len); + siz = verf_len; + while (siz > 0) { + if (M_TRAILINGSPACE(mb) == 0) { + MGET(mb2, M_WAIT, MT_DATA); + if (siz >= MINCLSIZE) + MCLGET(mb2, M_WAIT); + mb->m_next = mb2; + mb = mb2; + mb->m_len = 0; + bpos = mtod(mb, caddr_t); + } + i = min(siz, M_TRAILINGSPACE(mb)); + bcopy(verf_str, bpos, i); + mb->m_len += i; + verf_str += i; + bpos += i; + siz -= i; + } + if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { + for (i = 0; i < siz; i++) + *bpos++ = '\0'; + mb->m_len += siz; + } + } else { + *tl++ = txdr_unsigned(RPCAUTH_NULL); + *tl = 0; + } + mb->m_next = mrest; + mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; + mreq->m_pkthdr.rcvif = (struct ifnet *)0; + *mbp = mb; + return (mreq); +} + +/* + * copies mbuf chain to the uio scatter/gather list + */ +int +nfsm_mbuftouio(mrep, uiop, siz, dpos) + struct mbuf **mrep; + register struct uio *uiop; + int siz; + caddr_t *dpos; +{ + register char *mbufcp, *uiocp; + register int xfer, left, len; + register struct mbuf *mp; + long uiosiz, rem; + int error = 0; + + mp = *mrep; + mbufcp = *dpos; + len = mtod(mp, caddr_t)+mp->m_len-mbufcp; + rem = nfsm_rndup(siz)-siz; + while (siz > 0) { + if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) + return (EFBIG); + left = uiop->uio_iov->iov_len; + uiocp = uiop->uio_iov->iov_base; + if (left > siz) + left = siz; + uiosiz = left; + while (left > 0) { + while (len == 0) { + mp = mp->m_next; + if (mp == NULL) + return (EBADRPC); + mbufcp = mtod(mp, caddr_t); + len = mp->m_len; + } + xfer = (left > len) ? len : left; +#ifdef notdef + /* Not Yet.. */ + if (uiop->uio_iov->iov_op != NULL) + (*(uiop->uio_iov->iov_op)) + (mbufcp, uiocp, xfer); + else +#endif + if (uiop->uio_segflg == UIO_SYSSPACE) + bcopy(mbufcp, uiocp, xfer); + else + copyout(mbufcp, uiocp, xfer); + left -= xfer; + len -= xfer; + mbufcp += xfer; + uiocp += xfer; + uiop->uio_offset += xfer; + uiop->uio_resid -= xfer; + } + if (uiop->uio_iov->iov_len <= siz) { + uiop->uio_iovcnt--; + uiop->uio_iov++; + } else { + uiop->uio_iov->iov_base += uiosiz; + uiop->uio_iov->iov_len -= uiosiz; + } + siz -= uiosiz; + } + *dpos = mbufcp; + *mrep = mp; + if (rem > 0) { + if (len < rem) + error = nfs_adv(mrep, dpos, rem, len); + else + *dpos += rem; + } + return (error); +} + +/* + * copies a uio scatter/gather list to an mbuf chain. + * NOTE: can ony handle iovcnt == 1 + */ +int +nfsm_uiotombuf(uiop, mq, siz, bpos) + register struct uio *uiop; + struct mbuf **mq; + int siz; + caddr_t *bpos; +{ + register char *uiocp; + register struct mbuf *mp, *mp2; + register int xfer, left, mlen; + int uiosiz, clflg, rem; + char *cp; + +#ifdef DIAGNOSTIC + if (uiop->uio_iovcnt != 1) + panic("nfsm_uiotombuf: iovcnt != 1"); +#endif + + if (siz > MLEN) /* or should it >= MCLBYTES ?? */ + clflg = 1; + else + clflg = 0; + rem = nfsm_rndup(siz)-siz; + mp = mp2 = *mq; + while (siz > 0) { + left = uiop->uio_iov->iov_len; + uiocp = uiop->uio_iov->iov_base; + if (left > siz) + left = siz; + uiosiz = left; + while (left > 0) { + mlen = M_TRAILINGSPACE(mp); + if (mlen == 0) { + MGET(mp, M_WAIT, MT_DATA); + if (clflg) + MCLGET(mp, M_WAIT); + mp->m_len = 0; + mp2->m_next = mp; + mp2 = mp; + mlen = M_TRAILINGSPACE(mp); + } + xfer = (left > mlen) ? mlen : left; +#ifdef notdef + /* Not Yet.. 
*/ + if (uiop->uio_iov->iov_op != NULL) + (*(uiop->uio_iov->iov_op)) + (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + else +#endif + if (uiop->uio_segflg == UIO_SYSSPACE) + bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + else + copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + mp->m_len += xfer; + left -= xfer; + uiocp += xfer; + uiop->uio_offset += xfer; + uiop->uio_resid -= xfer; + } + uiop->uio_iov->iov_base += uiosiz; + uiop->uio_iov->iov_len -= uiosiz; + siz -= uiosiz; + } + if (rem > 0) { + if (rem > M_TRAILINGSPACE(mp)) { + MGET(mp, M_WAIT, MT_DATA); + mp->m_len = 0; + mp2->m_next = mp; + } + cp = mtod(mp, caddr_t)+mp->m_len; + for (left = 0; left < rem; left++) + *cp++ = '\0'; + mp->m_len += rem; + *bpos = cp; + } else + *bpos = mtod(mp, caddr_t)+mp->m_len; + *mq = mp; + return (0); +} + +/* + * Help break down an mbuf chain by setting the first siz bytes contiguous + * pointed to by returned val. + * This is used by the macros nfsm_dissect and nfsm_dissecton for tough + * cases. (The macros use the vars. dpos and dpos2) + */ +int +nfsm_disct(mdp, dposp, siz, left, cp2) + struct mbuf **mdp; + caddr_t *dposp; + int siz; + int left; + caddr_t *cp2; +{ + register struct mbuf *mp, *mp2; + register int siz2, xfer; + register caddr_t p; + + mp = *mdp; + while (left == 0) { + *mdp = mp = mp->m_next; + if (mp == NULL) + return (EBADRPC); + left = mp->m_len; + *dposp = mtod(mp, caddr_t); + } + if (left >= siz) { + *cp2 = *dposp; + *dposp += siz; + } else if (mp->m_next == NULL) { + return (EBADRPC); + } else if (siz > MHLEN) { + panic("nfs S too big"); + } else { + MGET(mp2, M_WAIT, MT_DATA); + mp2->m_next = mp->m_next; + mp->m_next = mp2; + mp->m_len -= left; + mp = mp2; + *cp2 = p = mtod(mp, caddr_t); + bcopy(*dposp, p, left); /* Copy what was left */ + siz2 = siz-left; + p += left; + mp2 = mp->m_next; + /* Loop around copying up the siz2 bytes */ + while (siz2 > 0) { + if (mp2 == NULL) + return (EBADRPC); + xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2; + if (xfer > 0) { + bcopy(mtod(mp2, caddr_t), p, xfer); + NFSMADV(mp2, xfer); + mp2->m_len -= xfer; + p += xfer; + siz2 -= xfer; + } + if (siz2 > 0) + mp2 = mp2->m_next; + } + mp->m_len = siz; + *mdp = mp2; + *dposp = mtod(mp2, caddr_t); + } + return (0); +} + +/* + * Advance the position in the mbuf chain. + */ +int +nfs_adv(mdp, dposp, offs, left) + struct mbuf **mdp; + caddr_t *dposp; + int offs; + int left; +{ + register struct mbuf *m; + register int s; + + m = *mdp; + s = left; + while (s < offs) { + offs -= s; + m = m->m_next; + if (m == NULL) + return (EBADRPC); + s = m->m_len; + } + *mdp = m; + *dposp = mtod(m, caddr_t)+offs; + return (0); +} + +/* + * Copy a string into mbufs for the hard cases... 
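[Editor's aside, not part of the patch] The string copy described in the comment above (nfsm_strtmbuf, which follows), like the auth and verifier copies earlier, emits an XDR counted opaque: a 4-byte length, the bytes themselves, then zero padding out to the next 4-byte boundary, which is what the nfsm_rndup() remainder handles. A compact userland sketch of that encoding:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* XDR-encode a counted opaque/string into dst: length word, data, zero pad to
 * a multiple of 4.  Returns the number of bytes written.  dst must have room
 * for 4 + ((len + 3) & ~3) bytes. */
static size_t
xdr_put_opaque(uint8_t *dst, const void *src, uint32_t len)
{
        uint32_t belen = htonl(len);
        size_t pad = (4 - (len & 3)) & 3;       /* same as nfsm_rndup(len) - len */

        memcpy(dst, &belen, 4);
        memcpy(dst + 4, src, len);
        memset(dst + 4 + len, 0, pad);
        return (4 + len + pad);
}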
+ */ +int +nfsm_strtmbuf(mb, bpos, cp, siz) + struct mbuf **mb; + char **bpos; + const char *cp; + long siz; +{ + register struct mbuf *m1 = NULL, *m2; + long left, xfer, len, tlen; + u_int32_t *tl; + int putsize; + + putsize = 1; + m2 = *mb; + left = M_TRAILINGSPACE(m2); + if (left > 0) { + tl = ((u_int32_t *)(*bpos)); + *tl++ = txdr_unsigned(siz); + putsize = 0; + left -= NFSX_UNSIGNED; + m2->m_len += NFSX_UNSIGNED; + if (left > 0) { + bcopy(cp, (caddr_t) tl, left); + siz -= left; + cp += left; + m2->m_len += left; + left = 0; + } + } + /* Loop around adding mbufs */ + while (siz > 0) { + MGET(m1, M_WAIT, MT_DATA); + if (siz > MLEN) + MCLGET(m1, M_WAIT); + m1->m_len = NFSMSIZ(m1); + m2->m_next = m1; + m2 = m1; + tl = mtod(m1, u_int32_t *); + tlen = 0; + if (putsize) { + *tl++ = txdr_unsigned(siz); + m1->m_len -= NFSX_UNSIGNED; + tlen = NFSX_UNSIGNED; + putsize = 0; + } + if (siz < m1->m_len) { + len = nfsm_rndup(siz); + xfer = siz; + if (xfer < len) + *(tl+(xfer>>2)) = 0; + } else { + xfer = len = m1->m_len; + } + bcopy(cp, (caddr_t) tl, xfer); + m1->m_len = len+tlen; + siz -= xfer; + cp += xfer; + } + *mb = m1; + *bpos = mtod(m1, caddr_t)+m1->m_len; + return (0); +} + +/* + * Called once to initialize data structures... + */ +int +nfs_init(vfsp) + struct vfsconf *vfsp; +{ + register int i; + + nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1); + + /* + * Check to see if major data structures haven't bloated. + */ + if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { + printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); + printf("Try reducing NFS_UIDHASHSIZ\n"); + } + if (sizeof (struct nfsuid) > NFS_UIDALLOC) { + printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); + printf("Try unionizing the nu_nickname and nu_flag fields\n"); + } + nfs_mount_type = vfsp->vfc_typenum; + nfsrtt.pos = 0; + rpc_vers = txdr_unsigned(RPC_VER2); + rpc_call = txdr_unsigned(RPC_CALL); + rpc_reply = txdr_unsigned(RPC_REPLY); + rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); + rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); + rpc_mismatch = txdr_unsigned(RPC_MISMATCH); + rpc_autherr = txdr_unsigned(RPC_AUTHERR); + rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); + rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); + nfs_prog = txdr_unsigned(NFS_PROG); + nqnfs_prog = txdr_unsigned(NQNFS_PROG); + nfs_true = txdr_unsigned(TRUE); + nfs_false = txdr_unsigned(FALSE); + nfs_xdrneg1 = txdr_unsigned(-1); + nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; + if (nfs_ticks < 1) + nfs_ticks = 1; + /* Ensure async daemons disabled */ + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { + nfs_iodwant[i] = (struct proc *)0; + nfs_iodmount[i] = (struct nfsmount *)0; + } + nfs_nhinit(); /* Init the nfsnode table */ +#ifndef NFS_NOSERVER + nfsrv_init(0); /* Init server data structures */ + nfsrv_initcache(); /* Init the server request cache */ +#endif + + /* + * Initialize the nqnfs server stuff. + */ + if (nqnfsstarttime == 0) { + nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + + nqsrv_clockskew + nqsrv_writeslack; + NQLOADNOVRAM(nqnfsstarttime); + CIRCLEQ_INIT(&nqtimerhead); + nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); + } + + /* + * Initialize reply list and start timer + */ + TAILQ_INIT(&nfs_reqq); + + nfs_timer(0); + + /* + * Set up lease_check and lease_updatetime so that other parts + * of the system can call us, if we are loadable. 
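[Editor's aside, not part of the patch] nfs_init() above converts NFS_TICKINTVL (milliseconds) into clock ticks with rounding and a floor of one tick, so nfs_ticks never ends up zero on a low-hz kernel. The same arithmetic, isolated:

/* Convert an interval in milliseconds to clock ticks, rounding to the nearest
 * tick and never returning less than 1 -- mirrors the nfs_ticks computation. */
static int
ms_to_ticks(int hz, int ms)
{
        int ticks = (hz * ms + 500) / 1000;

        return (ticks < 1 ? 1 : ticks);
}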
+ */ +#ifndef NFS_NOSERVER + nfs_prev_vop_lease_check = default_vnodeop_p[VOFFSET(vop_lease)]; + default_vnodeop_p[VOFFSET(vop_lease)] = (vop_t *)nqnfs_vop_lease_check; +#endif + nfs_prev_lease_updatetime = lease_updatetime; + lease_updatetime = nfs_lease_updatetime; + nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg; + sysent[SYS_nfssvc].sy_narg = 2; + nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call; + sysent[SYS_nfssvc].sy_call = (sy_call_t *)nfssvc; +#ifndef NFS_NOSERVER + nfs_prev_getfh_sy_narg = sysent[SYS_getfh].sy_narg; + sysent[SYS_getfh].sy_narg = 2; + nfs_prev_getfh_sy_call = sysent[SYS_getfh].sy_call; + sysent[SYS_getfh].sy_call = (sy_call_t *)getfh; +#endif + + nfs_pbuf_freecnt = nswbuf / 2 + 1; + + return (0); +} + +int +nfs_uninit(vfsp) + struct vfsconf *vfsp; +{ + + untimeout(nfs_timer, (void *)NULL, nfs_timer_handle); + nfs_mount_type = -1; +#ifndef NFS_NOSERVER + default_vnodeop_p[VOFFSET(vop_lease)] = nfs_prev_vop_lease_check; +#endif + lease_updatetime = nfs_prev_lease_updatetime; + sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg; + sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call; +#ifndef NFS_NOSERVER + sysent[SYS_getfh].sy_narg = nfs_prev_getfh_sy_narg; + sysent[SYS_getfh].sy_call = nfs_prev_getfh_sy_call; +#endif + return (0); +} + +/* + * Attribute cache routines. + * nfs_loadattrcache() - loads or updates the cache contents from attributes + * that are on the mbuf list + * nfs_getattrcache() - returns valid attributes if found in cache, returns + * error otherwise + */ + +/* + * Load the attribute cache (that lives in the nfsnode entry) with + * the values on the mbuf list and + * Iff vap not NULL + * copy the attributes to *vaper + */ +int +nfs_loadattrcache(vpp, mdp, dposp, vaper) + struct vnode **vpp; + struct mbuf **mdp; + caddr_t *dposp; + struct vattr *vaper; +{ + register struct vnode *vp = *vpp; + register struct vattr *vap; + register struct nfs_fattr *fp; + register struct nfsnode *np; + register int32_t t1; + caddr_t cp2; + int error = 0, rdev; + struct mbuf *md; + enum vtype vtyp; + u_short vmode; + struct timespec mtime; + int v3 = NFS_ISV3(vp); + + md = *mdp; + t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; + if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0) + return (error); + fp = (struct nfs_fattr *)cp2; + if (v3) { + vtyp = nfsv3tov_type(fp->fa_type); + vmode = fxdr_unsigned(u_short, fp->fa_mode); + rdev = makeudev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), + fxdr_unsigned(int, fp->fa3_rdev.specdata2)); + fxdr_nfsv3time(&fp->fa3_mtime, &mtime); + } else { + vtyp = nfsv2tov_type(fp->fa_type); + vmode = fxdr_unsigned(u_short, fp->fa_mode); + /* + * XXX + * + * The duplicate information returned in fa_type and fa_mode + * is an ambiguity in the NFS version 2 protocol. + * + * VREG should be taken literally as a regular file. If a + * server intents to return some type information differently + * in the upper bits of the mode field (e.g. for sockets, or + * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we + * leave the examination of the mode bits even in the VREG + * case to avoid breakage for bogus servers, but we make sure + * that there are actually type bits set in the upper part of + * fa_mode (and failing that, trust the va_type field). + * + * NFSv3 cleared the issue, and requires fa_mode to not + * contain any type information (while also introduing sockets + * and FIFOs for fa_type). 
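[Editor's aside, not part of the patch] The code that follows implements the comment above: when NFSv2 reports VNON, or VREG with type bits present in fa_mode, the type is taken from the mode, and a VCHR with rdev 0xffffffff is treated as a FIFO. A standalone sketch of that decision, using <sys/stat.h> type bits and a local enum standing in for the kernel's vtype:

#include <sys/stat.h>

enum ftype { F_NON, F_REG, F_DIR, F_BLK, F_CHR, F_LNK, F_SOCK, F_FIFO };

static enum ftype
mode_to_ftype(unsigned short mode)
{
        switch (mode & S_IFMT) {
        case S_IFDIR:   return (F_DIR);
        case S_IFBLK:   return (F_BLK);
        case S_IFCHR:   return (F_CHR);
        case S_IFLNK:   return (F_LNK);
        case S_IFSOCK:  return (F_SOCK);
        case S_IFIFO:   return (F_FIFO);
        default:        return (F_REG);
        }
}

/* Resolve the NFSv2 fa_type/fa_mode ambiguity the same way the code below
 * does: trust the mode bits when the wire type is NON, or when a "regular
 * file" carries type bits; then apply the rdev == 0xffffffff FIFO kludge. */
static enum ftype
v2_resolve_type(enum ftype wiretype, unsigned short mode, unsigned int rdev)
{
        enum ftype vtyp = wiretype;

        if (vtyp == F_NON || (vtyp == F_REG && (mode & S_IFMT) != 0))
                vtyp = mode_to_ftype(mode);
        if (vtyp == F_CHR && rdev == 0xffffffffU)
                vtyp = F_FIFO;
        return (vtyp);
}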
+ */ + if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) + vtyp = IFTOVT(vmode); + rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); + fxdr_nfsv2time(&fp->fa2_mtime, &mtime); + + /* + * Really ugly NFSv2 kludge. + */ + if (vtyp == VCHR && rdev == 0xffffffff) + vtyp = VFIFO; + } + + /* + * If v_type == VNON it is a new node, so fill in the v_type, + * n_mtime fields. Check to see if it represents a special + * device, and if so, check for a possible alias. Once the + * correct vnode has been obtained, fill in the rest of the + * information. + */ + np = VTONFS(vp); + if (vp->v_type != vtyp) { + vp->v_type = vtyp; + if (vp->v_type == VFIFO) { + vp->v_op = fifo_nfsv2nodeop_p; + } + if (vp->v_type == VCHR || vp->v_type == VBLK) { + vp->v_op = spec_nfsv2nodeop_p; + addaliasu(vp, rdev); + } + np->n_mtime = mtime.tv_sec; + } + vap = &np->n_vattr; + vap->va_type = vtyp; + vap->va_mode = (vmode & 07777); + vap->va_rdev = rdev; + vap->va_mtime = mtime; + vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; + if (v3) { + vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); + vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); + vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); + vap->va_size = fxdr_hyper(&fp->fa3_size); + vap->va_blocksize = NFS_FABLKSIZE; + vap->va_bytes = fxdr_hyper(&fp->fa3_used); + vap->va_fileid = fxdr_unsigned(int32_t, + fp->fa3_fileid.nfsuquad[1]); + fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); + fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); + vap->va_flags = 0; + vap->va_filerev = 0; + } else { + vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); + vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); + vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); + vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); + vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); + vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) + * NFS_FABLKSIZE; + vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); + fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); + vap->va_flags = 0; + vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t, + fp->fa2_ctime.nfsv2_sec); + vap->va_ctime.tv_nsec = 0; + vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); + vap->va_filerev = 0; + } + if (vap->va_size != np->n_size) { + if (vap->va_type == VREG) { + if (np->n_flag & NMODIFIED) { + if (vap->va_size < np->n_size) + vap->va_size = np->n_size; + else + np->n_size = vap->va_size; + } else { + np->n_size = vap->va_size; + } + vnode_pager_setsize(vp, np->n_size); + } else { + np->n_size = vap->va_size; + } + } + np->n_attrstamp = time_second; + if (vaper != NULL) { + bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); + if (np->n_flag & NCHG) { + if (np->n_flag & NACC) + vaper->va_atime = np->n_atim; + if (np->n_flag & NUPD) + vaper->va_mtime = np->n_mtim; + } + } + return (0); +} + +#ifdef NFS_ACDEBUG +#include <sys/sysctl.h> +SYSCTL_DECL(_vfs_nfs); +static int nfs_acdebug; +SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); +#endif + +/* + * Check the time stamp + * If the cache is valid, copy contents to *vap and return 0 + * otherwise return an error + */ +int +nfs_getattrcache(vp, vaper) + register struct vnode *vp; + struct vattr *vaper; +{ + register struct nfsnode *np; + register struct vattr *vap; + struct nfsmount *nmp; + int timeo; + + np = VTONFS(vp); + vap = &np->n_vattr; + nmp = VFSTONFS(vp->v_mount); + /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ + timeo = (time_second - np->n_mtime) / 10; + +#ifdef NFS_ACDEBUG + if 
(nfs_acdebug>1) + printf("nfs_getattrcache: initial timeo = %d\n", timeo); +#endif + + if (vap->va_type == VDIR) { + if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) + timeo = nmp->nm_acdirmin; + else if (timeo > nmp->nm_acdirmax) + timeo = nmp->nm_acdirmax; + } else { + if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) + timeo = nmp->nm_acregmin; + else if (timeo > nmp->nm_acregmax) + timeo = nmp->nm_acregmax; + } + +#ifdef NFS_ACDEBUG + if (nfs_acdebug > 2) + printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", + nmp->nm_acregmin, nmp->nm_acregmax, + nmp->nm_acdirmin, nmp->nm_acdirmax); + + if (nfs_acdebug) + printf("nfs_getattrcache: age = %d; final timeo = %d\n", + (time_second - np->n_attrstamp), timeo); +#endif + + if ((time_second - np->n_attrstamp) >= timeo) { + nfsstats.attrcache_misses++; + return (ENOENT); + } + nfsstats.attrcache_hits++; + if (vap->va_size != np->n_size) { + if (vap->va_type == VREG) { + if (np->n_flag & NMODIFIED) { + if (vap->va_size < np->n_size) + vap->va_size = np->n_size; + else + np->n_size = vap->va_size; + } else { + np->n_size = vap->va_size; + } + vnode_pager_setsize(vp, np->n_size); + } else { + np->n_size = vap->va_size; + } + } + bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); + if (np->n_flag & NCHG) { + if (np->n_flag & NACC) + vaper->va_atime = np->n_atim; + if (np->n_flag & NUPD) + vaper->va_mtime = np->n_mtim; + } + return (0); +} + +#ifndef NFS_NOSERVER +/* + * Set up nameidata for a lookup() call and do it. + * + * If pubflag is set, this call is done for a lookup operation on the + * public filehandle. In that case we allow crossing mountpoints and + * absolute pathnames. However, the caller is expected to check that + * the lookup result is within the public fs, and deny access if + * it is not. + * + * nfs_namei() clears out garbage fields that namei() might leave garbage. + * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no + * error occurs but the parent was not requested. + * + * dirp may be set whether an error is returned or not, and must be + * released by the caller. + */ +int +nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag) + register struct nameidata *ndp; + fhandle_t *fhp; + int len; + struct nfssvc_sock *slp; + struct sockaddr *nam; + struct mbuf **mdp; + caddr_t *dposp; + struct vnode **retdirp; + struct proc *p; + int kerbflag, pubflag; +{ + register int i, rem; + register struct mbuf *md; + register char *fromcp, *tocp, *cp; + struct iovec aiov; + struct uio auio; + struct vnode *dp; + int error, rdonly, linklen; + struct componentname *cnp = &ndp->ni_cnd; + + *retdirp = (struct vnode *)0; + cnp->cn_pnbuf = zalloc(namei_zone); + + /* + * Copy the name from the mbuf list to ndp->ni_pnbuf + * and set the various ndp fields appropriately. + */ + fromcp = *dposp; + tocp = cnp->cn_pnbuf; + md = *mdp; + rem = mtod(md, caddr_t) + md->m_len - fromcp; + for (i = 0; i < len; i++) { + while (rem == 0) { + md = md->m_next; + if (md == NULL) { + error = EBADRPC; + goto out; + } + fromcp = mtod(md, caddr_t); + rem = md->m_len; + } + if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { + error = EACCES; + goto out; + } + *tocp++ = *fromcp++; + rem--; + } + *tocp = '\0'; + *mdp = md; + *dposp = fromcp; + len = nfsm_rndup(len)-len; + if (len > 0) { + if (rem >= len) + *dposp += len; + else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) + goto out; + } + + /* + * Extract and set starting directory. 
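[Editor's aside, not part of the patch] The attribute-cache policy in nfs_getattrcache() above scales the timeout with how long the file has gone unmodified (one tenth of that age) and clamps it between the mount's min/max pair (acregmin/acregmax or acdirmin/acdirmax), forcing the minimum for nodes flagged NMODIFIED. The same policy as a plain function:

/* Attribute-cache timeout: one tenth of the time since last modification,
 * clamped to [tmin, tmax]; a locally modified node always gets the minimum. */
static int
attrcache_timeout(long seconds_since_modify, int modified, int tmin, int tmax)
{
        int timeo = (int)(seconds_since_modify / 10);

        if (modified || timeo < tmin)
                timeo = tmin;
        else if (timeo > tmax)
                timeo = tmax;
        return (timeo);
}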
+ */ + error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, + nam, &rdonly, kerbflag, pubflag); + if (error) + goto out; + if (dp->v_type != VDIR) { + vrele(dp); + error = ENOTDIR; + goto out; + } + + if (rdonly) + cnp->cn_flags |= RDONLY; + + /* + * Set return directory. Reference to dp is implicitly transfered + * to the returned pointer + */ + *retdirp = dp; + + if (pubflag) { + /* + * Oh joy. For WebNFS, handle those pesky '%' escapes, + * and the 'native path' indicator. + */ + cp = zalloc(namei_zone); + fromcp = cnp->cn_pnbuf; + tocp = cp; + if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { + switch ((unsigned char)*fromcp) { + case WEBNFS_NATIVE_CHAR: + /* + * 'Native' path for us is the same + * as a path according to the NFS spec, + * just skip the escape char. + */ + fromcp++; + break; + /* + * More may be added in the future, range 0x80-0xff + */ + default: + error = EIO; + zfree(namei_zone, cp); + goto out; + } + } + /* + * Translate the '%' escapes, URL-style. + */ + while (*fromcp != '\0') { + if (*fromcp == WEBNFS_ESC_CHAR) { + if (fromcp[1] != '\0' && fromcp[2] != '\0') { + fromcp++; + *tocp++ = HEXSTRTOI(fromcp); + fromcp += 2; + continue; + } else { + error = ENOENT; + zfree(namei_zone, cp); + goto out; + } + } else + *tocp++ = *fromcp++; + } + *tocp = '\0'; + zfree(namei_zone, cnp->cn_pnbuf); + cnp->cn_pnbuf = cp; + } + + ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; + ndp->ni_segflg = UIO_SYSSPACE; + + if (pubflag) { + ndp->ni_rootdir = rootvnode; + ndp->ni_loopcnt = 0; + if (cnp->cn_pnbuf[0] == '/') + dp = rootvnode; + } else { + cnp->cn_flags |= NOCROSSMOUNT; + } + + /* + * Initialize for scan, set ni_startdir and bump ref on dp again + * becuase lookup() will dereference ni_startdir. + */ + + cnp->cn_proc = p; + VREF(dp); + ndp->ni_startdir = dp; + + for (;;) { + cnp->cn_nameptr = cnp->cn_pnbuf; + /* + * Call lookup() to do the real work. If an error occurs, + * ndp->ni_vp and ni_dvp are left uninitialized or NULL and + * we do not have to dereference anything before returning. + * In either case ni_startdir will be dereferenced and NULLed + * out. + */ + error = lookup(ndp); + if (error) + break; + + /* + * Check for encountering a symbolic link. Trivial + * termination occurs if no symlink encountered. + * Note: zfree is safe because error is 0, so we will + * not zfree it again when we break. 
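[Editor's aside, not part of the patch] The WebNFS branch above rewrites the pathname, turning URL-style '%xx' escapes back into bytes (HEXSTRTOI) and failing on a truncated escape. The sketch below is a userland version of that loop with an explicit hex-digit helper in place of the kernel macro; unlike the kernel code, it also rejects non-hex digits after the '%'.

#include <stddef.h>

static int
hexval(char c)
{
        if (c >= '0' && c <= '9')
                return (c - '0');
        if (c >= 'a' && c <= 'f')
                return (c - 'a' + 10);
        if (c >= 'A' && c <= 'F')
                return (c - 'A' + 10);
        return (-1);
}

/* Decode '%xx' escapes from src into dst (dst may equal src); returns 0 on
 * success or -1 for a truncated or malformed escape, the case the kernel
 * code answers with ENOENT. */
static int
webnfs_unescape(char *dst, const char *src)
{
        int hi, lo;

        while (*src != '\0') {
                if (*src == '%') {
                        if ((hi = hexval(src[1])) < 0 || (lo = hexval(src[2])) < 0)
                                return (-1);
                        *dst++ = (char)(hi * 16 + lo);
                        src += 3;
                } else
                        *dst++ = *src++;
        }
        *dst = '\0';
        return (0);
}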
+ */ + if ((cnp->cn_flags & ISSYMLINK) == 0) { + nfsrv_object_create(ndp->ni_vp); + if (cnp->cn_flags & (SAVENAME | SAVESTART)) + cnp->cn_flags |= HASBUF; + else + zfree(namei_zone, cnp->cn_pnbuf); + break; + } + + /* + * Validate symlink + */ + if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) + VOP_UNLOCK(ndp->ni_dvp, 0, p); + if (!pubflag) { + error = EINVAL; + goto badlink2; + } + + if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { + error = ELOOP; + goto badlink2; + } + if (ndp->ni_pathlen > 1) + cp = zalloc(namei_zone); + else + cp = cnp->cn_pnbuf; + aiov.iov_base = cp; + aiov.iov_len = MAXPATHLEN; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = (struct proc *)0; + auio.uio_resid = MAXPATHLEN; + error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); + if (error) { + badlink1: + if (ndp->ni_pathlen > 1) + zfree(namei_zone, cp); + badlink2: + vrele(ndp->ni_dvp); + vput(ndp->ni_vp); + break; + } + linklen = MAXPATHLEN - auio.uio_resid; + if (linklen == 0) { + error = ENOENT; + goto badlink1; + } + if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { + error = ENAMETOOLONG; + goto badlink1; + } + + /* + * Adjust or replace path + */ + if (ndp->ni_pathlen > 1) { + bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); + zfree(namei_zone, cnp->cn_pnbuf); + cnp->cn_pnbuf = cp; + } else + cnp->cn_pnbuf[linklen] = '\0'; + ndp->ni_pathlen += linklen; + + /* + * Cleanup refs for next loop and check if root directory + * should replace current directory. Normally ni_dvp + * becomes the new base directory and is cleaned up when + * we loop. Explicitly null pointers after invalidation + * to clarify operation. + */ + vput(ndp->ni_vp); + ndp->ni_vp = NULL; + + if (cnp->cn_pnbuf[0] == '/') { + vrele(ndp->ni_dvp); + ndp->ni_dvp = ndp->ni_rootdir; + VREF(ndp->ni_dvp); + } + ndp->ni_startdir = ndp->ni_dvp; + ndp->ni_dvp = NULL; + } + + /* + * nfs_namei() guarentees that fields will not contain garbage + * whether an error occurs or not. This allows the caller to track + * cleanup state trivially. + */ +out: + if (error) { + zfree(namei_zone, cnp->cn_pnbuf); + ndp->ni_vp = NULL; + ndp->ni_dvp = NULL; + ndp->ni_startdir = NULL; + cnp->cn_flags &= ~HASBUF; + } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { + ndp->ni_dvp = NULL; + } + return (error); +} + +/* + * A fiddled version of m_adj() that ensures null fill to a long + * boundary and only trims off the back end + */ +void +nfsm_adj(mp, len, nul) + struct mbuf *mp; + register int len; + int nul; +{ + register struct mbuf *m; + register int count, i; + register char *cp; + + /* + * Trim from tail. Scan the mbuf chain, + * calculating its length and finding the last mbuf. + * If the adjustment only affects this mbuf, then just + * adjust and return. Otherwise, rescan and truncate + * after the remaining size. + */ + count = 0; + m = mp; + for (;;) { + count += m->m_len; + if (m->m_next == (struct mbuf *)0) + break; + m = m->m_next; + } + if (m->m_len > len) { + m->m_len -= len; + if (nul > 0) { + cp = mtod(m, caddr_t)+m->m_len-nul; + for (i = 0; i < nul; i++) + *cp++ = '\0'; + } + return; + } + count -= len; + if (count < 0) + count = 0; + /* + * Correct length for chain is "count". + * Find the mbuf with last data, adjust its length, + * and toss data from remaining mbufs on chain. 
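[Editor's aside, not part of the patch] nfsm_adj(), whose body continues below, walks the chain twice: once to find the total length and the last mbuf, once to truncate at the new end and zero the trailing pad. A toy version on a plain singly linked buffer chain, assuming (as the kernel code does) that the null fill always fits in the final non-empty block:

struct blk {
        struct blk *next;
        int len;
        char data[128];
};

/* Drop "trim" bytes from the tail of a chain and zero the last "nul" bytes
 * that remain -- the same two-pass shape as nfsm_adj(), on a toy structure. */
static void
chain_trim_tail(struct blk *head, int trim, int nul)
{
        struct blk *b;
        int count = 0, i;
        char *cp;

        for (b = head; ; b = b->next) {         /* total length, find last block */
                count += b->len;
                if (b->next == NULL)
                        break;
        }
        if (b->len > trim) {                    /* whole trim fits in the tail */
                b->len -= trim;
                if (nul > 0) {
                        cp = b->data + b->len - nul;
                        for (i = 0; i < nul; i++)
                                *cp++ = '\0';
                }
                return;
        }
        count -= trim;                          /* new total length of the chain */
        if (count < 0)
                count = 0;
        for (b = head; b != NULL; b = b->next) {
                if (b->len >= count) {          /* block holding the new end */
                        b->len = count;
                        if (nul > 0) {
                                cp = b->data + b->len - nul;
                                for (i = 0; i < nul; i++)
                                        *cp++ = '\0';
                        }
                        break;
                }
                count -= b->len;
        }
        if (b != NULL)
                for (b = b->next; b != NULL; b = b->next)
                        b->len = 0;             /* past the new end: now empty */
}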
+ */ + for (m = mp; m; m = m->m_next) { + if (m->m_len >= count) { + m->m_len = count; + if (nul > 0) { + cp = mtod(m, caddr_t)+m->m_len-nul; + for (i = 0; i < nul; i++) + *cp++ = '\0'; + } + break; + } + count -= m->m_len; + } + for (m = m->m_next;m;m = m->m_next) + m->m_len = 0; +} + +/* + * Make these functions instead of macros, so that the kernel text size + * doesn't get too big... + */ +void +nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) + struct nfsrv_descript *nfsd; + int before_ret; + register struct vattr *before_vap; + int after_ret; + struct vattr *after_vap; + struct mbuf **mbp; + char **bposp; +{ + register struct mbuf *mb = *mbp, *mb2; + register char *bpos = *bposp; + register u_int32_t *tl; + + if (before_ret) { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = nfs_false; + } else { + nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED); + *tl++ = nfs_true; + txdr_hyper(before_vap->va_size, tl); + tl += 2; + txdr_nfsv3time(&(before_vap->va_mtime), tl); + tl += 2; + txdr_nfsv3time(&(before_vap->va_ctime), tl); + } + *bposp = bpos; + *mbp = mb; + nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); +} + +void +nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) + struct nfsrv_descript *nfsd; + int after_ret; + struct vattr *after_vap; + struct mbuf **mbp; + char **bposp; +{ + register struct mbuf *mb = *mbp, *mb2; + register char *bpos = *bposp; + register u_int32_t *tl; + register struct nfs_fattr *fp; + + if (after_ret) { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = nfs_false; + } else { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR); + *tl++ = nfs_true; + fp = (struct nfs_fattr *)tl; + nfsm_srvfattr(nfsd, after_vap, fp); + } + *mbp = mb; + *bposp = bpos; +} + +void +nfsm_srvfattr(nfsd, vap, fp) + register struct nfsrv_descript *nfsd; + register struct vattr *vap; + register struct nfs_fattr *fp; +{ + + fp->fa_nlink = txdr_unsigned(vap->va_nlink); + fp->fa_uid = txdr_unsigned(vap->va_uid); + fp->fa_gid = txdr_unsigned(vap->va_gid); + if (nfsd->nd_flag & ND_NFSV3) { + fp->fa_type = vtonfsv3_type(vap->va_type); + fp->fa_mode = vtonfsv3_mode(vap->va_mode); + txdr_hyper(vap->va_size, &fp->fa3_size); + txdr_hyper(vap->va_bytes, &fp->fa3_used); + fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev)); + fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev)); + fp->fa3_fsid.nfsuquad[0] = 0; + fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); + fp->fa3_fileid.nfsuquad[0] = 0; + fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); + txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); + txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); + txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); + } else { + fp->fa_type = vtonfsv2_type(vap->va_type); + fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); + fp->fa2_size = txdr_unsigned(vap->va_size); + fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); + if (vap->va_type == VFIFO) + fp->fa2_rdev = 0xffffffff; + else + fp->fa2_rdev = txdr_unsigned(vap->va_rdev); + fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); + fp->fa2_fsid = txdr_unsigned(vap->va_fsid); + fp->fa2_fileid = txdr_unsigned(vap->va_fileid); + txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); + txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); + txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); + } +} + +/* + * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) + * - look up fsid in mount list (if not found ret error) + * - get vp and export rights by calling 
VFS_FHTOVP() + * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon + * - if not lockflag unlock it with VOP_UNLOCK() + */ +int +nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag) + fhandle_t *fhp; + int lockflag; + struct vnode **vpp; + struct ucred *cred; + struct nfssvc_sock *slp; + struct sockaddr *nam; + int *rdonlyp; + int kerbflag; + int pubflag; +{ + struct proc *p = curproc; /* XXX */ + register struct mount *mp; + register int i; + struct ucred *credanon; + int error, exflags; +#ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */ + struct sockaddr_int *saddr; +#endif + + *vpp = (struct vnode *)0; + + if (nfs_ispublicfh(fhp)) { + if (!pubflag || !nfs_pub.np_valid) + return (ESTALE); + fhp = &nfs_pub.np_handle; + } + + mp = vfs_getvfs(&fhp->fh_fsid); + if (!mp) + return (ESTALE); + error = VFS_CHECKEXP(mp, nam, &exflags, &credanon); + if (error) + return (error); + error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp); + if (error) + return (error); +#ifdef MNT_EXNORESPORT + if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { + saddr = (struct sockaddr_in *)nam; + if (saddr->sin_family == AF_INET && + ntohs(saddr->sin_port) >= IPPORT_RESERVED) { + vput(*vpp); + *vpp = NULL; + return (NFSERR_AUTHERR | AUTH_TOOWEAK); + } + } +#endif + /* + * Check/setup credentials. + */ + if (exflags & MNT_EXKERB) { + if (!kerbflag) { + vput(*vpp); + *vpp = NULL; + return (NFSERR_AUTHERR | AUTH_TOOWEAK); + } + } else if (kerbflag) { + vput(*vpp); + *vpp = NULL; + return (NFSERR_AUTHERR | AUTH_TOOWEAK); + } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { + cred->cr_uid = credanon->cr_uid; + for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) + cred->cr_groups[i] = credanon->cr_groups[i]; + cred->cr_ngroups = i; + } + if (exflags & MNT_EXRDONLY) + *rdonlyp = 1; + else + *rdonlyp = 0; + + nfsrv_object_create(*vpp); + + if (!lockflag) + VOP_UNLOCK(*vpp, 0, p); + return (0); +} + + +/* + * WebNFS: check if a filehandle is a public filehandle. For v3, this + * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has + * transformed this to all zeroes in both cases, so check for it. + */ +int +nfs_ispublicfh(fhp) + fhandle_t *fhp; +{ + char *cp = (char *)fhp; + int i; + + for (i = 0; i < NFSX_V3FH; i++) + if (*cp++ != 0) + return (FALSE); + return (TRUE); +} + +#endif /* NFS_NOSERVER */ +/* + * This function compares two net addresses by family and returns TRUE + * if they are the same host. + * If there is any doubt, return FALSE. + * The AF_INET family is handled as a special case so that address mbufs + * don't need to be saved to store "struct in_addr", which is only 4 bytes. 
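[Editor's aside, not part of the patch] Both the MNT_EXNORESPORT branch above and the nfs_privport check in the nfsd code further down apply the same traditional policy: an AF_INET client is only trusted when its source port is below IPPORT_RESERVED, since only root could have bound such a port. In userland terms:

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Classic "privileged port" test: a request arriving from a port at or above
 * IPPORT_RESERVED is treated as too weak to authenticate. */
static int
from_reserved_port(const struct sockaddr *sa)
{
        const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;

        return (sin->sin_family == AF_INET &&
            ntohs(sin->sin_port) < IPPORT_RESERVED);
}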
+ */ +int +netaddr_match(family, haddr, nam) + int family; + union nethostaddr *haddr; + struct sockaddr *nam; +{ + register struct sockaddr_in *inetaddr; + + switch (family) { + case AF_INET: + inetaddr = (struct sockaddr_in *)nam; + if (inetaddr->sin_family == AF_INET && + inetaddr->sin_addr.s_addr == haddr->had_inetaddr) + return (1); + break; +#ifdef ISO + case AF_ISO: + { + register struct sockaddr_iso *isoaddr1, *isoaddr2; + + isoaddr1 = (struct sockaddr_iso *)nam; + isoaddr2 = (struct sockaddr_iso *)haddr->had_nam; + if (isoaddr1->siso_family == AF_ISO && + isoaddr1->siso_nlen > 0 && + isoaddr1->siso_nlen == isoaddr2->siso_nlen && + SAME_ISOADDR(isoaddr1, isoaddr2)) + return (1); + break; + } +#endif /* ISO */ + default: + break; + }; + return (0); +} + +static nfsuint64 nfs_nullcookie = { { 0, 0 } }; +/* + * This function finds the directory cookie that corresponds to the + * logical byte offset given. + */ +nfsuint64 * +nfs_getcookie(np, off, add) + register struct nfsnode *np; + off_t off; + int add; +{ + register struct nfsdmap *dp, *dp2; + register int pos; + + pos = (uoff_t)off / NFS_DIRBLKSIZ; + if (pos == 0 || off < 0) { +#ifdef DIAGNOSTIC + if (add) + panic("nfs getcookie add at <= 0"); +#endif + return (&nfs_nullcookie); + } + pos--; + dp = np->n_cookies.lh_first; + if (!dp) { + if (add) { + MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), + M_NFSDIROFF, M_WAITOK); + dp->ndm_eocookie = 0; + LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); + } else + return ((nfsuint64 *)0); + } + while (pos >= NFSNUMCOOKIES) { + pos -= NFSNUMCOOKIES; + if (dp->ndm_list.le_next) { + if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && + pos >= dp->ndm_eocookie) + return ((nfsuint64 *)0); + dp = dp->ndm_list.le_next; + } else if (add) { + MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), + M_NFSDIROFF, M_WAITOK); + dp2->ndm_eocookie = 0; + LIST_INSERT_AFTER(dp, dp2, ndm_list); + dp = dp2; + } else + return ((nfsuint64 *)0); + } + if (pos >= dp->ndm_eocookie) { + if (add) + dp->ndm_eocookie = pos + 1; + else + return ((nfsuint64 *)0); + } + return (&dp->ndm_cookies[pos]); +} + +/* + * Invalidate cached directory information, except for the actual directory + * blocks (which are invalidated separately). + * Done mainly to avoid the use of stale offset cookies. + */ +void +nfs_invaldir(vp) + register struct vnode *vp; +{ + register struct nfsnode *np = VTONFS(vp); + +#ifdef DIAGNOSTIC + if (vp->v_type != VDIR) + panic("nfs: invaldir not dir"); +#endif + np->n_direofoffset = 0; + np->n_cookieverf.nfsuquad[0] = 0; + np->n_cookieverf.nfsuquad[1] = 0; + if (np->n_cookies.lh_first) + np->n_cookies.lh_first->ndm_eocookie = 0; +} + +/* + * The write verifier has changed (probably due to a server reboot), so all + * B_NEEDCOMMIT blocks will have to be written again. Since they are on the + * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT + * and B_CLUSTEROK flags. Once done the new write verifier can be set for the + * mount point. + * + * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data + * writes are not clusterable. 
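[Editor's aside, not part of the patch] nfs_getcookie() above converts a directory byte offset into a (chunk, slot) position within the chained nfsdmap structures: offsets are counted in NFS_DIRBLKSIZ units, block 0 needs no stored cookie, and each chunk holds NFSNUMCOOKIES entries. The index arithmetic in isolation, with illustrative constants standing in for the kernel's:

#define DIRBLKSIZ       512     /* stand-in for NFS_DIRBLKSIZ */
#define NUMCOOKIES      31      /* stand-in for NFSNUMCOOKIES */

/* Map a directory offset to the chunk index and slot index where its cookie
 * lives; returns -1 for block 0 or a negative offset, which use the all-zero
 * "null cookie" instead of a stored one. */
static int
cookie_slot(long long off, int *chunk, int *slot)
{
        long long pos = off / DIRBLKSIZ;

        if (off < 0 || pos == 0)
                return (-1);
        pos--;                          /* the first stored cookie is for block 1 */
        *chunk = (int)(pos / NUMCOOKIES);
        *slot = (int)(pos % NUMCOOKIES);
        return (0);
}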
+ */ +void +nfs_clearcommit(mp) + struct mount *mp; +{ + register struct vnode *vp, *nvp; + register struct buf *bp, *nbp; + int s; + + s = splbio(); +loop: + for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { + if (vp->v_mount != mp) /* Paranoia */ + goto loop; + nvp = vp->v_mntvnodes.le_next; + for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = TAILQ_NEXT(bp, b_vnbufs); + if (BUF_REFCNT(bp) == 0 && + (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) + == (B_DELWRI | B_NEEDCOMMIT)) + bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); + } + } + splx(s); +} + +#ifndef NFS_NOSERVER +/* + * Map errnos to NFS error numbers. For Version 3 also filter out error + * numbers not specified for the associated procedure. + */ +int +nfsrv_errmap(nd, err) + struct nfsrv_descript *nd; + register int err; +{ + register short *defaulterrp, *errp; + + if (nd->nd_flag & ND_NFSV3) { + if (nd->nd_procnum <= NFSPROC_COMMIT) { + errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; + while (*++errp) { + if (*errp == err) + return (err); + else if (*errp > err) + break; + } + return ((int)*defaulterrp); + } else + return (err & 0xffff); + } + if (err <= ELAST) + return ((int)nfsrv_v2errmap[err - 1]); + return (NFSERR_IO); +} + +int +nfsrv_object_create(vp) + struct vnode *vp; +{ + + if (vp == NULL || vp->v_type != VREG) + return (1); + return (vfs_object_create(vp, curproc, + curproc ? curproc->p_ucred : NULL)); +} + +/* + * Sort the group list in increasing numerical order. + * (Insertion sort by Chris Torek, who was grossed out by the bubble sort + * that used to be here.) + */ +void +nfsrvw_sort(list, num) + register gid_t *list; + register int num; +{ + register int i, j; + gid_t v; + + /* Insertion sort. */ + for (i = 1; i < num; i++) { + v = list[i]; + /* find correct slot for value v, moving others up */ + for (j = i; --j >= 0 && v < list[j];) + list[j + 1] = list[j]; + list[j + 1] = v; + } +} + +/* + * copy credentials making sure that the result can be compared with bcmp(). + */ +void +nfsrv_setcred(incred, outcred) + register struct ucred *incred, *outcred; +{ + register int i; + + bzero((caddr_t)outcred, sizeof (struct ucred)); + outcred->cr_ref = 1; + outcred->cr_uid = incred->cr_uid; + outcred->cr_ngroups = incred->cr_ngroups; + for (i = 0; i < incred->cr_ngroups; i++) + outcred->cr_groups[i] = incred->cr_groups[i]; + nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); +} +#endif /* NFS_NOSERVER */ diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c new file mode 100644 index 0000000..da18842 --- /dev/null +++ b/sys/nfs/nfs_syscalls.c @@ -0,0 +1,1199 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysproto.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/namei.h> +#include <vm/vm_zone.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> +#ifdef ISO +#include <netiso/iso.h> +#endif +#include <nfs/xdr_subs.h> +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nfsrvcache.h> +#include <nfs/nfsmount.h> +#include <nfs/nfsnode.h> +#include <nfs/nqnfs.h> +#include <nfs/nfsrtt.h> + +static MALLOC_DEFINE(M_NFSSVC, "NFS srvsock", "Nfs server structure"); + +/* Global defs. 
*/ +extern int32_t (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, + struct nfssvc_sock *slp, + struct proc *procp, + struct mbuf **mreqp)); +extern int nfs_numasync; +extern time_t nqnfsstarttime; +extern int nqsrv_writeslack; +extern int nfsrtton; +extern struct nfsstats nfsstats; +extern int nfsrvw_procrastinate; +extern int nfsrvw_procrastinate_v3; +static int nuidhash_max = NFS_MAXUIDHASH; + +#ifndef NFS_NOSERVER +static void nfsrv_zapsock __P((struct nfssvc_sock *slp)); +#endif +static int nfssvc_iod __P((struct proc *)); + +#define TRUE 1 +#define FALSE 0 + +static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; + +SYSCTL_DECL(_vfs_nfs); + +#ifndef NFS_NOSERVER +int nfsd_waiting = 0; +static struct nfsdrt nfsdrt; +static int nfs_numnfsd = 0; +static int notstarted = 1; +static int modify_flag = 0; +static void nfsd_rt __P((int sotype, struct nfsrv_descript *nd, + int cacherep)); +static int nfssvc_addsock __P((struct file *, struct sockaddr *, + struct proc *)); +static int nfssvc_nfsd __P((struct nfsd_srvargs *,caddr_t,struct proc *)); + +static int nfs_privport = 0; +SYSCTL_INT(_vfs_nfs, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, &nfs_privport, 0, ""); +SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay, CTLFLAG_RW, &nfsrvw_procrastinate, 0, ""); +SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate_v3, 0, ""); + +/* + * NFS server system calls + */ + +#endif /* NFS_NOSERVER */ +/* + * Nfs server psuedo system call for the nfsd's + * Based on the flag value it either: + * - adds a socket to the selection list + * - remains in the kernel as an nfsd + * - remains in the kernel as an nfsiod + */ +#ifndef _SYS_SYSPROTO_H_ +struct nfssvc_args { + int flag; + caddr_t argp; +}; +#endif +int +nfssvc(p, uap) + struct proc *p; + register struct nfssvc_args *uap; +{ +#ifndef NFS_NOSERVER + struct nameidata nd; + struct file *fp; + struct sockaddr *nam; + struct nfsd_args nfsdarg; + struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs; + struct nfsd_cargs ncd; + struct nfsd *nfsd; + struct nfssvc_sock *slp; + struct nfsuid *nuidp; + struct nfsmount *nmp; +#endif /* NFS_NOSERVER */ + int error; + + /* + * Must be super user + */ + error = suser(p); + if(error) + return (error); + while (nfssvc_sockhead_flag & SLP_INIT) { + nfssvc_sockhead_flag |= SLP_WANTINIT; + (void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0); + } + if (uap->flag & NFSSVC_BIOD) + error = nfssvc_iod(p); +#ifdef NFS_NOSERVER + else + error = ENXIO; +#else /* !NFS_NOSERVER */ + else if (uap->flag & NFSSVC_MNTD) { + error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)); + if (error) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + ncd.ncd_dirp, p); + error = namei(&nd); + if (error) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + if ((nd.ni_vp->v_flag & VROOT) == 0) + error = EINVAL; + nmp = VFSTONFS(nd.ni_vp->v_mount); + vput(nd.ni_vp); + if (error) + return (error); + if ((nmp->nm_state & NFSSTA_MNTD) && + (uap->flag & NFSSVC_GOTAUTH) == 0) + return (0); + nmp->nm_state |= NFSSTA_MNTD; + error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag, + uap->argp, p); + } else if (uap->flag & NFSSVC_ADDSOCK) { + error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg)); + if (error) + return (error); + error = getsock(p->p_fd, nfsdarg.sock, &fp); + if (error) + return (error); + /* + * Get the client address for connected sockets. 
+ */ + if (nfsdarg.name == NULL || nfsdarg.namelen == 0) + nam = (struct sockaddr *)0; + else { + error = getsockaddr(&nam, nfsdarg.name, + nfsdarg.namelen); + if (error) + return (error); + } + error = nfssvc_addsock(fp, nam, p); + } else { + error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)); + if (error) + return (error); + if ((uap->flag & NFSSVC_AUTHIN) && + ((nfsd = nsd->nsd_nfsd)) != NULL && + (nfsd->nfsd_slp->ns_flag & SLP_VALID)) { + slp = nfsd->nfsd_slp; + + /* + * First check to see if another nfsd has already + * added this credential. + */ + for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first; + nuidp != 0; nuidp = nuidp->nu_hash.le_next) { + if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid && + (!nfsd->nfsd_nd->nd_nam2 || + netaddr_match(NU_NETFAM(nuidp), + &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2))) + break; + } + if (nuidp) { + nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr); + nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; + } else { + /* + * Nope, so we will. + */ + if (slp->ns_numuids < nuidhash_max) { + slp->ns_numuids++; + nuidp = (struct nfsuid *) + malloc(sizeof (struct nfsuid), M_NFSUID, + M_WAITOK); + } else + nuidp = (struct nfsuid *)0; + if ((slp->ns_flag & SLP_VALID) == 0) { + if (nuidp) + free((caddr_t)nuidp, M_NFSUID); + } else { + if (nuidp == (struct nfsuid *)0) { + nuidp = slp->ns_uidlruhead.tqh_first; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, + nu_lru); + if (nuidp->nu_flag & NU_NAM) + FREE(nuidp->nu_nam, M_SONAME); + } + nuidp->nu_flag = 0; + nuidp->nu_cr = nsd->nsd_cr; + if (nuidp->nu_cr.cr_ngroups > NGROUPS) + nuidp->nu_cr.cr_ngroups = NGROUPS; + nuidp->nu_cr.cr_ref = 1; + nuidp->nu_timestamp = nsd->nsd_timestamp; + nuidp->nu_expire = time_second + nsd->nsd_ttl; + /* + * and save the session key in nu_key. + */ + bcopy(nsd->nsd_key, nuidp->nu_key, + sizeof (nsd->nsd_key)); + if (nfsd->nfsd_nd->nd_nam2) { + struct sockaddr_in *saddr; + + saddr = (struct sockaddr_in *) + nfsd->nfsd_nd->nd_nam2; + switch (saddr->sin_family) { + case AF_INET: + nuidp->nu_flag |= NU_INETADDR; + nuidp->nu_inetaddr = + saddr->sin_addr.s_addr; + break; + case AF_ISO: + default: + nuidp->nu_flag |= NU_NAM; + nuidp->nu_nam = + dup_sockaddr(nfsd->nfsd_nd-> + nd_nam2, 1); + break; + }; + } + TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp, + nu_lru); + LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid), + nuidp, nu_hash); + nfsrv_setcred(&nuidp->nu_cr, + &nfsd->nfsd_nd->nd_cr); + nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; + } + } + } + if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd)) + nfsd->nfsd_flag |= NFSD_AUTHFAIL; + error = nfssvc_nfsd(nsd, uap->argp, p); + } +#endif /* NFS_NOSERVER */ + if (error == EINTR || error == ERESTART) + error = 0; + return (error); +} + +#ifndef NFS_NOSERVER +/* + * Adds a socket to the list for servicing by nfsds. + */ +static int +nfssvc_addsock(fp, mynam, p) + struct file *fp; + struct sockaddr *mynam; + struct proc *p; +{ + register int siz; + register struct nfssvc_sock *slp; + register struct socket *so; + int error, s; + + so = (struct socket *)fp->f_data; +#if 0 + tslp = (struct nfssvc_sock *)0; + /* + * Add it to the list, as required. 
+ */ + if (so->so_proto->pr_protocol == IPPROTO_UDP) { + tslp = nfs_udpsock; + if (tslp->ns_flag & SLP_VALID) { + if (mynam != NULL) + FREE(mynam, M_SONAME); + return (EPERM); + } +#ifdef ISO + } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) { + tslp = nfs_cltpsock; + if (tslp->ns_flag & SLP_VALID) { + if (mynam != NULL) + FREE(mynam, M_SONAME); + return (EPERM); + } +#endif /* ISO */ + } +#endif + if (so->so_type == SOCK_STREAM) + siz = NFS_MAXPACKET + sizeof (u_long); + else + siz = NFS_MAXPACKET; + error = soreserve(so, siz, siz); + if (error) { + if (mynam != NULL) + FREE(mynam, M_SONAME); + return (error); + } + + /* + * Set protocol specific options { for now TCP only } and + * reserve some space. For datagram sockets, this can get called + * repeatedly for the same socket, but that isn't harmful. + */ + if (so->so_type == SOCK_STREAM) { + struct sockopt sopt; + int val; + + bzero(&sopt, sizeof sopt); + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_KEEPALIVE; + sopt.sopt_val = &val; + sopt.sopt_valsize = sizeof val; + val = 1; + sosetopt(so, &sopt); + } + if (so->so_proto->pr_domain->dom_family == AF_INET && + so->so_proto->pr_protocol == IPPROTO_TCP) { + struct sockopt sopt; + int val; + + bzero(&sopt, sizeof sopt); + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; + sopt.sopt_val = &val; + sopt.sopt_valsize = sizeof val; + val = 1; + sosetopt(so, &sopt); + } + so->so_rcv.sb_flags &= ~SB_NOINTR; + so->so_rcv.sb_timeo = 0; + so->so_snd.sb_flags &= ~SB_NOINTR; + so->so_snd.sb_timeo = 0; + + slp = (struct nfssvc_sock *) + malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK); + bzero((caddr_t)slp, sizeof (struct nfssvc_sock)); + STAILQ_INIT(&slp->ns_rec); + TAILQ_INIT(&slp->ns_uidlruhead); + TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain); + + slp->ns_so = so; + slp->ns_nam = mynam; + fp->f_count++; + slp->ns_fp = fp; + s = splnet(); + so->so_upcallarg = (caddr_t)slp; + so->so_upcall = nfsrv_rcv; + so->so_rcv.sb_flags |= SB_UPCALL; + slp->ns_flag = (SLP_VALID | SLP_NEEDQ); + nfsrv_wakenfsd(slp); + splx(s); + return (0); +} + +/* + * Called by nfssvc() for nfsds. Just loops around servicing rpc requests + * until it is killed by a signal. + */ +static int +nfssvc_nfsd(nsd, argp, p) + struct nfsd_srvargs *nsd; + caddr_t argp; + struct proc *p; +{ + register int siz; + register struct nfssvc_sock *slp; + struct nfsd *nfsd = nsd->nsd_nfsd; + struct nfsrv_descript *nd = NULL; + struct mbuf *m, *mreq; + int error = 0, cacherep, s, sotype, writes_todo; + int procrastinate; + u_quad_t cur_usec; + +#ifndef nolint + cacherep = RC_DOIT; + writes_todo = 0; +#endif + if (nfsd == (struct nfsd *)0) { + nsd->nsd_nfsd = nfsd = (struct nfsd *) + malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK); + bzero((caddr_t)nfsd, sizeof (struct nfsd)); + s = splnet(); + nfsd->nfsd_procp = p; + TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); + nfs_numnfsd++; + } else + s = splnet(); + + /* + * Loop getting rpc requests until SIGKILL. 
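[Editor's aside, not part of the patch] nfssvc_addsock() above turns on SO_KEEPALIVE and, for TCP, TCP_NODELAY through the in-kernel sosetopt() interface, so dead peers are noticed and small RPC replies are not delayed by Nagle. The userland equivalent for a socket about to be handed to the server would look roughly like this:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/* Set the same options on a TCP server socket from user space. */
static int
tune_nfs_socket(int fd)
{
        int on = 1;

        if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) == -1)
                return (-1);
        if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)) == -1)
                return (-1);
        return (0);
}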
+ */ + for (;;) { + if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) { + while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && + (nfsd_head_flag & NFSD_CHECKSLP) == 0) { + nfsd->nfsd_flag |= NFSD_WAITING; + nfsd_waiting++; + error = tsleep((caddr_t)nfsd, PSOCK | PCATCH, + "nfsd", 0); + nfsd_waiting--; + if (error) + goto done; + } + if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && + (nfsd_head_flag & NFSD_CHECKSLP) != 0) { + for (slp = nfssvc_sockhead.tqh_first; slp != 0; + slp = slp->ns_chain.tqe_next) { + if ((slp->ns_flag & (SLP_VALID | SLP_DOREC)) + == (SLP_VALID | SLP_DOREC)) { + slp->ns_flag &= ~SLP_DOREC; + slp->ns_sref++; + nfsd->nfsd_slp = slp; + break; + } + } + if (slp == 0) + nfsd_head_flag &= ~NFSD_CHECKSLP; + } + if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0) + continue; + if (slp->ns_flag & SLP_VALID) { + if (slp->ns_flag & SLP_DISCONN) + nfsrv_zapsock(slp); + else if (slp->ns_flag & SLP_NEEDQ) { + slp->ns_flag &= ~SLP_NEEDQ; + (void) nfs_slplock(slp, 1); + nfsrv_rcv(slp->ns_so, (caddr_t)slp, + M_WAIT); + nfs_slpunlock(slp); + } + error = nfsrv_dorec(slp, nfsd, &nd); + cur_usec = nfs_curusec(); + if (error && slp->ns_tq.lh_first && + slp->ns_tq.lh_first->nd_time <= cur_usec) { + error = 0; + cacherep = RC_DOIT; + writes_todo = 1; + } else + writes_todo = 0; + nfsd->nfsd_flag |= NFSD_REQINPROG; + } + } else { + error = 0; + slp = nfsd->nfsd_slp; + } + if (error || (slp->ns_flag & SLP_VALID) == 0) { + if (nd) { + free((caddr_t)nd, M_NFSRVDESC); + nd = NULL; + } + nfsd->nfsd_slp = (struct nfssvc_sock *)0; + nfsd->nfsd_flag &= ~NFSD_REQINPROG; + nfsrv_slpderef(slp); + continue; + } + splx(s); + sotype = slp->ns_so->so_type; + if (nd) { + getmicrotime(&nd->nd_starttime); + if (nd->nd_nam2) + nd->nd_nam = nd->nd_nam2; + else + nd->nd_nam = slp->ns_nam; + + /* + * Check to see if authorization is needed. + */ + if (nfsd->nfsd_flag & NFSD_NEEDAUTH) { + nfsd->nfsd_flag &= ~NFSD_NEEDAUTH; + nsd->nsd_haddr = + ((struct sockaddr_in *) + nd->nd_nam)->sin_addr.s_addr; + nsd->nsd_authlen = nfsd->nfsd_authlen; + nsd->nsd_verflen = nfsd->nfsd_verflen; + if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr, + nfsd->nfsd_authlen) && + !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr, + nfsd->nfsd_verflen) && + !copyout((caddr_t)nsd, argp, sizeof (*nsd))) + return (ENEEDAUTH); + cacherep = RC_DROPIT; + } else + cacherep = nfsrv_getcache(nd, slp, &mreq); + + /* + * Check for just starting up for NQNFS and send + * fake "try again later" replies to the NQNFS clients. 
+ */ + if (notstarted && nqnfsstarttime <= time_second) { + if (modify_flag) { + nqnfsstarttime = time_second + nqsrv_writeslack; + modify_flag = 0; + } else + notstarted = 0; + } + if (notstarted) { + if ((nd->nd_flag & ND_NQNFS) == 0) + cacherep = RC_DROPIT; + else if (nd->nd_procnum != NFSPROC_WRITE) { + nd->nd_procnum = NFSPROC_NOOP; + nd->nd_repstat = NQNFS_TRYLATER; + cacherep = RC_DOIT; + } else + modify_flag = 1; + } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) { + nfsd->nfsd_flag &= ~NFSD_AUTHFAIL; + nd->nd_procnum = NFSPROC_NOOP; + nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); + cacherep = RC_DOIT; + } else if (nfs_privport) { + /* Check if source port is privileged */ + u_short port; + struct sockaddr *nam = nd->nd_nam; + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)nam; + port = ntohs(sin->sin_port); + if (port >= IPPORT_RESERVED && + nd->nd_procnum != NFSPROC_NULL) { + nd->nd_procnum = NFSPROC_NOOP; + nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); + cacherep = RC_DOIT; + printf("NFS request from unprivileged port (%s:%d)\n", + inet_ntoa(sin->sin_addr), port); + } + } + + } + + /* + * Loop to get all the write rpc relies that have been + * gathered together. + */ + do { + switch (cacherep) { + case RC_DOIT: + if (nd && (nd->nd_flag & ND_NFSV3)) + procrastinate = nfsrvw_procrastinate_v3; + else + procrastinate = nfsrvw_procrastinate; + if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE && + procrastinate > 0 && !notstarted)) + error = nfsrv_writegather(&nd, slp, + nfsd->nfsd_procp, &mreq); + else + error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, + slp, nfsd->nfsd_procp, &mreq); + if (mreq == NULL) + break; + if (error != 0 && error != NFSERR_RETVOID) { + if (nd->nd_procnum != NQNFSPROC_VACATED) + nfsstats.srv_errs++; + nfsrv_updatecache(nd, FALSE, mreq); + if (nd->nd_nam2) + FREE(nd->nd_nam2, M_SONAME); + break; + } + nfsstats.srvrpccnt[nd->nd_procnum]++; + nfsrv_updatecache(nd, TRUE, mreq); + nd->nd_mrep = (struct mbuf *)0; + case RC_REPLY: + m = mreq; + siz = 0; + while (m) { + siz += m->m_len; + m = m->m_next; + } + if (siz <= 0 || siz > NFS_MAXPACKET) { + printf("mbuf siz=%d\n",siz); + panic("Bad nfs svc reply"); + } + m = mreq; + m->m_pkthdr.len = siz; + m->m_pkthdr.rcvif = (struct ifnet *)0; + /* + * For stream protocols, prepend a Sun RPC + * Record Mark. + */ + if (sotype == SOCK_STREAM) { + M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); + *mtod(m, u_int32_t *) = htonl(0x80000000 | siz); + } + if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED) + (void) nfs_slplock(slp, 1); + if (slp->ns_flag & SLP_VALID) + error = nfs_send(slp->ns_so, nd->nd_nam2, m, NULL); + else { + error = EPIPE; + m_freem(m); + } + if (nfsrtton) + nfsd_rt(sotype, nd, cacherep); + if (nd->nd_nam2) + FREE(nd->nd_nam2, M_SONAME); + if (nd->nd_mrep) + m_freem(nd->nd_mrep); + if (error == EPIPE) + nfsrv_zapsock(slp); + if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED) + nfs_slpunlock(slp); + if (error == EINTR || error == ERESTART) { + free((caddr_t)nd, M_NFSRVDESC); + nfsrv_slpderef(slp); + s = splnet(); + goto done; + } + break; + case RC_DROPIT: + if (nfsrtton) + nfsd_rt(sotype, nd, cacherep); + m_freem(nd->nd_mrep); + if (nd->nd_nam2) + FREE(nd->nd_nam2, M_SONAME); + break; + }; + if (nd) { + FREE((caddr_t)nd, M_NFSRVDESC); + nd = NULL; + } + + /* + * Check to see if there are outstanding writes that + * need to be serviced. 
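The record mark prepended for stream sockets above follows the Sun RPC record-marking convention: the high bit flags the final fragment and the low 31 bits carry the fragment length, transmitted in network byte order. A small userland sketch of encoding and decoding that word (names are illustrative):

#include <arpa/inet.h>
#include <stdint.h>

/* Build the 4-byte record mark placed in front of each reply. */
static uint32_t
rpc_recmark_encode(uint32_t len, int last_frag)
{
	return (htonl((last_frag ? 0x80000000U : 0U) | (len & 0x7fffffffU)));
}

/* Split a received record mark back into its length and last-fragment bit. */
static void
rpc_recmark_decode(uint32_t mark, uint32_t *lenp, int *lastp)
{
	mark = ntohl(mark);
	*lastp = (mark & 0x80000000U) != 0;
	*lenp = mark & 0x7fffffffU;
}
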
+ */ + cur_usec = nfs_curusec(); + s = splsoftclock(); + if (slp->ns_tq.lh_first && + slp->ns_tq.lh_first->nd_time <= cur_usec) { + cacherep = RC_DOIT; + writes_todo = 1; + } else + writes_todo = 0; + splx(s); + } while (writes_todo); + s = splnet(); + if (nfsrv_dorec(slp, nfsd, &nd)) { + nfsd->nfsd_flag &= ~NFSD_REQINPROG; + nfsd->nfsd_slp = NULL; + nfsrv_slpderef(slp); + } + } +done: + TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain); + splx(s); + free((caddr_t)nfsd, M_NFSD); + nsd->nsd_nfsd = (struct nfsd *)0; + if (--nfs_numnfsd == 0) + nfsrv_init(TRUE); /* Reinitialize everything */ + return (error); +} + +/* + * Shut down a socket associated with an nfssvc_sock structure. + * Should be called with the send lock set, if required. + * The trick here is to increment the sref at the start, so that the nfsds + * will stop using it and clear ns_flag at the end so that it will not be + * reassigned during cleanup. + */ +static void +nfsrv_zapsock(slp) + register struct nfssvc_sock *slp; +{ + register struct nfsuid *nuidp, *nnuidp; + register struct nfsrv_descript *nwp, *nnwp; + struct socket *so; + struct file *fp; + struct nfsrv_rec *rec; + int s; + + slp->ns_flag &= ~SLP_ALLFLAGS; + fp = slp->ns_fp; + if (fp) { + slp->ns_fp = (struct file *)0; + so = slp->ns_so; + so->so_rcv.sb_flags &= ~SB_UPCALL; + so->so_upcall = NULL; + so->so_upcallarg = NULL; + soshutdown(so, 2); + closef(fp, (struct proc *)0); + if (slp->ns_nam) + FREE(slp->ns_nam, M_SONAME); + m_freem(slp->ns_raw); + while ((rec = STAILQ_FIRST(&slp->ns_rec)) != NULL) { + STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link); + if (rec->nr_address) + FREE(rec->nr_address, M_SONAME); + m_freem(rec->nr_packet); + free(rec, M_NFSRVDESC); + } + for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0; + nuidp = nnuidp) { + nnuidp = nuidp->nu_lru.tqe_next; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); + if (nuidp->nu_flag & NU_NAM) + FREE(nuidp->nu_nam, M_SONAME); + free((caddr_t)nuidp, M_NFSUID); + } + s = splsoftclock(); + for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { + nnwp = nwp->nd_tq.le_next; + LIST_REMOVE(nwp, nd_tq); + free((caddr_t)nwp, M_NFSRVDESC); + } + LIST_INIT(&slp->ns_tq); + splx(s); + } +} + +/* + * Derefence a server socket structure. If it has no more references and + * is no longer valid, you can throw it away. + */ +void +nfsrv_slpderef(slp) + register struct nfssvc_sock *slp; +{ + if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) { + TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); + free((caddr_t)slp, M_NFSSVC); + } +} + +/* + * Lock a socket against others. + */ +int +nfs_slplock(slp, wait) + register struct nfssvc_sock *slp; + int wait; +{ + int *statep = &slp->ns_solock; + + if (!wait && (*statep & NFSSTA_SNDLOCK)) + return(0); /* already locked, fail */ + while (*statep & NFSSTA_SNDLOCK) { + *statep |= NFSSTA_WANTSND; + (void) tsleep((caddr_t)statep, PZERO - 1, "nfsslplck", 0); + } + *statep |= NFSSTA_SNDLOCK; + return (1); +} + +/* + * Unlock the stream socket for others. + */ +void +nfs_slpunlock(slp) + register struct nfssvc_sock *slp; +{ + int *statep = &slp->ns_solock; + + if ((*statep & NFSSTA_SNDLOCK) == 0) + panic("nfs slpunlock"); + *statep &= ~NFSSTA_SNDLOCK; + if (*statep & NFSSTA_WANTSND) { + *statep &= ~NFSSTA_WANTSND; + wakeup((caddr_t)statep); + } +} + +/* + * Initialize the data structures for the server. + * Handshake with any new nfsds starting up to avoid any chance of + * corruption. 
+ */ +void +nfsrv_init(terminating) + int terminating; +{ + register struct nfssvc_sock *slp, *nslp; + + if (nfssvc_sockhead_flag & SLP_INIT) + panic("nfsd init"); + nfssvc_sockhead_flag |= SLP_INIT; + if (terminating) { + for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) { + nslp = slp->ns_chain.tqe_next; + if (slp->ns_flag & SLP_VALID) + nfsrv_zapsock(slp); + TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); + free((caddr_t)slp, M_NFSSVC); + } + nfsrv_cleancache(); /* And clear out server cache */ + } else + nfs_pub.np_valid = 0; + + TAILQ_INIT(&nfssvc_sockhead); + nfssvc_sockhead_flag &= ~SLP_INIT; + if (nfssvc_sockhead_flag & SLP_WANTINIT) { + nfssvc_sockhead_flag &= ~SLP_WANTINIT; + wakeup((caddr_t)&nfssvc_sockhead); + } + + TAILQ_INIT(&nfsd_head); + nfsd_head_flag &= ~NFSD_CHECKSLP; + +#if 0 + nfs_udpsock = (struct nfssvc_sock *) + malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK); + bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock)); + STAILQ_INIT(&nfs_udpsock->ns_rec); + TAILQ_INIT(&nfs_udpsock->ns_uidlruhead); + TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain); + + nfs_cltpsock = (struct nfssvc_sock *) + malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK); + bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock)); + STAILQ_INIT(&nfs_cltpsock->ns_rec); + TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead); + TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain); +#endif +} + +/* + * Add entries to the server monitor log. + */ +static void +nfsd_rt(sotype, nd, cacherep) + int sotype; + register struct nfsrv_descript *nd; + int cacherep; +{ + register struct drt *rt; + + rt = &nfsdrt.drt[nfsdrt.pos]; + if (cacherep == RC_DOIT) + rt->flag = 0; + else if (cacherep == RC_REPLY) + rt->flag = DRT_CACHEREPLY; + else + rt->flag = DRT_CACHEDROP; + if (sotype == SOCK_STREAM) + rt->flag |= DRT_TCP; + if (nd->nd_flag & ND_NQNFS) + rt->flag |= DRT_NQNFS; + else if (nd->nd_flag & ND_NFSV3) + rt->flag |= DRT_NFSV3; + rt->proc = nd->nd_procnum; + if (nd->nd_nam->sa_family == AF_INET) + rt->ipadr = ((struct sockaddr_in *)nd->nd_nam)->sin_addr.s_addr; + else + rt->ipadr = INADDR_ANY; + rt->resptime = nfs_curusec() - (nd->nd_starttime.tv_sec * 1000000 + nd->nd_starttime.tv_usec); + getmicrotime(&rt->tstamp); + nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ; +} +#endif /* NFS_NOSERVER */ + +static int nfs_defect = 0; +SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, ""); + +/* + * Asynchronous I/O daemons for client nfs. + * They do read-ahead and write-behind operations on the block I/O cache. + * Never returns unless it fails or gets killed. 
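The nfsd_rt() monitor log a little further up is a plain ring buffer: a fixed array indexed modulo its size, so new entries overwrite the oldest. A stripped-down sketch of the same pattern, with hypothetical names and fields:

#define RTLOGSIZ	128		/* hypothetical log size */

struct rt_entry {
	int	proc;			/* RPC procedure number */
	long	resptime_us;		/* response time in microseconds */
};

static struct rt_entry	rtlog[RTLOGSIZ];
static int		rtpos;

/* Record one entry; the oldest slot is silently overwritten. */
static void
rtlog_add(int proc, long resptime_us)
{
	rtlog[rtpos].proc = proc;
	rtlog[rtpos].resptime_us = resptime_us;
	rtpos = (rtpos + 1) % RTLOGSIZ;
}
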
+ */ +static int +nfssvc_iod(p) + struct proc *p; +{ + register struct buf *bp; + register int i, myiod; + struct nfsmount *nmp; + int error = 0; + + /* + * Assign my position or return error if too many already running + */ + myiod = -1; + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) + if (nfs_asyncdaemon[i] == 0) { + nfs_asyncdaemon[i]++; + myiod = i; + break; + } + if (myiod == -1) + return (EBUSY); + nfs_numasync++; + /* + * Just loop around doin our stuff until SIGKILL + */ + for (;;) { + while (((nmp = nfs_iodmount[myiod]) == NULL + || nmp->nm_bufq.tqh_first == NULL) + && error == 0) { + if (nmp) + nmp->nm_bufqiods--; + nfs_iodwant[myiod] = p; + nfs_iodmount[myiod] = NULL; + error = tsleep((caddr_t)&nfs_iodwant[myiod], + PWAIT | PCATCH, "nfsidl", 0); + } + if (error) { + nfs_asyncdaemon[myiod] = 0; + if (nmp) + nmp->nm_bufqiods--; + nfs_iodwant[myiod] = NULL; + nfs_iodmount[myiod] = NULL; + nfs_numasync--; + return (error); + } + while ((bp = nmp->nm_bufq.tqh_first) != NULL) { + /* Take one off the front of the list */ + TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist); + nmp->nm_bufqlen--; + if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) { + nmp->nm_bufqwant = FALSE; + wakeup(&nmp->nm_bufq); + } + if (bp->b_flags & B_READ) + (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0); + else + (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0); + /* + * If there are more than one iod on this mount, then defect + * so that the iods can be shared out fairly between the mounts + */ + if (nfs_defect && nmp->nm_bufqiods > 1) { + NFS_DPF(ASYNCIO, + ("nfssvc_iod: iod %d defecting from mount %p\n", + myiod, nmp)); + nfs_iodmount[myiod] = NULL; + nmp->nm_bufqiods--; + break; + } + } + } +} + + +/* + * Get an authorization string for the uid by having the mount_nfs sitting + * on this mount point porpous out of the kernel and do it. + */ +int +nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) + register struct nfsmount *nmp; + struct nfsreq *rep; + struct ucred *cred; + char **auth_str; + int *auth_len; + char *verf_str; + int *verf_len; + NFSKERBKEY_T key; /* return session key */ +{ + int error = 0; + + while ((nmp->nm_state & NFSSTA_WAITAUTH) == 0) { + nmp->nm_state |= NFSSTA_WANTAUTH; + (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK, + "nfsauth1", 2 * hz); + error = nfs_sigintr(nmp, rep, rep->r_procp); + if (error) { + nmp->nm_state &= ~NFSSTA_WANTAUTH; + return (error); + } + } + nmp->nm_state &= ~(NFSSTA_WAITAUTH | NFSSTA_WANTAUTH); + nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK); + nmp->nm_authlen = RPCAUTH_MAXSIZ; + nmp->nm_verfstr = verf_str; + nmp->nm_verflen = *verf_len; + nmp->nm_authuid = cred->cr_uid; + wakeup((caddr_t)&nmp->nm_authstr); + + /* + * And wait for mount_nfs to do its stuff. + */ + while ((nmp->nm_state & NFSSTA_HASAUTH) == 0 && error == 0) { + (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK, + "nfsauth2", 2 * hz); + error = nfs_sigintr(nmp, rep, rep->r_procp); + } + if (nmp->nm_state & NFSSTA_AUTHERR) { + nmp->nm_state &= ~NFSSTA_AUTHERR; + error = EAUTH; + } + if (error) + free((caddr_t)*auth_str, M_TEMP); + else { + *auth_len = nmp->nm_authlen; + *verf_len = nmp->nm_verflen; + bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key)); + } + nmp->nm_state &= ~NFSSTA_HASAUTH; + nmp->nm_state |= NFSSTA_WAITAUTH; + if (nmp->nm_state & NFSSTA_WANTAUTH) { + nmp->nm_state &= ~NFSSTA_WANTAUTH; + wakeup((caddr_t)&nmp->nm_authtype); + } + return (error); +} + +/* + * Get a nickname authenticator and verifier. 
+ */ +int +nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) + struct nfsmount *nmp; + struct ucred *cred; + char **auth_str; + int *auth_len; + char *verf_str; + int verf_len; +{ + register struct nfsuid *nuidp; + register u_int32_t *nickp, *verfp; + struct timeval ktvin, ktvout; + +#ifdef DIAGNOSTIC + if (verf_len < (4 * NFSX_UNSIGNED)) + panic("nfs_getnickauth verf too small"); +#endif + for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first; + nuidp != 0; nuidp = nuidp->nu_hash.le_next) { + if (nuidp->nu_cr.cr_uid == cred->cr_uid) + break; + } + if (!nuidp || nuidp->nu_expire < time_second) + return (EACCES); + + /* + * Move to the end of the lru list (end of lru == most recently used). + */ + TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); + TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru); + + nickp = (u_int32_t *)malloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK); + *nickp++ = txdr_unsigned(RPCAKN_NICKNAME); + *nickp = txdr_unsigned(nuidp->nu_nickname); + *auth_str = (char *)nickp; + *auth_len = 2 * NFSX_UNSIGNED; + + /* + * Now we must encrypt the verifier and package it up. + */ + verfp = (u_int32_t *)verf_str; + *verfp++ = txdr_unsigned(RPCAKN_NICKNAME); + if (time_second > nuidp->nu_timestamp.tv_sec || + (time_second == nuidp->nu_timestamp.tv_sec && + time_second > nuidp->nu_timestamp.tv_usec)) + getmicrotime(&nuidp->nu_timestamp); + else + nuidp->nu_timestamp.tv_usec++; + ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec); + ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec); + + /* + * Now encrypt the timestamp verifier in ecb mode using the session + * key. + */ +#ifdef NFSKERB + XXX +#endif + + *verfp++ = ktvout.tv_sec; + *verfp++ = ktvout.tv_usec; + *verfp = 0; + return (0); +} + +/* + * Save the current nickname in a hash list entry on the mount point. + */ +int +nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep) + register struct nfsmount *nmp; + struct ucred *cred; + int len; + NFSKERBKEY_T key; + struct mbuf **mdp; + char **dposp; + struct mbuf *mrep; +{ + register struct nfsuid *nuidp; + register u_int32_t *tl; + register int32_t t1; + struct mbuf *md = *mdp; + struct timeval ktvin, ktvout; + u_int32_t nick; + char *dpos = *dposp, *cp2; + int deltasec, error = 0; + + if (len == (3 * NFSX_UNSIGNED)) { + nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + ktvin.tv_sec = *tl++; + ktvin.tv_usec = *tl++; + nick = fxdr_unsigned(u_int32_t, *tl); + + /* + * Decrypt the timestamp in ecb mode. + */ +#ifdef NFSKERB + XXX +#endif + ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec); + ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec); + deltasec = time_second - ktvout.tv_sec; + if (deltasec < 0) + deltasec = -deltasec; + /* + * If ok, add it to the hash list for the mount point. 
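The timestamp handling in nfs_getnickauth() above is there to keep each nickname verifier strictly newer than the last, bumping the microsecond field by hand when the clock has not advanced. A userland sketch of that intent, using gettimeofday(2) in place of the kernel's getmicrotime():

#include <stddef.h>
#include <sys/time.h>

/* Return a (sec, usec) stamp strictly newer than the previous one. */
static void
next_verf_stamp(struct timeval *last)
{
	struct timeval now;

	gettimeofday(&now, NULL);
	if (timercmp(&now, last, >))
		*last = now;		/* clock moved forward: take it */
	else
		last->tv_usec++;	/* clock did not move: force progress */
}
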
+ */ + if (deltasec <= NFS_KERBCLOCKSKEW) { + if (nmp->nm_numuids < nuidhash_max) { + nmp->nm_numuids++; + nuidp = (struct nfsuid *) + malloc(sizeof (struct nfsuid), M_NFSUID, + M_WAITOK); + } else { + nuidp = nmp->nm_uidlruhead.tqh_first; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, + nu_lru); + } + nuidp->nu_flag = 0; + nuidp->nu_cr.cr_uid = cred->cr_uid; + nuidp->nu_expire = time_second + NFS_KERBTTL; + nuidp->nu_timestamp = ktvout; + nuidp->nu_nickname = nick; + bcopy(key, nuidp->nu_key, sizeof (key)); + TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, + nu_lru); + LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid), + nuidp, nu_hash); + } + } else + nfsm_adv(nfsm_rndup(len)); +nfsmout: + *mdp = md; + *dposp = dpos; + return (error); +} diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c new file mode 100644 index 0000000..9ddb428 --- /dev/null +++ b/sys/nfs/nfs_vfsops.c @@ -0,0 +1,1078 @@ +/* + * Copyright (c) 1989, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95 + * $FreeBSD$ + */ + +#include "opt_bootp.h" + +#include <sys/param.h> +#include <sys/sockio.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/systm.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_zone.h> + +#include <net/if.h> +#include <net/route.h> +#include <netinet/in.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsmount.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nfsdiskless.h> +#include <nfs/nqnfs.h> + +extern int nfs_mountroot __P((struct mount *mp)); + +extern int nfs_ticks; + +MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header"); +MALLOC_DEFINE(M_NFSBIGFH, "NFSV3 bigfh", "NFS version 3 file handle"); +MALLOC_DEFINE(M_NFSD, "NFS daemon", "Nfs server daemon structure"); +MALLOC_DEFINE(M_NFSDIROFF, "NFSV3 diroff", "NFS directory offset data"); +MALLOC_DEFINE(M_NFSRVDESC, "NFSV3 srvdesc", "NFS server socket descriptor"); +MALLOC_DEFINE(M_NFSUID, "NFS uid", "Nfs uid mapping structure"); +MALLOC_DEFINE(M_NQLEASE, "NQNFS Lease", "Nqnfs lease"); +MALLOC_DEFINE(M_NFSHASH, "NFS hash", "NFS hash tables"); + +vm_zone_t nfsmount_zone; + +struct nfsstats nfsstats; +SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); +SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, + &nfsstats, nfsstats, ""); +#ifdef NFS_DEBUG +int nfs_debug; +SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, ""); +#endif + +static int nfs_iosize __P((struct nfsmount *nmp)); +static void nfs_decode_args __P((struct nfsmount *nmp, + struct nfs_args *argp)); +static int mountnfs __P((struct nfs_args *,struct mount *, + struct sockaddr *,char *,char *,struct vnode **)); +static int nfs_mount __P(( struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); +static int nfs_unmount __P(( struct mount *mp, int mntflags, + struct proc *p)); +static int nfs_root __P(( struct mount *mp, struct vnode **vpp)); +static int nfs_statfs __P(( struct mount *mp, struct statfs *sbp, + struct proc *p)); +static int nfs_sync __P(( struct mount *mp, int waitfor, + struct ucred *cred, struct proc *p)); + +/* + * nfs vfs operations. + */ +static struct vfsops nfs_vfsops = { + nfs_mount, + vfs_stdstart, + nfs_unmount, + nfs_root, + vfs_stdquotactl, + nfs_statfs, + nfs_sync, + vfs_stdvget, + vfs_stdfhtovp, /* shouldn't happen */ + vfs_stdcheckexp, + vfs_stdvptofh, /* shouldn't happen */ + nfs_init, + nfs_uninit, + vfs_stdextattrctl, +}; +VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK); + +/* + * This structure must be filled in by a primary bootstrap or bootstrap + * server for a diskless/dataless machine. It is initialized below just + * to ensure that it is allocated to initialized data (.data not .bss). 
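The SYSCTL_STRUCT() declaration above exports the client/server statistics under the vfs.nfs node, which should appear to userland as "vfs.nfs.nfsstats" and be readable with sysctlbyname(3). A minimal sketch that only queries the size of the exported blob; a real consumer would read it into a struct nfsstats from <nfs/nfs.h>:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	size_t len = 0;

	/* Ask the kernel how large the exported statistics blob is. */
	if (sysctlbyname("vfs.nfs.nfsstats", NULL, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("vfs.nfs.nfsstats: %zu bytes\n", len);
	return (0);
}
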
+ */ +struct nfs_diskless nfs_diskless = { { { 0 } } }; +struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; +int nfs_diskless_valid = 0; + +SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, + &nfs_diskless_valid, 0, ""); + +SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, + nfsv3_diskless.root_hostnam, 0, ""); + +SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, + &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr, + "%Ssockaddr_in", ""); + +SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_swappath, CTLFLAG_RD, + nfsv3_diskless.swap_hostnam, 0, ""); + +SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_swapaddr, CTLFLAG_RD, + &nfsv3_diskless.swap_saddr, sizeof nfsv3_diskless.swap_saddr, + "%Ssockaddr_in",""); + + +void nfsargs_ntoh __P((struct nfs_args *)); +static int nfs_mountdiskless __P((char *, char *, int, + struct sockaddr_in *, struct nfs_args *, + struct proc *, struct vnode **, + struct mount **)); +static void nfs_convert_diskless __P((void)); +static void nfs_convert_oargs __P((struct nfs_args *args, + struct onfs_args *oargs)); + +static int +nfs_iosize(nmp) + struct nfsmount* nmp; +{ + int iosize; + + /* + * Calculate the size used for io buffers. Use the larger + * of the two sizes to minimise nfs requests but make sure + * that it is at least one VM page to avoid wasting buffer + * space. + */ + iosize = max(nmp->nm_rsize, nmp->nm_wsize); + if (iosize < PAGE_SIZE) iosize = PAGE_SIZE; + return iosize; +} + +static void +nfs_convert_oargs(args, oargs) + struct nfs_args *args; + struct onfs_args *oargs; +{ + args->version = NFS_ARGSVERSION; + args->addr = oargs->addr; + args->addrlen = oargs->addrlen; + args->sotype = oargs->sotype; + args->proto = oargs->proto; + args->fh = oargs->fh; + args->fhsize = oargs->fhsize; + args->flags = oargs->flags; + args->wsize = oargs->wsize; + args->rsize = oargs->rsize; + args->readdirsize = oargs->readdirsize; + args->timeo = oargs->timeo; + args->retrans = oargs->retrans; + args->maxgrouplist = oargs->maxgrouplist; + args->readahead = oargs->readahead; + args->leaseterm = oargs->leaseterm; + args->deadthresh = oargs->deadthresh; + args->hostname = oargs->hostname; +} + +static void +nfs_convert_diskless() +{ + bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, + sizeof(struct ifaliasreq)); + bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, + sizeof(struct sockaddr_in)); + nfs_convert_oargs(&nfsv3_diskless.swap_args,&nfs_diskless.swap_args); + nfsv3_diskless.swap_fhsize = NFSX_V2FH; + bcopy(nfs_diskless.swap_fh,nfsv3_diskless.swap_fh,NFSX_V2FH); + bcopy(&nfs_diskless.swap_saddr,&nfsv3_diskless.swap_saddr, + sizeof(struct sockaddr_in)); + bcopy(nfs_diskless.swap_hostnam,nfsv3_diskless.swap_hostnam, MNAMELEN); + nfsv3_diskless.swap_nblks = nfs_diskless.swap_nblks; + bcopy(&nfs_diskless.swap_ucred, &nfsv3_diskless.swap_ucred, + sizeof(struct ucred)); + nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); + nfsv3_diskless.root_fhsize = NFSX_V2FH; + bcopy(nfs_diskless.root_fh,nfsv3_diskless.root_fh,NFSX_V2FH); + bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, + sizeof(struct sockaddr_in)); + bcopy(nfs_diskless.root_hostnam,nfsv3_diskless.root_hostnam, MNAMELEN); + nfsv3_diskless.root_time = nfs_diskless.root_time; + bcopy(nfs_diskless.my_hostnam,nfsv3_diskless.my_hostnam, + MAXHOSTNAMELEN); + nfs_diskless_valid = 3; +} + +/* + * nfs statfs call + */ +int +nfs_statfs(mp, sbp, p) + struct mount *mp; + register struct statfs *sbp; + struct proc *p; +{ + register struct vnode 
*vp; + register struct nfs_statfs *sfp; + register caddr_t cp; + register u_int32_t *tl; + register int32_t t1, t2; + caddr_t bpos, dpos, cp2; + struct nfsmount *nmp = VFSTONFS(mp); + int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct ucred *cred; + struct nfsnode *np; + u_quad_t tquad; + +#ifndef nolint + sfp = (struct nfs_statfs *)0; +#endif + error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); + if (error) + return (error); + vp = NFSTOV(np); + cred = crget(); + cred->cr_ngroups = 1; + if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (void)nfs_fsinfo(nmp, vp, cred, p); + nfsstats.rpccnt[NFSPROC_FSSTAT]++; + nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3)); + nfsm_fhtom(vp, v3); + nfsm_request(vp, NFSPROC_FSSTAT, p, cred); + if (v3) + nfsm_postop_attr(vp, retattr); + if (error) { + if (mrep != NULL) + m_freem(mrep); + goto nfsmout; + } + nfsm_dissect(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); + sbp->f_flags = nmp->nm_flag; + sbp->f_iosize = nfs_iosize(nmp); + if (v3) { + sbp->f_bsize = NFS_FABLKSIZE; + tquad = fxdr_hyper(&sfp->sf_tbytes); + sbp->f_blocks = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); + tquad = fxdr_hyper(&sfp->sf_fbytes); + sbp->f_bfree = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); + tquad = fxdr_hyper(&sfp->sf_abytes); + sbp->f_bavail = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); + sbp->f_files = (fxdr_unsigned(int32_t, + sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff); + sbp->f_ffree = (fxdr_unsigned(int32_t, + sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff); + } else { + sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize); + sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks); + sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree); + sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail); + sbp->f_files = 0; + sbp->f_ffree = 0; + } + if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + nfsm_reqdone; + vput(vp); + crfree(cred); + return (error); +} + +/* + * nfs version 3 fsinfo rpc call + */ +int +nfs_fsinfo(nmp, vp, cred, p) + register struct nfsmount *nmp; + register struct vnode *vp; + struct ucred *cred; + struct proc *p; +{ + register struct nfsv3_fsinfo *fsp; + register caddr_t cp; + register int32_t t1, t2; + register u_int32_t *tl, pref, max; + caddr_t bpos, dpos, cp2; + int error = 0, retattr; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + u_int64_t maxfsize; + + nfsstats.rpccnt[NFSPROC_FSINFO]++; + nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); + nfsm_fhtom(vp, 1); + nfsm_request(vp, NFSPROC_FSINFO, p, cred); + nfsm_postop_attr(vp, retattr); + if (!error) { + nfsm_dissect(fsp, struct nfsv3_fsinfo *, NFSX_V3FSINFO); + pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref); + if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE) + nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) & + ~(NFS_FABLKSIZE - 1); + max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax); + if (max < nmp->nm_wsize && max > 0) { + nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); + if (nmp->nm_wsize == 0) + nmp->nm_wsize = max; + } + pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref); + if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE) + nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) & + ~(NFS_FABLKSIZE - 1); + max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax); + if (max < nmp->nm_rsize && max > 0) { + nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); + if (nmp->nm_rsize == 0) + nmp->nm_rsize = max; + } + pref = 
fxdr_unsigned(u_int32_t, fsp->fs_dtpref); + if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ) + nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) & + ~(NFS_DIRBLKSIZ - 1); + if (max < nmp->nm_readdirsize && max > 0) { + nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); + if (nmp->nm_readdirsize == 0) + nmp->nm_readdirsize = max; + } + maxfsize = fxdr_hyper(&fsp->fs_maxfilesize); + if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize) + nmp->nm_maxfilesize = maxfsize; + nmp->nm_state |= NFSSTA_GOTFSINFO; + } + nfsm_reqdone; + return (error); +} + +/* + * Mount a remote root fs via. nfs. This depends on the info in the + * nfs_diskless structure that has been filled in properly by some primary + * bootstrap. + * It goes something like this: + * - do enough of "ifconfig" by calling ifioctl() so that the system + * can talk to the server + * - If nfs_diskless.mygateway is filled in, use that address as + * a default gateway. + * - build the rootfs mount point and call mountnfs() to do the rest. + */ +int +nfs_mountroot(mp) + struct mount *mp; +{ + struct mount *swap_mp; + struct nfsv3_diskless *nd = &nfsv3_diskless; + struct socket *so; + struct vnode *vp; + struct proc *p = curproc; /* XXX */ + int error, i; + u_long l; + char buf[128]; + +#if defined(BOOTP_NFSROOT) && defined(BOOTP) + bootpc_init(); /* use bootp to get nfs_diskless filled in */ +#endif + + /* + * XXX time must be non-zero when we init the interface or else + * the arp code will wedge... + */ + while (time_second == 0) + tsleep(&time_second, PZERO+8, "arpkludge", 10); + + if (nfs_diskless_valid==1) + nfs_convert_diskless(); + + /* + * XXX splnet, so networks will receive... + */ + splnet(); + +#ifdef notyet + /* Set up swap credentials. */ + proc0.p_ucred->cr_uid = ntohl(nd->swap_ucred.cr_uid); + proc0.p_ucred->cr_gid = ntohl(nd->swap_ucred.cr_gid); + if ((proc0.p_ucred->cr_ngroups = ntohs(nd->swap_ucred.cr_ngroups)) > + NGROUPS) + proc0.p_ucred->cr_ngroups = NGROUPS; + for (i = 0; i < proc0.p_ucred->cr_ngroups; i++) + proc0.p_ucred->cr_groups[i] = ntohl(nd->swap_ucred.cr_groups[i]); +#endif + + /* + * Do enough of ifconfig(8) so that the critical net interface can + * talk to the server. + */ + error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0, p); + if (error) + panic("nfs_mountroot: socreate(%04x): %d", + nd->myif.ifra_addr.sa_family, error); + + /* + * We might not have been told the right interface, so we pass + * over the first ten interfaces of the same kind, until we get + * one of them configured. + */ + + for (i = strlen(nd->myif.ifra_name) - 1; + nd->myif.ifra_name[i] >= '0' && + nd->myif.ifra_name[i] <= '9'; + nd->myif.ifra_name[i] ++) { + error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, p); + if(!error) + break; + } + if (error) + panic("nfs_mountroot: SIOCAIFADDR: %d", error); + soclose(so); + + /* + * If the gateway field is filled in, set it as the default route. + */ + if (nd->mygateway.sin_len != 0) { + struct sockaddr_in mask, sin; + + bzero((caddr_t)&mask, sizeof(mask)); + sin = mask; + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, + (struct sockaddr *)&nd->mygateway, + (struct sockaddr *)&mask, + RTF_UP | RTF_GATEWAY, (struct rtentry **)0); + if (error) + panic("nfs_mountroot: RTM_ADD: %d", error); + } + + /* + * Create the rootfs mount point. 
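nfs_fsinfo() above, and nfs_decode_args() further below, keep transfer sizes aligned to NFS_FABLKSIZE with power-of-two mask arithmetic. A small sketch of both directions of that idiom, with an illustrative constant standing in for NFS_FABLKSIZE:

#define FABLKSIZE	512		/* must be a power of two */

/* Round a size down to the nearest block multiple. */
static unsigned int
round_down_blk(unsigned int n)
{
	return (n & ~(FABLKSIZE - 1));
}

/* Round a size up to the nearest block multiple. */
static unsigned int
round_up_blk(unsigned int n)
{
	return ((n + FABLKSIZE - 1) & ~(FABLKSIZE - 1));
}
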
+ */ + nd->root_args.fh = nd->root_fh; + nd->root_args.fhsize = nd->root_fhsize; + l = ntohl(nd->root_saddr.sin_addr.s_addr); + snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", + (l >> 24) & 0xff, (l >> 16) & 0xff, + (l >> 8) & 0xff, (l >> 0) & 0xff,nd->root_hostnam); + printf("NFS ROOT: %s\n",buf); + if ((error = nfs_mountdiskless(buf, "/", MNT_RDONLY, + &nd->root_saddr, &nd->root_args, p, &vp, &mp)) != 0) { + if (swap_mp) { + mp->mnt_vfc->vfc_refcount--; + free(swap_mp, M_MOUNT); + } + return (error); + } + + swap_mp = NULL; + if (nd->swap_nblks) { + + /* Convert to DEV_BSIZE instead of Kilobyte */ + nd->swap_nblks *= 2; + + /* + * Create a fake mount point just for the swap vnode so that the + * swap file can be on a different server from the rootfs. + */ + nd->swap_args.fh = nd->swap_fh; + nd->swap_args.fhsize = nd->swap_fhsize; + l = ntohl(nd->swap_saddr.sin_addr.s_addr); + snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", + (l >> 24) & 0xff, (l >> 16) & 0xff, + (l >> 8) & 0xff, (l >> 0) & 0xff,nd->swap_hostnam); + printf("NFS SWAP: %s\n",buf); + if ((error = nfs_mountdiskless(buf, "/swap", 0, + &nd->swap_saddr, &nd->swap_args, p, &vp, &swap_mp)) != 0) + return (error); + vfs_unbusy(swap_mp, p); + + VTONFS(vp)->n_size = VTONFS(vp)->n_vattr.va_size = + nd->swap_nblks * DEV_BSIZE ; + + /* + * Since the swap file is not the root dir of a file system, + * hack it to a regular file. + */ + vp->v_type = VREG; + vp->v_flag = 0; + VREF(vp); + swaponvp(p, vp, NODEV, nd->swap_nblks); + } + + mp->mnt_flag |= MNT_ROOTFS; + mp->mnt_vnodecovered = NULLVP; + rootvp = vp; + vfs_unbusy(mp, p); + + /* + * This is not really an nfs issue, but it is much easier to + * set hostname here and then let the "/etc/rc.xxx" files + * mount the right /var based upon its preset value. + */ + bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN); + hostname[MAXHOSTNAMELEN - 1] = '\0'; + for (i = 0; i < MAXHOSTNAMELEN; i++) + if (hostname[i] == '\0') + break; + inittodr(ntohl(nd->root_time)); + return (0); +} + +/* + * Internal version of mount system call for diskless setup. + */ +static int +nfs_mountdiskless(path, which, mountflag, sin, args, p, vpp, mpp) + char *path; + char *which; + int mountflag; + struct sockaddr_in *sin; + struct nfs_args *args; + struct proc *p; + struct vnode **vpp; + struct mount **mpp; +{ + struct mount *mp; + struct sockaddr *nam; + int error; + + mp = *mpp; + + if (!mp && (error = vfs_rootmountalloc("nfs", path, &mp))) { + printf("nfs_mountroot: NFS not configured"); + return (error); + } + + mp->mnt_kern_flag = 0; + mp->mnt_flag = mountflag; + nam = dup_sockaddr((struct sockaddr *)sin, 1); + if ((error = mountnfs(args, mp, nam, which, path, vpp)) != 0) { + printf("nfs_mountroot: mount %s on %s: %d", path, which, error); + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); + free(mp, M_MOUNT); + FREE(nam, M_SONAME); + return (error); + } + (void) copystr(which, mp->mnt_stat.f_mntonname, MNAMELEN - 1, 0); + *mpp = mp; + return (0); +} + +static void +nfs_decode_args(nmp, argp) + struct nfsmount *nmp; + struct nfs_args *argp; +{ + int s; + int adjsock; + int maxio; + + s = splnet(); + /* + * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes + * no sense in that context. 
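The NFS ROOT and NFS SWAP banners above build an "address:path" string by taking the IPv4 address apart with shifts after ntohl(). A self-contained sketch of that formatting step (buffer handling and names are illustrative):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* Format "a.b.c.d:/path" from a network-order IPv4 address. */
static void
format_nfs_path(char *buf, size_t len, uint32_t saddr, const char *path)
{
	uint32_t l = ntohl(saddr);

	snprintf(buf, len, "%u.%u.%u.%u:%s",
	    (l >> 24) & 0xff, (l >> 16) & 0xff,
	    (l >> 8) & 0xff, l & 0xff, path);
}
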
+ */ + if (argp->sotype == SOCK_STREAM) + nmp->nm_flag &= ~NFSMNT_NOCONN; + + /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */ + if ((argp->flags & NFSMNT_NFSV3) == 0) + nmp->nm_flag &= ~NFSMNT_RDIRPLUS; + + /* Re-bind if rsrvd port requested and wasn't on one */ + adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) + && (argp->flags & NFSMNT_RESVPORT); + /* Also re-bind if we're switching to/from a connected UDP socket */ + adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != + (argp->flags & NFSMNT_NOCONN)); + + /* Update flags atomically. Don't change the lock bits. */ + nmp->nm_flag = argp->flags | nmp->nm_flag; + splx(s); + + if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { + nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; + if (nmp->nm_timeo < NFS_MINTIMEO) + nmp->nm_timeo = NFS_MINTIMEO; + else if (nmp->nm_timeo > NFS_MAXTIMEO) + nmp->nm_timeo = NFS_MAXTIMEO; + } + + if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { + nmp->nm_retry = argp->retrans; + if (nmp->nm_retry > NFS_MAXREXMIT) + nmp->nm_retry = NFS_MAXREXMIT; + } + + if (argp->flags & NFSMNT_NFSV3) { + if (argp->sotype == SOCK_DGRAM) + maxio = NFS_MAXDGRAMDATA; + else + maxio = NFS_MAXDATA; + } else + maxio = NFS_V2MAXDATA; + + if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { + nmp->nm_wsize = argp->wsize; + /* Round down to multiple of blocksize */ + nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); + if (nmp->nm_wsize <= 0) + nmp->nm_wsize = NFS_FABLKSIZE; + } + if (nmp->nm_wsize > maxio) + nmp->nm_wsize = maxio; + if (nmp->nm_wsize > MAXBSIZE) + nmp->nm_wsize = MAXBSIZE; + + if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { + nmp->nm_rsize = argp->rsize; + /* Round down to multiple of blocksize */ + nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); + if (nmp->nm_rsize <= 0) + nmp->nm_rsize = NFS_FABLKSIZE; + } + if (nmp->nm_rsize > maxio) + nmp->nm_rsize = maxio; + if (nmp->nm_rsize > MAXBSIZE) + nmp->nm_rsize = MAXBSIZE; + + if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { + nmp->nm_readdirsize = argp->readdirsize; + } + if (nmp->nm_readdirsize > maxio) + nmp->nm_readdirsize = maxio; + if (nmp->nm_readdirsize > nmp->nm_rsize) + nmp->nm_readdirsize = nmp->nm_rsize; + + if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) + nmp->nm_acregmin = argp->acregmin; + else + nmp->nm_acregmin = NFS_MINATTRTIMO; + if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) + nmp->nm_acregmax = argp->acregmax; + else + nmp->nm_acregmax = NFS_MAXATTRTIMO; + if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) + nmp->nm_acdirmin = argp->acdirmin; + else + nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; + if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) + nmp->nm_acdirmax = argp->acdirmax; + else + nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; + if (nmp->nm_acdirmin > nmp->nm_acdirmax) + nmp->nm_acdirmin = nmp->nm_acdirmax; + if (nmp->nm_acregmin > nmp->nm_acregmax) + nmp->nm_acregmin = nmp->nm_acregmax; + + if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 && + argp->maxgrouplist <= NFS_MAXGRPS) + nmp->nm_numgrps = argp->maxgrouplist; + if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 && + argp->readahead <= NFS_MAXRAHEAD) + nmp->nm_readahead = argp->readahead; + if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 && + argp->leaseterm <= NQ_MAXLEASE) + nmp->nm_leaseterm = argp->leaseterm; + if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 && + argp->deadthresh <= NQ_NEVERDEAD) + nmp->nm_deadthresh = argp->deadthresh; + + adjsock |= ((nmp->nm_sotype != 
argp->sotype) || + (nmp->nm_soproto != argp->proto)); + nmp->nm_sotype = argp->sotype; + nmp->nm_soproto = argp->proto; + + if (nmp->nm_so && adjsock) { + nfs_safedisconnect(nmp); + if (nmp->nm_sotype == SOCK_DGRAM) + while (nfs_connect(nmp, (struct nfsreq *)0)) { + printf("nfs_args: retrying connect\n"); + (void) tsleep((caddr_t)&lbolt, + PSOCK, "nfscon", 0); + } + } +} + +/* + * VFS Operations. + * + * mount system call + * It seems a bit dumb to copyinstr() the host and path here and then + * bcopy() them in mountnfs(), but I wanted to detect errors before + * doing the sockargs() call because sockargs() allocates an mbuf and + * an error after that means that I have to release the mbuf. + */ +/* ARGSUSED */ +static int +nfs_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + int error; + struct nfs_args args; + struct sockaddr *nam; + struct vnode *vp; + char pth[MNAMELEN], hst[MNAMELEN]; + size_t len; + u_char nfh[NFSX_V3FHMAX]; + + if (path == NULL) { + nfs_mountroot(mp); + return (0); + } + error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)); + if (error) + return (error); + if (args.version != NFS_ARGSVERSION) { +#ifdef COMPAT_PRELITE2 + /* + * If the argument version is unknown, then assume the + * caller is a pre-lite2 4.4BSD client and convert its + * arguments. + */ + struct onfs_args oargs; + error = copyin(data, (caddr_t)&oargs, sizeof (struct onfs_args)); + if (error) + return (error); + nfs_convert_oargs(&args,&oargs); +#else /* !COMPAT_PRELITE2 */ + return (EPROGMISMATCH); +#endif /* COMPAT_PRELITE2 */ + } + if (mp->mnt_flag & MNT_UPDATE) { + register struct nfsmount *nmp = VFSTONFS(mp); + + if (nmp == NULL) + return (EIO); + /* + * When doing an update, we can't change from or to + * v3 and/or nqnfs, or change cookie translation + */ + args.flags = (args.flags & + ~(NFSMNT_NFSV3|NFSMNT_NQNFS /*|NFSMNT_XLATECOOKIE*/)) | + (nmp->nm_flag & + (NFSMNT_NFSV3|NFSMNT_NQNFS /*|NFSMNT_XLATECOOKIE*/)); + nfs_decode_args(nmp, &args); + return (0); + } + error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); + if (error) + return (error); + error = copyinstr(path, pth, MNAMELEN-1, &len); + if (error) + return (error); + bzero(&pth[len], MNAMELEN - len); + error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); + if (error) + return (error); + bzero(&hst[len], MNAMELEN - len); + /* sockargs() call must be after above copyin() calls */ + error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); + if (error) + return (error); + args.fh = nfh; + error = mountnfs(&args, mp, nam, pth, hst, &vp); + return (error); +} + +/* + * Common code for mount and mountroot + */ +static int +mountnfs(argp, mp, nam, pth, hst, vpp) + register struct nfs_args *argp; + register struct mount *mp; + struct sockaddr *nam; + char *pth, *hst; + struct vnode **vpp; +{ + register struct nfsmount *nmp; + struct nfsnode *np; + int error; + struct vattr attrs; + + if (mp->mnt_flag & MNT_UPDATE) { + nmp = VFSTONFS(mp); + /* update paths, file handles, etc, here XXX */ + FREE(nam, M_SONAME); + return (0); + } else { + nmp = zalloc(nfsmount_zone); + bzero((caddr_t)nmp, sizeof (struct nfsmount)); + TAILQ_INIT(&nmp->nm_uidlruhead); + TAILQ_INIT(&nmp->nm_bufq); + mp->mnt_data = (qaddr_t)nmp; + } + vfs_getnewfsid(mp); + nmp->nm_mountp = mp; + if (argp->flags & NFSMNT_NQNFS) + /* + * We have to set mnt_maxsymlink to a non-zero value so + * that COMPAT_43 routines will know that we are setting + * the d_type field in directories 
(and can zero it for + * unsuspecting binaries). + */ + mp->mnt_maxsymlinklen = 1; + + /* + * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too + * high, depending on whether we end up with negative offsets in + * the client or server somewhere. 2GB-1 may be safer. + * + * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum + * that we can handle until we find out otherwise. + * XXX Our "safe" limit on the client is what we can store in our + * buffer cache using signed(!) block numbers. + */ + if ((argp->flags & NFSMNT_NFSV3) == 0) + nmp->nm_maxfilesize = 0xffffffffLL; + else + nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1; + + nmp->nm_timeo = NFS_TIMEO; + nmp->nm_retry = NFS_RETRANS; + nmp->nm_wsize = NFS_WSIZE; + nmp->nm_rsize = NFS_RSIZE; + nmp->nm_readdirsize = NFS_READDIRSIZE; + nmp->nm_numgrps = NFS_MAXGRPS; + nmp->nm_readahead = NFS_DEFRAHEAD; + nmp->nm_leaseterm = NQ_DEFLEASE; + nmp->nm_deadthresh = NQ_DEADTHRESH; + CIRCLEQ_INIT(&nmp->nm_timerhead); + nmp->nm_inprog = NULLVP; + nmp->nm_fhsize = argp->fhsize; + bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); + bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); + bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN); + nmp->nm_nam = nam; + /* Set up the sockets and per-host congestion */ + nmp->nm_sotype = argp->sotype; + nmp->nm_soproto = argp->proto; + + nfs_decode_args(nmp, argp); + + /* + * For Connection based sockets (TCP,...) defer the connect until + * the first request, in case the server is not responding. + */ + if (nmp->nm_sotype == SOCK_DGRAM && + (error = nfs_connect(nmp, (struct nfsreq *)0))) + goto bad; + + /* + * This is silly, but it has to be set so that vinifod() works. + * We do not want to do an nfs_statfs() here since we can get + * stuck on a dead server and we are holding a lock on the mount + * point. + */ + mp->mnt_stat.f_iosize = nfs_iosize(nmp); + /* + * A reference count is needed on the nfsnode representing the + * remote root. If this object is not persistent, then backward + * traversals of the mount point (i.e. "..") will not work if + * the nfsnode gets flushed out of the cache. Ufs does not have + * this problem, because one can identify root inodes by their + * number == ROOTINO (2). + */ + error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); + if (error) + goto bad; + *vpp = NFSTOV(np); + + /* + * Get file attributes for the mountpoint. This has the side + * effect of filling in (*vpp)->v_type with the correct value. + */ + VOP_GETATTR(*vpp, &attrs, curproc->p_ucred, curproc); + + /* + * Lose the lock but keep the ref. + */ + VOP_UNLOCK(*vpp, 0, curproc); + + return (0); +bad: + nfs_disconnect(nmp); + zfree(nfsmount_zone, nmp); + FREE(nam, M_SONAME); + return (error); +} + +/* + * unmount system call + */ +static int +nfs_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + register struct nfsmount *nmp; + struct nfsnode *np; + struct vnode *vp; + int error, flags = 0; + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + nmp = VFSTONFS(mp); + /* + * Goes something like this.. + * - Check for activity on the root vnode (other than ourselves). + * - Call vflush() to clear out vnodes for this file system, + * except for the root vnode. + * - Decrement reference on the vnode representing remote root. + * - Close the socket + * - Free up the data structures + */ + /* + * We need to decrement the ref. count on the nfsnode representing + * the remote root. See comment in mountnfs(). 
The VFS unmount() + * has done vput on this vnode, otherwise we would get deadlock! + */ + error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); + if (error) + return(error); + vp = NFSTOV(np); + if (vp->v_usecount > 2) { + vput(vp); + return (EBUSY); + } + + /* + * Must handshake with nqnfs_clientd() if it is active. + */ + nmp->nm_state |= NFSSTA_DISMINPROG; + while (nmp->nm_inprog != NULLVP) + (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0); + error = vflush(mp, vp, flags); + if (error) { + vput(vp); + nmp->nm_state &= ~NFSSTA_DISMINPROG; + return (error); + } + + /* + * We are now committed to the unmount. + * For NQNFS, let the server daemon free the nfsmount structure. + */ + if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) + nmp->nm_state |= NFSSTA_DISMNT; + + /* + * There are two reference counts and one lock to get rid of here. + */ + vput(vp); + vrele(vp); + vgone(vp); + nfs_disconnect(nmp); + FREE(nmp->nm_nam, M_SONAME); + + if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0) + zfree(nfsmount_zone, nmp); + return (0); +} + +/* + * Return root of a filesystem + */ +static int +nfs_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + register struct vnode *vp; + struct nfsmount *nmp; + struct nfsnode *np; + int error; + + nmp = VFSTONFS(mp); + error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); + if (error) + return (error); + vp = NFSTOV(np); + if (vp->v_type == VNON) + vp->v_type = VDIR; + vp->v_flag = VROOT; + *vpp = vp; + return (0); +} + +extern int syncprt; + +/* + * Flush out the buffer cache + */ +/* ARGSUSED */ +static int +nfs_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp; + int error, allerror = 0; + + /* + * Force stale buffer cache information to be flushed. + */ +loop: + for (vp = mp->mnt_vnodelist.lh_first; + vp != NULL; + vp = vp->v_mntvnodes.le_next) { + /* + * If the vnode that we are about to sync is no longer + * associated with this mount point, start over. + */ + if (vp->v_mount != mp) + goto loop; + if (VOP_ISLOCKED(vp, NULL) || TAILQ_EMPTY(&vp->v_dirtyblkhd) || + waitfor == MNT_LAZY) + continue; + if (vget(vp, LK_EXCLUSIVE, p)) + goto loop; + error = VOP_FSYNC(vp, cred, waitfor, p); + if (error) + allerror = error; + vput(vp); + } + return (allerror); +} + diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c new file mode 100644 index 0000000..4097d96 --- /dev/null +++ b/sys/nfs/nfs_vnops.c @@ -0,0 +1,3354 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 + * $FreeBSD$ + */ + + +/* + * vnode op calls for Sun NFS version 2 and 3 + */ + +#include "opt_inet.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/resourcevar.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/namei.h> +#include <sys/socket.h> +#include <sys/vnode.h> +#include <sys/dirent.h> +#include <sys/fcntl.h> +#include <sys/lockf.h> +#include <sys/stat.h> +#include <sys/sysctl.h> +#include <sys/conf.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_zone.h> + +#include <miscfs/fifofs/fifo.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfs.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsmount.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nqnfs.h> + +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/in_var.h> + +/* Defs */ +#define TRUE 1 +#define FALSE 0 + +/* + * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these + * calls are not in getblk() and brelse() so that they would not be necessary + * here. 
+ */ +#ifndef B_VMIO +#define vfs_busy_pages(bp, f) +#endif + +static int nfsspec_read __P((struct vop_read_args *)); +static int nfsspec_write __P((struct vop_write_args *)); +static int nfsfifo_read __P((struct vop_read_args *)); +static int nfsfifo_write __P((struct vop_write_args *)); +static int nfsspec_close __P((struct vop_close_args *)); +static int nfsfifo_close __P((struct vop_close_args *)); +#define nfs_poll vop_nopoll +static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int)); +static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); +static int nfs_lookup __P((struct vop_lookup_args *)); +static int nfs_create __P((struct vop_create_args *)); +static int nfs_mknod __P((struct vop_mknod_args *)); +static int nfs_open __P((struct vop_open_args *)); +static int nfs_close __P((struct vop_close_args *)); +static int nfs_access __P((struct vop_access_args *)); +static int nfs_getattr __P((struct vop_getattr_args *)); +static int nfs_setattr __P((struct vop_setattr_args *)); +static int nfs_read __P((struct vop_read_args *)); +static int nfs_mmap __P((struct vop_mmap_args *)); +static int nfs_fsync __P((struct vop_fsync_args *)); +static int nfs_remove __P((struct vop_remove_args *)); +static int nfs_link __P((struct vop_link_args *)); +static int nfs_rename __P((struct vop_rename_args *)); +static int nfs_mkdir __P((struct vop_mkdir_args *)); +static int nfs_rmdir __P((struct vop_rmdir_args *)); +static int nfs_symlink __P((struct vop_symlink_args *)); +static int nfs_readdir __P((struct vop_readdir_args *)); +static int nfs_bmap __P((struct vop_bmap_args *)); +static int nfs_strategy __P((struct vop_strategy_args *)); +static int nfs_lookitup __P((struct vnode *, const char *, int, + struct ucred *, struct proc *, struct nfsnode **)); +static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); +static int nfsspec_access __P((struct vop_access_args *)); +static int nfs_readlink __P((struct vop_readlink_args *)); +static int nfs_print __P((struct vop_print_args *)); +static int nfs_advlock __P((struct vop_advlock_args *)); +static int nfs_bwrite __P((struct vop_bwrite_args *)); +/* + * Global vfs data structures for nfs + */ +vop_t **nfsv2_vnodeop_p; +static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_defaultop }, + { &vop_access_desc, (vop_t *) nfs_access }, + { &vop_advlock_desc, (vop_t *) nfs_advlock }, + { &vop_bmap_desc, (vop_t *) nfs_bmap }, + { &vop_bwrite_desc, (vop_t *) nfs_bwrite }, + { &vop_close_desc, (vop_t *) nfs_close }, + { &vop_create_desc, (vop_t *) nfs_create }, + { &vop_fsync_desc, (vop_t *) nfs_fsync }, + { &vop_getattr_desc, (vop_t *) nfs_getattr }, + { &vop_getpages_desc, (vop_t *) nfs_getpages }, + { &vop_putpages_desc, (vop_t *) nfs_putpages }, + { &vop_inactive_desc, (vop_t *) nfs_inactive }, + { &vop_lease_desc, (vop_t *) vop_null }, + { &vop_link_desc, (vop_t *) nfs_link }, + { &vop_lock_desc, (vop_t *) vop_sharedlock }, + { &vop_lookup_desc, (vop_t *) nfs_lookup }, + { &vop_mkdir_desc, (vop_t *) nfs_mkdir }, + { &vop_mknod_desc, (vop_t *) nfs_mknod }, + { &vop_mmap_desc, (vop_t *) nfs_mmap }, + { &vop_open_desc, (vop_t *) nfs_open }, + { &vop_poll_desc, (vop_t *) nfs_poll }, + { &vop_print_desc, (vop_t *) nfs_print }, + { &vop_read_desc, (vop_t *) nfs_read }, + { &vop_readdir_desc, (vop_t *) nfs_readdir }, + { &vop_readlink_desc, (vop_t *) nfs_readlink }, + { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, + { 
&vop_remove_desc, (vop_t *) nfs_remove }, + { &vop_rename_desc, (vop_t *) nfs_rename }, + { &vop_rmdir_desc, (vop_t *) nfs_rmdir }, + { &vop_setattr_desc, (vop_t *) nfs_setattr }, + { &vop_strategy_desc, (vop_t *) nfs_strategy }, + { &vop_symlink_desc, (vop_t *) nfs_symlink }, + { &vop_write_desc, (vop_t *) nfs_write }, + { NULL, NULL } +}; +static struct vnodeopv_desc nfsv2_vnodeop_opv_desc = + { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; +VNODEOP_SET(nfsv2_vnodeop_opv_desc); + +/* + * Special device vnode ops + */ +vop_t **spec_nfsv2nodeop_p; +static struct vnodeopv_entry_desc nfsv2_specop_entries[] = { + { &vop_default_desc, (vop_t *) spec_vnoperate }, + { &vop_access_desc, (vop_t *) nfsspec_access }, + { &vop_close_desc, (vop_t *) nfsspec_close }, + { &vop_fsync_desc, (vop_t *) nfs_fsync }, + { &vop_getattr_desc, (vop_t *) nfs_getattr }, + { &vop_inactive_desc, (vop_t *) nfs_inactive }, + { &vop_lock_desc, (vop_t *) vop_sharedlock }, + { &vop_print_desc, (vop_t *) nfs_print }, + { &vop_read_desc, (vop_t *) nfsspec_read }, + { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, + { &vop_setattr_desc, (vop_t *) nfs_setattr }, + { &vop_write_desc, (vop_t *) nfsspec_write }, + { NULL, NULL } +}; +static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = + { &spec_nfsv2nodeop_p, nfsv2_specop_entries }; +VNODEOP_SET(spec_nfsv2nodeop_opv_desc); + +vop_t **fifo_nfsv2nodeop_p; +static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = { + { &vop_default_desc, (vop_t *) fifo_vnoperate }, + { &vop_access_desc, (vop_t *) nfsspec_access }, + { &vop_close_desc, (vop_t *) nfsfifo_close }, + { &vop_fsync_desc, (vop_t *) nfs_fsync }, + { &vop_getattr_desc, (vop_t *) nfs_getattr }, + { &vop_inactive_desc, (vop_t *) nfs_inactive }, + { &vop_lock_desc, (vop_t *) vop_sharedlock }, + { &vop_print_desc, (vop_t *) nfs_print }, + { &vop_read_desc, (vop_t *) nfsfifo_read }, + { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, + { &vop_setattr_desc, (vop_t *) nfs_setattr }, + { &vop_write_desc, (vop_t *) nfsfifo_write }, + { NULL, NULL } +}; +static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = + { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries }; +VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); + +static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, + struct componentname *cnp, + struct vattr *vap)); +static int nfs_removerpc __P((struct vnode *dvp, const char *name, + int namelen, + struct ucred *cred, struct proc *proc)); +static int nfs_renamerpc __P((struct vnode *fdvp, const char *fnameptr, + int fnamelen, struct vnode *tdvp, + const char *tnameptr, int tnamelen, + struct ucred *cred, struct proc *proc)); +static int nfs_renameit __P((struct vnode *sdvp, + struct componentname *scnp, + struct sillyrename *sp)); + +/* + * Global variables + */ +extern u_int32_t nfs_true, nfs_false; +extern u_int32_t nfs_xdrneg1; +extern struct nfsstats nfsstats; +extern nfstype nfsv3_type[9]; +struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; +struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; +int nfs_numasync = 0; +#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) + +SYSCTL_DECL(_vfs_nfs); + +static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; +SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, + &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); + +#if 0 +SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, + &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); + +SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, + &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss 
count"); +#endif + +#define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \ + | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \ + | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP) +static int +nfs3_access_otw(struct vnode *vp, + int wmode, + struct proc *p, + struct ucred *cred) +{ + const int v3 = 1; + u_int32_t *tl; + int error = 0, attrflag; + + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + caddr_t bpos, dpos, cp2; + register int32_t t1, t2; + register caddr_t cp; + u_int32_t rmode; + struct nfsnode *np = VTONFS(vp); + + nfsstats.rpccnt[NFSPROC_ACCESS]++; + nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); + nfsm_fhtom(vp, v3); + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(wmode); + nfsm_request(vp, NFSPROC_ACCESS, p, cred); + nfsm_postop_attr(vp, attrflag); + if (!error) { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + rmode = fxdr_unsigned(u_int32_t, *tl); + np->n_mode = rmode; + np->n_modeuid = cred->cr_uid; + np->n_modestamp = time_second; + } + nfsm_reqdone; + return error; +} + +/* + * nfs access vnode op. + * For nfs version 2, just return ok. File accesses may fail later. + * For nfs version 3, use the access rpc to check accessibility. If file modes + * are changed on the server, accesses might still fail later. + */ +static int +nfs_access(ap) + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + int error = 0; + u_int32_t mode, wmode; + int v3 = NFS_ISV3(vp); + struct nfsnode *np = VTONFS(vp); + + /* + * Disallow write attempts on filesystems mounted read-only; + * unless the file is a socket, fifo, or a block or character + * device resident on the filesystem. + */ + if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { + switch (vp->v_type) { + case VREG: + case VDIR: + case VLNK: + return (EROFS); + default: + break; + } + } + /* + * For nfs v3, check to see if we have done this recently, and if + * so return our cached result instead of making an ACCESS call. + * If not, do an access rpc, otherwise you are stuck emulating + * ufs_access() locally using the vattr. This may not be correct, + * since the server may apply other access criteria such as + * client uid-->server uid mapping that we do not know about. + */ + if (v3) { + if (ap->a_mode & VREAD) + mode = NFSV3ACCESS_READ; + else + mode = 0; + if (vp->v_type != VDIR) { + if (ap->a_mode & VWRITE) + mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); + if (ap->a_mode & VEXEC) + mode |= NFSV3ACCESS_EXECUTE; + } else { + if (ap->a_mode & VWRITE) + mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | + NFSV3ACCESS_DELETE); + if (ap->a_mode & VEXEC) + mode |= NFSV3ACCESS_LOOKUP; + } + /* XXX safety belt, only make blanket request if caching */ + if (nfsaccess_cache_timeout > 0) { + wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | + NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | + NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; + } else { + wmode = mode; + } + + /* + * Does our cached result allow us to give a definite yes to + * this request? + */ + if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) && + (ap->a_cred->cr_uid == np->n_modeuid) && + ((np->n_mode & mode) == mode)) { + nfsstats.accesscache_hits++; + } else { + /* + * Either a no, or a don't know. Go to the wire. 
+ */ + nfsstats.accesscache_misses++; + error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred); + if (!error) { + if ((np->n_mode & mode) != mode) { + error = EACCES; + } + } + } + return (error); + } else { + if ((error = nfsspec_access(ap)) != 0) + return (error); + + /* + * Attempt to prevent a mapped root from accessing a file + * which it shouldn't. We try to read a byte from the file + * if the user is root and the file is not zero length. + * After calling nfsspec_access, we should have the correct + * file size cached. + */ + if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) + && VTONFS(vp)->n_size > 0) { + struct iovec aiov; + struct uio auio; + char buf[1]; + + aiov.iov_base = buf; + aiov.iov_len = 1; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_resid = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_procp = ap->a_p; + + if (vp->v_type == VREG) + error = nfs_readrpc(vp, &auio, ap->a_cred); + else if (vp->v_type == VDIR) { + char* bp; + bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); + aiov.iov_base = bp; + aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; + error = nfs_readdirrpc(vp, &auio, ap->a_cred); + free(bp, M_TEMP); + } else if (vp->v_type == VLNK) + error = nfs_readlinkrpc(vp, &auio, ap->a_cred); + else + error = EACCES; + } + return (error); + } +} + +/* + * nfs open vnode op + * Check to see if the type is ok + * and that deletion is not in progress. + * For paged in text files, you will need to flush the page cache + * if consistency is lost. + */ +/* ARGSUSED */ +static int +nfs_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + struct vattr vattr; + int error; + + if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { +#ifdef DIAGNOSTIC + printf("open eacces vtyp=%d\n",vp->v_type); +#endif + return (EACCES); + } + /* + * Get a valid lease. If cached data is stale, flush it. + */ + if (nmp->nm_flag & NFSMNT_NQNFS) { + if (NQNFS_CKINVALID(vp, np, ND_READ)) { + do { + error = nqnfs_getlease(vp, ND_READ, ap->a_cred, + ap->a_p); + } while (error == NQNFS_EXPIRED); + if (error) + return (error); + if (np->n_lrev != np->n_brev || + (np->n_flag & NQNFSNONCACHE)) { + if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, + ap->a_p, 1)) == EINTR) + return (error); + np->n_brev = np->n_lrev; + } + } + } else { + if (np->n_flag & NMODIFIED) { + if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, + ap->a_p, 1)) == EINTR) + return (error); + np->n_attrstamp = 0; + if (vp->v_type == VDIR) + np->n_direofoffset = 0; + error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); + if (error) + return (error); + np->n_mtime = vattr.va_mtime.tv_sec; + } else { + error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); + if (error) + return (error); + if (np->n_mtime != vattr.va_mtime.tv_sec) { + if (vp->v_type == VDIR) + np->n_direofoffset = 0; + if ((error = nfs_vinvalbuf(vp, V_SAVE, + ap->a_cred, ap->a_p, 1)) == EINTR) + return (error); + np->n_mtime = vattr.va_mtime.tv_sec; + } + } + } + if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) + np->n_attrstamp = 0; /* For Open/Close consistency */ + return (0); +} + +/* + * nfs close vnode op + * What an NFS client should do upon close after writing is a debatable issue. 
+ * Most NFS clients push delayed writes to the server upon close, basically for + * two reasons: + * 1 - So that any write errors may be reported back to the client process + * doing the close system call. By far the two most likely errors are + * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. + * 2 - To put a worst case upper bound on cache inconsistency between + * multiple clients for the file. + * There is also a consistency problem for Version 2 of the protocol w.r.t. + * not being able to tell if other clients are writing a file concurrently, + * since there is no way of knowing if the changed modify time in the reply + * is only due to the write for this client. + * (NFS Version 3 provides weak cache consistency data in the reply that + * should be sufficient to detect and handle this case.) + * + * The current code does the following: + * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers + * for NFS Version 3 - flush dirty buffers to the server but don't invalidate + * or commit them (this satisfies 1 and 2 except for the + * case where the server crashes after this close but + * before the commit RPC, which is felt to be "good + * enough". Changing the last argument to nfs_flush() to + * a 1 would force a commit operation, if it is felt a + * commit is necessary now. + * for NQNFS - do nothing now, since 2 is dealt with via leases and + * 1 should be dealt with via an fsync() system call for + * cases where write errors are important. + */ +/* ARGSUSED */ +static int +nfs_close(ap) + struct vop_close_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + int error = 0; + + if (vp->v_type == VREG) { + if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && + (np->n_flag & NMODIFIED)) { + if (NFS_ISV3(vp)) { + error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); + np->n_flag &= ~NMODIFIED; + } else + error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); + np->n_attrstamp = 0; + } + if (np->n_flag & NWRITEERR) { + np->n_flag &= ~NWRITEERR; + error = np->n_error; + } + } + return (error); +} + +/* + * nfs getattr call from vfs. + */ +static int +nfs_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + register caddr_t cp; + register u_int32_t *tl; + register int32_t t1, t2; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + int v3 = NFS_ISV3(vp); + + /* + * Update local times for special files. + */ + if (np->n_flag & (NACC | NUPD)) + np->n_flag |= NCHG; + /* + * First look in the cache. + */ + if (nfs_getattrcache(vp, ap->a_vap) == 0) + return (0); + + if (v3 && nfsaccess_cache_timeout > 0) { + nfsstats.accesscache_misses++; + nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred); + if (nfs_getattrcache(vp, ap->a_vap) == 0) + return (0); + } + + nfsstats.rpccnt[NFSPROC_GETATTR]++; + nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); + nfsm_fhtom(vp, v3); + nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); + if (!error) { + nfsm_loadattr(vp, ap->a_vap); + } + nfsm_reqdone; + return (error); +} + +/* + * nfs setattr call. 
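+ *
+ * A size change is flushed locally and then pushed to the server via
+ * nfs_setattrrpc(); if that rpc fails the cached size is rolled back,
+ * in outline:
+ *
+ *    tsize = np->n_size;
+ *    np->n_size = np->n_vattr.va_size = vap->va_size;
+ *    error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
+ *    if (error) {
+ *            np->n_size = np->n_vattr.va_size = tsize;
+ *            vnode_pager_setsize(vp, np->n_size);
+ *    }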
+ */ +static int +nfs_setattr(ap) + struct vop_setattr_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + register struct vattr *vap = ap->a_vap; + int error = 0; + u_quad_t tsize; + +#ifndef nolint + tsize = (u_quad_t)0; +#endif + + /* + * Setting of flags is not supported. + */ + if (vap->va_flags != VNOVAL) + return (EOPNOTSUPP); + + /* + * Disallow write attempts if the filesystem is mounted read-only. + */ + if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && + (vp->v_mount->mnt_flag & MNT_RDONLY)) + return (EROFS); + if (vap->va_size != VNOVAL) { + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VCHR: + case VBLK: + case VSOCK: + case VFIFO: + if (vap->va_mtime.tv_sec == VNOVAL && + vap->va_atime.tv_sec == VNOVAL && + vap->va_mode == (mode_t)VNOVAL && + vap->va_uid == (uid_t)VNOVAL && + vap->va_gid == (gid_t)VNOVAL) + return (0); + vap->va_size = VNOVAL; + break; + default: + /* + * Disallow write attempts if the filesystem is + * mounted read-only. + */ + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + vnode_pager_setsize(vp, vap->va_size); + if (np->n_flag & NMODIFIED) { + if (vap->va_size == 0) + error = nfs_vinvalbuf(vp, 0, + ap->a_cred, ap->a_p, 1); + else + error = nfs_vinvalbuf(vp, V_SAVE, + ap->a_cred, ap->a_p, 1); + if (error) { + vnode_pager_setsize(vp, np->n_size); + return (error); + } + } + tsize = np->n_size; + np->n_size = np->n_vattr.va_size = vap->va_size; + }; + } else if ((vap->va_mtime.tv_sec != VNOVAL || + vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && + vp->v_type == VREG && + (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, + ap->a_p, 1)) == EINTR) + return (error); + error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); + if (error && vap->va_size != VNOVAL) { + np->n_size = np->n_vattr.va_size = tsize; + vnode_pager_setsize(vp, np->n_size); + } + return (error); +} + +/* + * Do an nfs setattr rpc. 
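+ *
+ * For v3 the new attributes are built with nfsm_v3attrbuild(); for v2
+ * any field the caller left at VNOVAL goes out as -1 (nfs_xdrneg1),
+ * which the server treats as "do not change", e.g.
+ *
+ *    if (vap->va_uid == (uid_t)VNOVAL)
+ *            sp->sa_uid = nfs_xdrneg1;
+ *    else
+ *            sp->sa_uid = txdr_unsigned(vap->va_uid);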
+ */ +static int +nfs_setattrrpc(vp, vap, cred, procp) + register struct vnode *vp; + register struct vattr *vap; + struct ucred *cred; + struct proc *procp; +{ + register struct nfsv2_sattr *sp; + register caddr_t cp; + register int32_t t1, t2; + caddr_t bpos, dpos, cp2; + u_int32_t *tl; + int error = 0, wccflag = NFSV3_WCCRATTR; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + int v3 = NFS_ISV3(vp); + + nfsstats.rpccnt[NFSPROC_SETATTR]++; + nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); + nfsm_fhtom(vp, v3); + if (v3) { + nfsm_v3attrbuild(vap, TRUE); + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = nfs_false; + } else { + nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); + if (vap->va_mode == (mode_t)VNOVAL) + sp->sa_mode = nfs_xdrneg1; + else + sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); + if (vap->va_uid == (uid_t)VNOVAL) + sp->sa_uid = nfs_xdrneg1; + else + sp->sa_uid = txdr_unsigned(vap->va_uid); + if (vap->va_gid == (gid_t)VNOVAL) + sp->sa_gid = nfs_xdrneg1; + else + sp->sa_gid = txdr_unsigned(vap->va_gid); + sp->sa_size = txdr_unsigned(vap->va_size); + txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); + txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); + } + nfsm_request(vp, NFSPROC_SETATTR, procp, cred); + if (v3) { + nfsm_wcc_data(vp, wccflag); + } else + nfsm_loadattr(vp, (struct vattr *)0); + nfsm_reqdone; + return (error); +} + +/* + * nfs lookup call, one step at a time... + * First look in cache + * If not found, unlock the directory nfsnode and do the rpc + */ +static int +nfs_lookup(ap) + struct vop_lookup_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + struct componentname *cnp = ap->a_cnp; + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + int flags = cnp->cn_flags; + struct vnode *newvp; + u_int32_t *tl; + caddr_t cp; + int32_t t1, t2; + struct nfsmount *nmp; + caddr_t bpos, dpos, cp2; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + long len; + nfsfh_t *fhp; + struct nfsnode *np; + int lockparent, wantparent, error = 0, attrflag, fhsize; + int v3 = NFS_ISV3(dvp); + struct proc *p = cnp->cn_proc; + + *vpp = NULLVP; + if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + return (EROFS); + if (dvp->v_type != VDIR) + return (ENOTDIR); + lockparent = flags & LOCKPARENT; + wantparent = flags & (LOCKPARENT|WANTPARENT); + nmp = VFSTONFS(dvp->v_mount); + np = VTONFS(dvp); + if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { + struct vattr vattr; + int vpid; + + if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) != 0) { + *vpp = NULLVP; + return (error); + } + + newvp = *vpp; + vpid = newvp->v_id; + /* + * See the comment starting `Step through' in ufs/ufs_lookup.c + * for an explanation of the locking protocol + */ + if (dvp == newvp) { + VREF(newvp); + error = 0; + } else if (flags & ISDOTDOT) { + VOP_UNLOCK(dvp, 0, p); + error = vget(newvp, LK_EXCLUSIVE, p); + if (!error && lockparent && (flags & ISLASTCN)) + error = vn_lock(dvp, LK_EXCLUSIVE, p); + } else { + error = vget(newvp, LK_EXCLUSIVE, p); + if (!lockparent || error || !(flags & ISLASTCN)) + VOP_UNLOCK(dvp, 0, p); + } + if (!error) { + if (vpid == newvp->v_id) { + if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p) + && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { + nfsstats.lookupcache_hits++; + if (cnp->cn_nameiop != LOOKUP && + (flags & ISLASTCN)) + cnp->cn_flags |= SAVENAME; + return (0); + } + 
cache_purge(newvp); + } + vput(newvp); + if (lockparent && dvp != newvp && (flags & ISLASTCN)) + VOP_UNLOCK(dvp, 0, p); + } + error = vn_lock(dvp, LK_EXCLUSIVE, p); + *vpp = NULLVP; + if (error) + return (error); + } + error = 0; + newvp = NULLVP; + nfsstats.lookupcache_misses++; + nfsstats.rpccnt[NFSPROC_LOOKUP]++; + len = cnp->cn_namelen; + nfsm_reqhead(dvp, NFSPROC_LOOKUP, + NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); + nfsm_fhtom(dvp, v3); + nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); + nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); + if (error) { + nfsm_postop_attr(dvp, attrflag); + m_freem(mrep); + goto nfsmout; + } + nfsm_getfh(fhp, fhsize, v3); + + /* + * Handle RENAME case... + */ + if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { + if (NFS_CMPFH(np, fhp, fhsize)) { + m_freem(mrep); + return (EISDIR); + } + error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); + if (error) { + m_freem(mrep); + return (error); + } + newvp = NFSTOV(np); + if (v3) { + nfsm_postop_attr(newvp, attrflag); + nfsm_postop_attr(dvp, attrflag); + } else + nfsm_loadattr(newvp, (struct vattr *)0); + *vpp = newvp; + m_freem(mrep); + cnp->cn_flags |= SAVENAME; + if (!lockparent) + VOP_UNLOCK(dvp, 0, p); + return (0); + } + + if (flags & ISDOTDOT) { + VOP_UNLOCK(dvp, 0, p); + error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); + if (error) { + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + return (error); + } + newvp = NFSTOV(np); + if (lockparent && (flags & ISLASTCN) && + (error = vn_lock(dvp, LK_EXCLUSIVE, p))) { + vput(newvp); + return (error); + } + } else if (NFS_CMPFH(np, fhp, fhsize)) { + VREF(dvp); + newvp = dvp; + } else { + error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); + if (error) { + m_freem(mrep); + return (error); + } + if (!lockparent || !(flags & ISLASTCN)) + VOP_UNLOCK(dvp, 0, p); + newvp = NFSTOV(np); + } + if (v3) { + nfsm_postop_attr(newvp, attrflag); + nfsm_postop_attr(dvp, attrflag); + } else + nfsm_loadattr(newvp, (struct vattr *)0); + if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) + cnp->cn_flags |= SAVENAME; + if ((cnp->cn_flags & MAKEENTRY) && + (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { + np->n_ctime = np->n_vattr.va_ctime.tv_sec; + cache_enter(dvp, newvp, cnp); + } + *vpp = newvp; + nfsm_reqdone; + if (error) { + if (newvp != NULLVP) { + vrele(newvp); + *vpp = NULLVP; + } + if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && + (flags & ISLASTCN) && error == ENOENT) { + if (!lockparent) + VOP_UNLOCK(dvp, 0, p); + if (dvp->v_mount->mnt_flag & MNT_RDONLY) + error = EROFS; + else + error = EJUSTRETURN; + } + if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) + cnp->cn_flags |= SAVENAME; + } + return (error); +} + +/* + * nfs read call. + * Just call nfs_bioread() to do the work. + */ +static int +nfs_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + + if (vp->v_type != VREG) + return (EPERM); + return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); +} + +/* + * nfs readlink call + */ +static int +nfs_readlink(ap) + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + + if (vp->v_type != VLNK) + return (EINVAL); + return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred)); +} + +/* + * Do a readlink rpc. + * Called by nfs_doio() from below the buffer cache. 
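+ * The typical call chain is
+ *
+ *    nfs_readlink() -> nfs_bioread() -> nfs_doio() -> nfs_readlinkrpc()
+ *
+ * and the reply string is copied into the caller's uio with
+ *    nfsm_mtouio(uiop, len);
+ * once len has been bounded by NFS_MAXPATHLEN.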
+ */ +int +nfs_readlinkrpc(vp, uiop, cred) + register struct vnode *vp; + struct uio *uiop; + struct ucred *cred; +{ + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + caddr_t bpos, dpos, cp2; + int error = 0, len, attrflag; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + int v3 = NFS_ISV3(vp); + + nfsstats.rpccnt[NFSPROC_READLINK]++; + nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); + nfsm_fhtom(vp, v3); + nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); + if (v3) + nfsm_postop_attr(vp, attrflag); + if (!error) { + nfsm_strsiz(len, NFS_MAXPATHLEN); + if (len == NFS_MAXPATHLEN) { + struct nfsnode *np = VTONFS(vp); + if (np->n_size && np->n_size < NFS_MAXPATHLEN) + len = np->n_size; + } + nfsm_mtouio(uiop, len); + } + nfsm_reqdone; + return (error); +} + +/* + * nfs read rpc call + * Ditto above + */ +int +nfs_readrpc(vp, uiop, cred) + register struct vnode *vp; + struct uio *uiop; + struct ucred *cred; +{ + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + caddr_t bpos, dpos, cp2; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct nfsmount *nmp; + int error = 0, len, retlen, tsiz, eof, attrflag; + int v3 = NFS_ISV3(vp); + +#ifndef nolint + eof = 0; +#endif + nmp = VFSTONFS(vp->v_mount); + tsiz = uiop->uio_resid; + if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) + return (EFBIG); + while (tsiz > 0) { + nfsstats.rpccnt[NFSPROC_READ]++; + len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; + nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); + nfsm_fhtom(vp, v3); + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED * 3); + if (v3) { + txdr_hyper(uiop->uio_offset, tl); + *(tl + 2) = txdr_unsigned(len); + } else { + *tl++ = txdr_unsigned(uiop->uio_offset); + *tl++ = txdr_unsigned(len); + *tl = 0; + } + nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); + if (v3) { + nfsm_postop_attr(vp, attrflag); + if (error) { + m_freem(mrep); + goto nfsmout; + } + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + eof = fxdr_unsigned(int, *(tl + 1)); + } else + nfsm_loadattr(vp, (struct vattr *)0); + nfsm_strsiz(retlen, nmp->nm_rsize); + nfsm_mtouio(uiop, retlen); + m_freem(mrep); + tsiz -= retlen; + if (v3) { + if (eof || retlen == 0) + tsiz = 0; + } else if (retlen < len) + tsiz = 0; + } +nfsmout: + return (error); +} + +/* + * nfs write call + */ +int +nfs_writerpc(vp, uiop, cred, iomode, must_commit) + register struct vnode *vp; + register struct uio *uiop; + struct ucred *cred; + int *iomode, *must_commit; +{ + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2, backup; + caddr_t bpos, dpos, cp2; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; + int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; + +#ifndef DIAGNOSTIC + if (uiop->uio_iovcnt != 1) + panic("nfs: writerpc iovcnt > 1"); +#endif + *must_commit = 0; + tsiz = uiop->uio_resid; + if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) + return (EFBIG); + while (tsiz > 0) { + nfsstats.rpccnt[NFSPROC_WRITE]++; + len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; + nfsm_reqhead(vp, NFSPROC_WRITE, + NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); + nfsm_fhtom(vp, v3); + if (v3) { + nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED); + txdr_hyper(uiop->uio_offset, tl); + tl += 2; + *tl++ = txdr_unsigned(len); + *tl++ = txdr_unsigned(*iomode); + *tl = txdr_unsigned(len); + } else { + register u_int32_t x; + + nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + /* Set both "begin" and "current" to non-garbage. */ + x = txdr_unsigned((u_int32_t)uiop->uio_offset); + *tl++ = x; /* "begin offset" */ + *tl++ = x; /* "current offset" */ + x = txdr_unsigned(len); + *tl++ = x; /* total to this offset */ + *tl = x; /* size of this write */ + } + nfsm_uiotom(uiop, len); + nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); + if (v3) { + wccflag = NFSV3_WCCCHK; + nfsm_wcc_data(vp, wccflag); + if (!error) { + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED + + NFSX_V3WRITEVERF); + rlen = fxdr_unsigned(int, *tl++); + if (rlen == 0) { + error = NFSERR_IO; + m_freem(mrep); + break; + } else if (rlen < len) { + backup = len - rlen; + uiop->uio_iov->iov_base -= backup; + uiop->uio_iov->iov_len += backup; + uiop->uio_offset -= backup; + uiop->uio_resid += backup; + len = rlen; + } + commit = fxdr_unsigned(int, *tl++); + + /* + * Return the lowest committment level + * obtained by any of the RPCs. + */ + if (committed == NFSV3WRITE_FILESYNC) + committed = commit; + else if (committed == NFSV3WRITE_DATASYNC && + commit == NFSV3WRITE_UNSTABLE) + committed = commit; + if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){ + bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, + NFSX_V3WRITEVERF); + nmp->nm_state |= NFSSTA_HASWRITEVERF; + } else if (bcmp((caddr_t)tl, + (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { + *must_commit = 1; + bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, + NFSX_V3WRITEVERF); + } + } + } else + nfsm_loadattr(vp, (struct vattr *)0); + if (wccflag) + VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; + m_freem(mrep); + if (error) + break; + tsiz -= len; + } +nfsmout: + if (vp->v_mount->mnt_flag & MNT_ASYNC) + committed = NFSV3WRITE_FILESYNC; + *iomode = committed; + if (error) + uiop->uio_resid = tsiz; + return (error); +} + +/* + * nfs mknod rpc + * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the + * mode set to specify the file type and the size field for rdev. 
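+ * Under v2 a character or block device therefore goes out roughly as
+ *
+ *    sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
+ *    sp->sa_size = txdr_unsigned(vap->va_rdev);    (rdev in sa_size)
+ *
+ * whereas v3 has a real MKNOD rpc that carries the type and the
+ * major/minor numbers explicitly.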
+ */ +static int +nfs_mknodrpc(dvp, vpp, cnp, vap) + register struct vnode *dvp; + register struct vnode **vpp; + register struct componentname *cnp; + register struct vattr *vap; +{ + register struct nfsv2_sattr *sp; + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + struct vnode *newvp = (struct vnode *)0; + struct nfsnode *np = (struct nfsnode *)0; + struct vattr vattr; + char *cp2; + caddr_t bpos, dpos; + int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + u_int32_t rdev; + int v3 = NFS_ISV3(dvp); + + if (vap->va_type == VCHR || vap->va_type == VBLK) + rdev = txdr_unsigned(vap->va_rdev); + else if (vap->va_type == VFIFO || vap->va_type == VSOCK) + rdev = nfs_xdrneg1; + else { + return (EOPNOTSUPP); + } + if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) { + return (error); + } + nfsstats.rpccnt[NFSPROC_MKNOD]++; + nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); + nfsm_fhtom(dvp, v3); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + if (v3) { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + *tl++ = vtonfsv3_type(vap->va_type); + nfsm_v3attrbuild(vap, FALSE); + if (vap->va_type == VCHR || vap->va_type == VBLK) { + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(umajor(vap->va_rdev)); + *tl = txdr_unsigned(uminor(vap->va_rdev)); + } + } else { + nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); + sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); + sp->sa_uid = nfs_xdrneg1; + sp->sa_gid = nfs_xdrneg1; + sp->sa_size = rdev; + txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); + txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); + } + nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); + if (!error) { + nfsm_mtofh(dvp, newvp, v3, gotvp); + if (!gotvp) { + if (newvp) { + vput(newvp); + newvp = (struct vnode *)0; + } + error = nfs_lookitup(dvp, cnp->cn_nameptr, + cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); + if (!error) + newvp = NFSTOV(np); + } + } + if (v3) + nfsm_wcc_data(dvp, wccflag); + nfsm_reqdone; + if (error) { + if (newvp) + vput(newvp); + } else { + if (cnp->cn_flags & MAKEENTRY) + cache_enter(dvp, newvp, cnp); + *vpp = newvp; + } + VTONFS(dvp)->n_flag |= NMODIFIED; + if (!wccflag) + VTONFS(dvp)->n_attrstamp = 0; + return (error); +} + +/* + * nfs mknod vop + * just call nfs_mknodrpc() to do the work. + */ +/* ARGSUSED */ +static int +nfs_mknod(ap) + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + return nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap); +} + +static u_long create_verf; +/* + * nfs file create call + */ +static int +nfs_create(ap) + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + register struct vnode *dvp = ap->a_dvp; + register struct vattr *vap = ap->a_vap; + register struct componentname *cnp = ap->a_cnp; + register struct nfsv2_sattr *sp; + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + struct nfsnode *np = (struct nfsnode *)0; + struct vnode *newvp = (struct vnode *)0; + caddr_t bpos, dpos, cp2; + int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct vattr vattr; + int v3 = NFS_ISV3(dvp); + + /* + * Oops, not for me.. 
+ */ + if (vap->va_type == VSOCK) + return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); + + if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) { + return (error); + } + if (vap->va_vaflags & VA_EXCLUSIVE) + fmode |= O_EXCL; +again: + nfsstats.rpccnt[NFSPROC_CREATE]++; + nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); + nfsm_fhtom(dvp, v3); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + if (v3) { + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); + if (fmode & O_EXCL) { + *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); + nfsm_build(tl, u_int32_t *, NFSX_V3CREATEVERF); +#ifdef INET + if (!TAILQ_EMPTY(&in_ifaddrhead)) + *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; + else +#endif + *tl++ = create_verf; + *tl = ++create_verf; + } else { + *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); + nfsm_v3attrbuild(vap, FALSE); + } + } else { + nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); + sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); + sp->sa_uid = nfs_xdrneg1; + sp->sa_gid = nfs_xdrneg1; + sp->sa_size = 0; + txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); + txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); + } + nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); + if (!error) { + nfsm_mtofh(dvp, newvp, v3, gotvp); + if (!gotvp) { + if (newvp) { + vput(newvp); + newvp = (struct vnode *)0; + } + error = nfs_lookitup(dvp, cnp->cn_nameptr, + cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); + if (!error) + newvp = NFSTOV(np); + } + } + if (v3) + nfsm_wcc_data(dvp, wccflag); + nfsm_reqdone; + if (error) { + if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { + fmode &= ~O_EXCL; + goto again; + } + if (newvp) + vput(newvp); + } else if (v3 && (fmode & O_EXCL)) + error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); + if (!error) { + if (cnp->cn_flags & MAKEENTRY) + cache_enter(dvp, newvp, cnp); + *ap->a_vpp = newvp; + } + VTONFS(dvp)->n_flag |= NMODIFIED; + if (!wccflag) + VTONFS(dvp)->n_attrstamp = 0; + return (error); +} + +/* + * nfs file remove call + * To try and make nfs semantics closer to ufs semantics, a file that has + * other processes using the vnode is renamed instead of removed and then + * removed later on the last close. + * - If v_usecount > 1 + * If a rename is not already in the works + * call nfs_sillyrename() to set it up + * else + * do the remove rpc + */ +static int +nfs_remove(ap) + struct vop_remove_args /* { + struct vnodeop_desc *a_desc; + struct vnode * a_dvp; + struct vnode * a_vp; + struct componentname * a_cnp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vnode *dvp = ap->a_dvp; + register struct componentname *cnp = ap->a_cnp; + register struct nfsnode *np = VTONFS(vp); + int error = 0; + struct vattr vattr; + +#ifndef DIAGNOSTIC + if ((cnp->cn_flags & HASBUF) == 0) + panic("nfs_remove: no name"); + if (vp->v_usecount < 1) + panic("nfs_remove: bad v_usecount"); +#endif + if (vp->v_type == VDIR) + error = EPERM; + else if (vp->v_usecount == 1 || (np->n_sillyrename && + VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 && + vattr.va_nlink > 1)) { + /* + * Purge the name cache so that the chance of a lookup for + * the name succeeding while the remove is in progress is + * minimized. Without node locking it can still happen, such + * that an I/O op returns ESTALE, but since you get this if + * another host removes the file.. 
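+		 * After the purge the remove itself is, in outline,
+		 *
+		 *    nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
+		 *    nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen,
+		 *        cnp->cn_cred, cnp->cn_proc);
+		 *
+		 * with ENOENT from a retransmitted request quietly treated
+		 * as success.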
+ */ + cache_purge(vp); + /* + * throw away biocache buffers, mainly to avoid + * unnecessary delayed writes later. + */ + error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); + /* Do the rpc */ + if (error != EINTR) + error = nfs_removerpc(dvp, cnp->cn_nameptr, + cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc); + /* + * Kludge City: If the first reply to the remove rpc is lost.. + * the reply to the retransmitted request will be ENOENT + * since the file was in fact removed + * Therefore, we cheat and return success. + */ + if (error == ENOENT) + error = 0; + } else if (!np->n_sillyrename) + error = nfs_sillyrename(dvp, vp, cnp); + np->n_attrstamp = 0; + return (error); +} + +/* + * nfs file remove rpc called from nfs_inactive + */ +int +nfs_removeit(sp) + register struct sillyrename *sp; +{ + + return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, + (struct proc *)0)); +} + +/* + * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). + */ +static int +nfs_removerpc(dvp, name, namelen, cred, proc) + register struct vnode *dvp; + const char *name; + int namelen; + struct ucred *cred; + struct proc *proc; +{ + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + caddr_t bpos, dpos, cp2; + int error = 0, wccflag = NFSV3_WCCRATTR; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + int v3 = NFS_ISV3(dvp); + + nfsstats.rpccnt[NFSPROC_REMOVE]++; + nfsm_reqhead(dvp, NFSPROC_REMOVE, + NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); + nfsm_fhtom(dvp, v3); + nfsm_strtom(name, namelen, NFS_MAXNAMLEN); + nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); + if (v3) + nfsm_wcc_data(dvp, wccflag); + nfsm_reqdone; + VTONFS(dvp)->n_flag |= NMODIFIED; + if (!wccflag) + VTONFS(dvp)->n_attrstamp = 0; + return (error); +} + +/* + * nfs file rename call + */ +static int +nfs_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + register struct vnode *fvp = ap->a_fvp; + register struct vnode *tvp = ap->a_tvp; + register struct vnode *fdvp = ap->a_fdvp; + register struct vnode *tdvp = ap->a_tdvp; + register struct componentname *tcnp = ap->a_tcnp; + register struct componentname *fcnp = ap->a_fcnp; + int error; + +#ifndef DIAGNOSTIC + if ((tcnp->cn_flags & HASBUF) == 0 || + (fcnp->cn_flags & HASBUF) == 0) + panic("nfs_rename: no name"); +#endif + /* Check for cross-device rename */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + goto out; + } + + /* + * We have to flush B_DELWRI data prior to renaming + * the file. If we don't, the delayed-write buffers + * can be flushed out later after the file has gone stale + * under NFSV3. NFSV2 does not have this problem because + * ( as far as I can tell ) it flushes dirty buffers more + * often. + */ + + VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_proc); + if (tvp) + VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_proc); + + /* + * If the tvp exists and is in use, sillyrename it before doing the + * rename of the new file over it. + * XXX Can't sillyrename a directory. 
+ */ + if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename && + tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { + vput(tvp); + tvp = NULL; + } + + error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, + tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, + tcnp->cn_proc); + + if (fvp->v_type == VDIR) { + if (tvp != NULL && tvp->v_type == VDIR) + cache_purge(tdvp); + cache_purge(fdvp); + } + +out: + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fdvp); + vrele(fvp); + /* + * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. + */ + if (error == ENOENT) + error = 0; + return (error); +} + +/* + * nfs file rename rpc called from nfs_remove() above + */ +static int +nfs_renameit(sdvp, scnp, sp) + struct vnode *sdvp; + struct componentname *scnp; + register struct sillyrename *sp; +{ + return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, + sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); +} + +/* + * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). + */ +static int +nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) + register struct vnode *fdvp; + const char *fnameptr; + int fnamelen; + register struct vnode *tdvp; + const char *tnameptr; + int tnamelen; + struct ucred *cred; + struct proc *proc; +{ + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + caddr_t bpos, dpos, cp2; + int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + int v3 = NFS_ISV3(fdvp); + + nfsstats.rpccnt[NFSPROC_RENAME]++; + nfsm_reqhead(fdvp, NFSPROC_RENAME, + (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + + nfsm_rndup(tnamelen)); + nfsm_fhtom(fdvp, v3); + nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); + nfsm_fhtom(tdvp, v3); + nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); + nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); + if (v3) { + nfsm_wcc_data(fdvp, fwccflag); + nfsm_wcc_data(tdvp, twccflag); + } + nfsm_reqdone; + VTONFS(fdvp)->n_flag |= NMODIFIED; + VTONFS(tdvp)->n_flag |= NMODIFIED; + if (!fwccflag) + VTONFS(fdvp)->n_attrstamp = 0; + if (!twccflag) + VTONFS(tdvp)->n_attrstamp = 0; + return (error); +} + +/* + * nfs hard link create call + */ +static int +nfs_link(ap) + struct vop_link_args /* { + struct vnode *a_tdvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vnode *tdvp = ap->a_tdvp; + register struct componentname *cnp = ap->a_cnp; + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + caddr_t bpos, dpos, cp2; + int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + int v3; + + if (vp->v_mount != tdvp->v_mount) { + return (EXDEV); + } + + /* + * Push all writes to the server, so that the attribute cache + * doesn't get "out of sync" with the server. + * XXX There should be a better way! 
+ */ + VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); + + v3 = NFS_ISV3(vp); + nfsstats.rpccnt[NFSPROC_LINK]++; + nfsm_reqhead(vp, NFSPROC_LINK, + NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); + nfsm_fhtom(vp, v3); + nfsm_fhtom(tdvp, v3); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); + if (v3) { + nfsm_postop_attr(vp, attrflag); + nfsm_wcc_data(tdvp, wccflag); + } + nfsm_reqdone; + VTONFS(tdvp)->n_flag |= NMODIFIED; + if (!attrflag) + VTONFS(vp)->n_attrstamp = 0; + if (!wccflag) + VTONFS(tdvp)->n_attrstamp = 0; + /* + * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. + */ + if (error == EEXIST) + error = 0; + return (error); +} + +/* + * nfs symbolic link create call + */ +static int +nfs_symlink(ap) + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap; +{ + register struct vnode *dvp = ap->a_dvp; + register struct vattr *vap = ap->a_vap; + register struct componentname *cnp = ap->a_cnp; + register struct nfsv2_sattr *sp; + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + caddr_t bpos, dpos, cp2; + int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct vnode *newvp = (struct vnode *)0; + int v3 = NFS_ISV3(dvp); + + nfsstats.rpccnt[NFSPROC_SYMLINK]++; + slen = strlen(ap->a_target); + nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); + nfsm_fhtom(dvp, v3); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + if (v3) { + nfsm_v3attrbuild(vap, FALSE); + } + nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); + if (!v3) { + nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); + sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); + sp->sa_uid = nfs_xdrneg1; + sp->sa_gid = nfs_xdrneg1; + sp->sa_size = nfs_xdrneg1; + txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); + txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); + } + + /* + * Issue the NFS request and get the rpc response. + * + * Only NFSv3 responses returning an error of 0 actually return + * a file handle that can be converted into newvp without having + * to do an extra lookup rpc. + */ + nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); + if (v3) { + if (error == 0) + nfsm_mtofh(dvp, newvp, v3, gotvp); + nfsm_wcc_data(dvp, wccflag); + } + + /* + * out code jumps -> here, mrep is also freed. + */ + + nfsm_reqdone; + + /* + * If we get an EEXIST error, silently convert it to no-error + * in case of an NFS retry. + */ + if (error == EEXIST) + error = 0; + + /* + * If we do not have (or no longer have) an error, and we could + * not extract the newvp from the response due to the request being + * NFSv2 or the error being EEXIST. We have to do a lookup in order + * to obtain a newvp to return. 
+ */ + if (error == 0 && newvp == NULL) { + struct nfsnode *np = NULL; + + error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, + cnp->cn_cred, cnp->cn_proc, &np); + if (!error) + newvp = NFSTOV(np); + } + if (error) { + if (newvp) + vput(newvp); + } else { + *ap->a_vpp = newvp; + } + VTONFS(dvp)->n_flag |= NMODIFIED; + if (!wccflag) + VTONFS(dvp)->n_attrstamp = 0; + return (error); +} + +/* + * nfs make dir call + */ +static int +nfs_mkdir(ap) + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + register struct vnode *dvp = ap->a_dvp; + register struct vattr *vap = ap->a_vap; + register struct componentname *cnp = ap->a_cnp; + register struct nfsv2_sattr *sp; + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + register int len; + struct nfsnode *np = (struct nfsnode *)0; + struct vnode *newvp = (struct vnode *)0; + caddr_t bpos, dpos, cp2; + int error = 0, wccflag = NFSV3_WCCRATTR; + int gotvp = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct vattr vattr; + int v3 = NFS_ISV3(dvp); + + if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) { + return (error); + } + len = cnp->cn_namelen; + nfsstats.rpccnt[NFSPROC_MKDIR]++; + nfsm_reqhead(dvp, NFSPROC_MKDIR, + NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); + nfsm_fhtom(dvp, v3); + nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); + if (v3) { + nfsm_v3attrbuild(vap, FALSE); + } else { + nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); + sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); + sp->sa_uid = nfs_xdrneg1; + sp->sa_gid = nfs_xdrneg1; + sp->sa_size = nfs_xdrneg1; + txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); + txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); + } + nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); + if (!error) + nfsm_mtofh(dvp, newvp, v3, gotvp); + if (v3) + nfsm_wcc_data(dvp, wccflag); + nfsm_reqdone; + VTONFS(dvp)->n_flag |= NMODIFIED; + if (!wccflag) + VTONFS(dvp)->n_attrstamp = 0; + /* + * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry + * if we can succeed in looking up the directory. 
+ */ + if (error == EEXIST || (!error && !gotvp)) { + if (newvp) { + vrele(newvp); + newvp = (struct vnode *)0; + } + error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, + cnp->cn_proc, &np); + if (!error) { + newvp = NFSTOV(np); + if (newvp->v_type != VDIR) + error = EEXIST; + } + } + if (error) { + if (newvp) + vrele(newvp); + } else + *ap->a_vpp = newvp; + return (error); +} + +/* + * nfs remove directory call + */ +static int +nfs_rmdir(ap) + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vnode *dvp = ap->a_dvp; + register struct componentname *cnp = ap->a_cnp; + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + caddr_t bpos, dpos, cp2; + int error = 0, wccflag = NFSV3_WCCRATTR; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + int v3 = NFS_ISV3(dvp); + + if (dvp == vp) + return (EINVAL); + nfsstats.rpccnt[NFSPROC_RMDIR]++; + nfsm_reqhead(dvp, NFSPROC_RMDIR, + NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); + nfsm_fhtom(dvp, v3); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); + if (v3) + nfsm_wcc_data(dvp, wccflag); + nfsm_reqdone; + VTONFS(dvp)->n_flag |= NMODIFIED; + if (!wccflag) + VTONFS(dvp)->n_attrstamp = 0; + cache_purge(dvp); + cache_purge(vp); + /* + * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. + */ + if (error == ENOENT) + error = 0; + return (error); +} + +/* + * nfs readdir call + */ +static int +nfs_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + register struct uio *uio = ap->a_uio; + int tresid, error; + struct vattr vattr; + + if (vp->v_type != VDIR) + return (EPERM); + /* + * First, check for hit on the EOF offset cache + */ + if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && + (np->n_flag & NMODIFIED) == 0) { + if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { + if (NQNFS_CKCACHABLE(vp, ND_READ)) { + nfsstats.direofcache_hits++; + return (0); + } + } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && + np->n_mtime == vattr.va_mtime.tv_sec) { + nfsstats.direofcache_hits++; + return (0); + } + } + + /* + * Call nfs_bioread() to do the real work. + */ + tresid = uio->uio_resid; + error = nfs_bioread(vp, uio, 0, ap->a_cred); + + if (!error && uio->uio_resid == tresid) + nfsstats.direofcache_misses++; + return (error); +} + +/* + * Readdir rpc call. + * Called from below the buffer cache by nfs_doio(). 
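+ *
+ * Each reply is repacked into 4.4BSD struct dirent records, padded so
+ * that no record crosses a DIRBLKSIZ boundary; per entry, roughly:
+ *
+ *    tlen = nfsm_rndup(len);           (name length, rounded up)
+ *    left = DIRBLKSIZ - blksiz;
+ *    if ((tlen + DIRHDSIZ) > left) {
+ *            dp->d_reclen += left;     (pad the previous record out)
+ *            blksiz = 0;
+ *    }
+ *    dp = (struct dirent *)uiop->uio_iov->iov_base;
+ *    dp->d_reclen = tlen + DIRHDSIZ;   (start a new record)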
+ */ +int +nfs_readdirrpc(vp, uiop, cred) + struct vnode *vp; + register struct uio *uiop; + struct ucred *cred; + +{ + register int len, left; + register struct dirent *dp = NULL; + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + register nfsuint64 *cookiep; + caddr_t bpos, dpos, cp2; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + nfsuint64 cookie; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + struct nfsnode *dnp = VTONFS(vp); + u_quad_t fileno; + int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; + int attrflag; + int v3 = NFS_ISV3(vp); + +#ifndef DIAGNOSTIC + if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || + (uiop->uio_resid & (DIRBLKSIZ - 1))) + panic("nfs readdirrpc bad uio"); +#endif + + /* + * If there is no cookie, assume directory was stale. + */ + cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); + if (cookiep) + cookie = *cookiep; + else + return (NFSERR_BAD_COOKIE); + /* + * Loop around doing readdir rpc's of size nm_readdirsize + * truncated to a multiple of DIRBLKSIZ. + * The stopping criteria is EOF or buffer full. + */ + while (more_dirs && bigenough) { + nfsstats.rpccnt[NFSPROC_READDIR]++; + nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + + NFSX_READDIR(v3)); + nfsm_fhtom(vp, v3); + if (v3) { + nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED); + *tl++ = cookie.nfsuquad[0]; + *tl++ = cookie.nfsuquad[1]; + *tl++ = dnp->n_cookieverf.nfsuquad[0]; + *tl++ = dnp->n_cookieverf.nfsuquad[1]; + } else { + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = cookie.nfsuquad[0]; + } + *tl = txdr_unsigned(nmp->nm_readdirsize); + nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); + if (v3) { + nfsm_postop_attr(vp, attrflag); + if (!error) { + nfsm_dissect(tl, u_int32_t *, + 2 * NFSX_UNSIGNED); + dnp->n_cookieverf.nfsuquad[0] = *tl++; + dnp->n_cookieverf.nfsuquad[1] = *tl; + } else { + m_freem(mrep); + goto nfsmout; + } + } + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + more_dirs = fxdr_unsigned(int, *tl); + + /* loop thru the dir entries, doctoring them to 4bsd form */ + while (more_dirs && bigenough) { + if (v3) { + nfsm_dissect(tl, u_int32_t *, + 3 * NFSX_UNSIGNED); + fileno = fxdr_hyper(tl); + len = fxdr_unsigned(int, *(tl + 2)); + } else { + nfsm_dissect(tl, u_int32_t *, + 2 * NFSX_UNSIGNED); + fileno = fxdr_unsigned(u_quad_t, *tl++); + len = fxdr_unsigned(int, *tl); + } + if (len <= 0 || len > NFS_MAXNAMLEN) { + error = EBADRPC; + m_freem(mrep); + goto nfsmout; + } + tlen = nfsm_rndup(len); + if (tlen == len) + tlen += 4; /* To ensure null termination */ + left = DIRBLKSIZ - blksiz; + if ((tlen + DIRHDSIZ) > left) { + dp->d_reclen += left; + uiop->uio_iov->iov_base += left; + uiop->uio_iov->iov_len -= left; + uiop->uio_offset += left; + uiop->uio_resid -= left; + blksiz = 0; + } + if ((tlen + DIRHDSIZ) > uiop->uio_resid) + bigenough = 0; + if (bigenough) { + dp = (struct dirent *)uiop->uio_iov->iov_base; + dp->d_fileno = (int)fileno; + dp->d_namlen = len; + dp->d_reclen = tlen + DIRHDSIZ; + dp->d_type = DT_UNKNOWN; + blksiz += dp->d_reclen; + if (blksiz == DIRBLKSIZ) + blksiz = 0; + uiop->uio_offset += DIRHDSIZ; + uiop->uio_resid -= DIRHDSIZ; + uiop->uio_iov->iov_base += DIRHDSIZ; + uiop->uio_iov->iov_len -= DIRHDSIZ; + nfsm_mtouio(uiop, len); + cp = uiop->uio_iov->iov_base; + tlen -= len; + *cp = '\0'; /* null terminate */ + uiop->uio_iov->iov_base += tlen; + uiop->uio_iov->iov_len -= tlen; + uiop->uio_offset += tlen; + uiop->uio_resid -= tlen; + } else + nfsm_adv(nfsm_rndup(len)); + if (v3) { + 
nfsm_dissect(tl, u_int32_t *, + 3 * NFSX_UNSIGNED); + } else { + nfsm_dissect(tl, u_int32_t *, + 2 * NFSX_UNSIGNED); + } + if (bigenough) { + cookie.nfsuquad[0] = *tl++; + if (v3) + cookie.nfsuquad[1] = *tl++; + } else if (v3) + tl += 2; + else + tl++; + more_dirs = fxdr_unsigned(int, *tl); + } + /* + * If at end of rpc data, get the eof boolean + */ + if (!more_dirs) { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + more_dirs = (fxdr_unsigned(int, *tl) == 0); + } + m_freem(mrep); + } + /* + * Fill last record, iff any, out to a multiple of DIRBLKSIZ + * by increasing d_reclen for the last record. + */ + if (blksiz > 0) { + left = DIRBLKSIZ - blksiz; + dp->d_reclen += left; + uiop->uio_iov->iov_base += left; + uiop->uio_iov->iov_len -= left; + uiop->uio_offset += left; + uiop->uio_resid -= left; + } + + /* + * We are now either at the end of the directory or have filled the + * block. + */ + if (bigenough) + dnp->n_direofoffset = uiop->uio_offset; + else { + if (uiop->uio_resid > 0) + printf("EEK! readdirrpc resid > 0\n"); + cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); + *cookiep = cookie; + } +nfsmout: + return (error); +} + +/* + * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). + */ +int +nfs_readdirplusrpc(vp, uiop, cred) + struct vnode *vp; + register struct uio *uiop; + struct ucred *cred; +{ + register int len, left; + register struct dirent *dp; + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + register struct vnode *newvp; + register nfsuint64 *cookiep; + caddr_t bpos, dpos, cp2, dpossav1, dpossav2; + struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; + struct nameidata nami, *ndp = &nami; + struct componentname *cnp = &ndp->ni_cnd; + nfsuint64 cookie; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + struct nfsnode *dnp = VTONFS(vp), *np; + nfsfh_t *fhp; + u_quad_t fileno; + int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; + int attrflag, fhsize; + +#ifndef nolint + dp = (struct dirent *)0; +#endif +#ifndef DIAGNOSTIC + if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || + (uiop->uio_resid & (DIRBLKSIZ - 1))) + panic("nfs readdirplusrpc bad uio"); +#endif + ndp->ni_dvp = vp; + newvp = NULLVP; + + /* + * If there is no cookie, assume directory was stale. + */ + cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); + if (cookiep) + cookie = *cookiep; + else + return (NFSERR_BAD_COOKIE); + /* + * Loop around doing readdir rpc's of size nm_readdirsize + * truncated to a multiple of DIRBLKSIZ. + * The stopping criteria is EOF or buffer full. 
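+	 * Each READDIRPLUS request carries the resume cookie, the cookie
+	 * verifier and the two size limits, roughly:
+	 *
+	 *    *tl++ = cookie.nfsuquad[0];                   (cookie)
+	 *    *tl++ = cookie.nfsuquad[1];
+	 *    *tl++ = dnp->n_cookieverf.nfsuquad[0];        (cookieverf)
+	 *    *tl++ = dnp->n_cookieverf.nfsuquad[1];
+	 *    *tl++ = txdr_unsigned(nmp->nm_readdirsize);   (dircount)
+	 *    *tl = txdr_unsigned(nmp->nm_rsize);           (maxcount)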
+ */ + while (more_dirs && bigenough) { + nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; + nfsm_reqhead(vp, NFSPROC_READDIRPLUS, + NFSX_FH(1) + 6 * NFSX_UNSIGNED); + nfsm_fhtom(vp, 1); + nfsm_build(tl, u_int32_t *, 6 * NFSX_UNSIGNED); + *tl++ = cookie.nfsuquad[0]; + *tl++ = cookie.nfsuquad[1]; + *tl++ = dnp->n_cookieverf.nfsuquad[0]; + *tl++ = dnp->n_cookieverf.nfsuquad[1]; + *tl++ = txdr_unsigned(nmp->nm_readdirsize); + *tl = txdr_unsigned(nmp->nm_rsize); + nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); + nfsm_postop_attr(vp, attrflag); + if (error) { + m_freem(mrep); + goto nfsmout; + } + nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + dnp->n_cookieverf.nfsuquad[0] = *tl++; + dnp->n_cookieverf.nfsuquad[1] = *tl++; + more_dirs = fxdr_unsigned(int, *tl); + + /* loop thru the dir entries, doctoring them to 4bsd form */ + while (more_dirs && bigenough) { + nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + fileno = fxdr_hyper(tl); + len = fxdr_unsigned(int, *(tl + 2)); + if (len <= 0 || len > NFS_MAXNAMLEN) { + error = EBADRPC; + m_freem(mrep); + goto nfsmout; + } + tlen = nfsm_rndup(len); + if (tlen == len) + tlen += 4; /* To ensure null termination*/ + left = DIRBLKSIZ - blksiz; + if ((tlen + DIRHDSIZ) > left) { + dp->d_reclen += left; + uiop->uio_iov->iov_base += left; + uiop->uio_iov->iov_len -= left; + uiop->uio_offset += left; + uiop->uio_resid -= left; + blksiz = 0; + } + if ((tlen + DIRHDSIZ) > uiop->uio_resid) + bigenough = 0; + if (bigenough) { + dp = (struct dirent *)uiop->uio_iov->iov_base; + dp->d_fileno = (int)fileno; + dp->d_namlen = len; + dp->d_reclen = tlen + DIRHDSIZ; + dp->d_type = DT_UNKNOWN; + blksiz += dp->d_reclen; + if (blksiz == DIRBLKSIZ) + blksiz = 0; + uiop->uio_offset += DIRHDSIZ; + uiop->uio_resid -= DIRHDSIZ; + uiop->uio_iov->iov_base += DIRHDSIZ; + uiop->uio_iov->iov_len -= DIRHDSIZ; + cnp->cn_nameptr = uiop->uio_iov->iov_base; + cnp->cn_namelen = len; + nfsm_mtouio(uiop, len); + cp = uiop->uio_iov->iov_base; + tlen -= len; + *cp = '\0'; + uiop->uio_iov->iov_base += tlen; + uiop->uio_iov->iov_len -= tlen; + uiop->uio_offset += tlen; + uiop->uio_resid -= tlen; + } else + nfsm_adv(nfsm_rndup(len)); + nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + if (bigenough) { + cookie.nfsuquad[0] = *tl++; + cookie.nfsuquad[1] = *tl++; + } else + tl += 2; + + /* + * Since the attributes are before the file handle + * (sigh), we must skip over the attributes and then + * come back and get them. 
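+			 * That is done by saving the current mbuf/offset,
+			 * advancing past the attributes to the handle, and
+			 * then rewinding, roughly:
+			 *
+			 *    dpossav1 = dpos; mdsav1 = md;   (remember the attr position)
+			 *    nfsm_adv(NFSX_V3FATTR);         (skip ahead to the file handle)
+			 *    nfsm_getfh(fhp, fhsize, 1);
+			 *    ...
+			 *    dpos = dpossav1; md = mdsav1;   (back up and load the attrs)
+			 *    nfsm_loadattr(newvp, (struct vattr *)0);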
+ */ + attrflag = fxdr_unsigned(int, *tl); + if (attrflag) { + dpossav1 = dpos; + mdsav1 = md; + nfsm_adv(NFSX_V3FATTR); + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + doit = fxdr_unsigned(int, *tl); + if (doit) { + nfsm_getfh(fhp, fhsize, 1); + if (NFS_CMPFH(dnp, fhp, fhsize)) { + VREF(vp); + newvp = vp; + np = dnp; + } else { + error = nfs_nget(vp->v_mount, fhp, + fhsize, &np); + if (error) + doit = 0; + else + newvp = NFSTOV(np); + } + } + if (doit && bigenough) { + dpossav2 = dpos; + dpos = dpossav1; + mdsav2 = md; + md = mdsav1; + nfsm_loadattr(newvp, (struct vattr *)0); + dpos = dpossav2; + md = mdsav2; + dp->d_type = + IFTODT(VTTOIF(np->n_vattr.va_type)); + ndp->ni_vp = newvp; + cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); + } + } else { + /* Just skip over the file handle */ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl); + nfsm_adv(nfsm_rndup(i)); + } + if (newvp != NULLVP) { + if (newvp == vp) + vrele(newvp); + else + vput(newvp); + newvp = NULLVP; + } + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + more_dirs = fxdr_unsigned(int, *tl); + } + /* + * If at end of rpc data, get the eof boolean + */ + if (!more_dirs) { + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); + more_dirs = (fxdr_unsigned(int, *tl) == 0); + } + m_freem(mrep); + } + /* + * Fill last record, iff any, out to a multiple of DIRBLKSIZ + * by increasing d_reclen for the last record. + */ + if (blksiz > 0) { + left = DIRBLKSIZ - blksiz; + dp->d_reclen += left; + uiop->uio_iov->iov_base += left; + uiop->uio_iov->iov_len -= left; + uiop->uio_offset += left; + uiop->uio_resid -= left; + } + + /* + * We are now either at the end of the directory or have filled the + * block. + */ + if (bigenough) + dnp->n_direofoffset = uiop->uio_offset; + else { + if (uiop->uio_resid > 0) + printf("EEK! readdirplusrpc resid > 0\n"); + cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); + *cookiep = cookie; + } +nfsmout: + if (newvp != NULLVP) { + if (newvp == vp) + vrele(newvp); + else + vput(newvp); + newvp = NULLVP; + } + return (error); +} + +/* + * Silly rename. To make the NFS filesystem that is stateless look a little + * more like the "ufs" a remove of an active vnode is translated to a rename + * to a funny looking filename that is removed by nfs_inactive on the + * nfsnode. There is the potential for another process on a different client + * to create the same funny name between the nfs_lookitup() fails and the + * nfs_rename() completes, but... 
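+ *
+ * The funny name is built from the process id and then probed until a
+ * free one is found, roughly:
+ *
+ *    sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
+ *    while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
+ *        cnp->cn_proc, (struct nfsnode **)0) == 0)
+ *            sp->s_name[4]++;          (.nfsB..., .nfsC..., ...)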
+ */ +static int +nfs_sillyrename(dvp, vp, cnp) + struct vnode *dvp, *vp; + struct componentname *cnp; +{ + register struct sillyrename *sp; + struct nfsnode *np; + int error; + short pid; + + cache_purge(dvp); + np = VTONFS(vp); +#ifndef DIAGNOSTIC + if (vp->v_type == VDIR) + panic("nfs: sillyrename dir"); +#endif + MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), + M_NFSREQ, M_WAITOK); + sp->s_cred = crdup(cnp->cn_cred); + sp->s_dvp = dvp; + VREF(dvp); + + /* Fudge together a funny name */ + pid = cnp->cn_proc->p_pid; + sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); + + /* Try lookitups until we get one that isn't there */ + while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, + cnp->cn_proc, (struct nfsnode **)0) == 0) { + sp->s_name[4]++; + if (sp->s_name[4] > 'z') { + error = EINVAL; + goto bad; + } + } + error = nfs_renameit(dvp, cnp, sp); + if (error) + goto bad; + error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, + cnp->cn_proc, &np); + np->n_sillyrename = sp; + return (0); +bad: + vrele(sp->s_dvp); + crfree(sp->s_cred); + free((caddr_t)sp, M_NFSREQ); + return (error); +} + +/* + * Look up a file name and optionally either update the file handle or + * allocate an nfsnode, depending on the value of npp. + * npp == NULL --> just do the lookup + * *npp == NULL --> allocate a new nfsnode and make sure attributes are + * handled too + * *npp != NULL --> update the file handle in the vnode + */ +static int +nfs_lookitup(dvp, name, len, cred, procp, npp) + register struct vnode *dvp; + const char *name; + int len; + struct ucred *cred; + struct proc *procp; + struct nfsnode **npp; +{ + register u_int32_t *tl; + register caddr_t cp; + register int32_t t1, t2; + struct vnode *newvp = (struct vnode *)0; + struct nfsnode *np, *dnp = VTONFS(dvp); + caddr_t bpos, dpos, cp2; + int error = 0, fhlen, attrflag; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + nfsfh_t *nfhp; + int v3 = NFS_ISV3(dvp); + + nfsstats.rpccnt[NFSPROC_LOOKUP]++; + nfsm_reqhead(dvp, NFSPROC_LOOKUP, + NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); + nfsm_fhtom(dvp, v3); + nfsm_strtom(name, len, NFS_MAXNAMLEN); + nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); + if (npp && !error) { + nfsm_getfh(nfhp, fhlen, v3); + if (*npp) { + np = *npp; + if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { + free((caddr_t)np->n_fhp, M_NFSBIGFH); + np->n_fhp = &np->n_fh; + } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) + np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK); + bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); + np->n_fhsize = fhlen; + newvp = NFSTOV(np); + } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { + VREF(dvp); + newvp = dvp; + } else { + error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); + if (error) { + m_freem(mrep); + return (error); + } + newvp = NFSTOV(np); + } + if (v3) { + nfsm_postop_attr(newvp, attrflag); + if (!attrflag && *npp == NULL) { + m_freem(mrep); + if (newvp == dvp) + vrele(newvp); + else + vput(newvp); + return (ENOENT); + } + } else + nfsm_loadattr(newvp, (struct vattr *)0); + } + nfsm_reqdone; + if (npp && *npp == NULL) { + if (error) { + if (newvp) { + if (newvp == dvp) + vrele(newvp); + else + vput(newvp); + } + } else + *npp = np; + } + return (error); +} + +/* + * Nfs Version 3 commit rpc + */ +int +nfs_commit(vp, offset, cnt, cred, procp) + struct vnode *vp; + u_quad_t offset; + int cnt; + struct ucred *cred; + struct proc *procp; +{ + register caddr_t cp; + register u_int32_t *tl; + register int32_t t1, t2; + register struct 
nfsmount *nmp = VFSTONFS(vp->v_mount); + caddr_t bpos, dpos, cp2; + int error = 0, wccflag = NFSV3_WCCRATTR; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + + if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) + return (0); + nfsstats.rpccnt[NFSPROC_COMMIT]++; + nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); + nfsm_fhtom(vp, 1); + nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + txdr_hyper(offset, tl); + tl += 2; + *tl = txdr_unsigned(cnt); + nfsm_request(vp, NFSPROC_COMMIT, procp, cred); + nfsm_wcc_data(vp, wccflag); + if (!error) { + nfsm_dissect(tl, u_int32_t *, NFSX_V3WRITEVERF); + if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, + NFSX_V3WRITEVERF)) { + bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, + NFSX_V3WRITEVERF); + error = NFSERR_STALEWRITEVERF; + } + } + nfsm_reqdone; + return (error); +} + +/* + * Kludge City.. + * - make nfs_bmap() essentially a no-op that does no translation + * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc + * (Maybe I could use the process's page mapping, but I was concerned that + * Kernel Write might not be enabled and also figured copyout() would do + * a lot more work than bcopy() and also it currently happens in the + * context of the swapper process (2). + */ +static int +nfs_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + + if (ap->a_vpp != NULL) + *ap->a_vpp = vp; + if (ap->a_bnp != NULL) + *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); + if (ap->a_runp != NULL) + *ap->a_runp = 0; + if (ap->a_runb != NULL) + *ap->a_runb = 0; + return (0); +} + +/* + * Strategy routine. + * For async requests when nfsiod(s) are running, queue the request by + * calling nfs_asyncio(), otherwise just all nfs_doio() to do the + * request. + */ +static int +nfs_strategy(ap) + struct vop_strategy_args *ap; +{ + register struct buf *bp = ap->a_bp; + struct ucred *cr; + struct proc *p; + int error = 0; + + KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); + KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp)); + + if (bp->b_flags & B_PHYS) + panic("nfs physio"); + + if (bp->b_flags & B_ASYNC) + p = (struct proc *)0; + else + p = curproc; /* XXX */ + + if (bp->b_flags & B_READ) + cr = bp->b_rcred; + else + cr = bp->b_wcred; + + /* + * If the op is asynchronous and an i/o daemon is waiting + * queue the request, wake it up and wait for completion + * otherwise just do it ourselves. + */ + if ((bp->b_flags & B_ASYNC) == 0 || + nfs_asyncio(bp, NOCRED, p)) + error = nfs_doio(bp, cr, p); + return (error); +} + +/* + * Mmap a file + * + * NB Currently unsupported. + */ +/* ARGSUSED */ +static int +nfs_mmap(ap) + struct vop_mmap_args /* { + struct vnode *a_vp; + int a_fflags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + return (EINVAL); +} + +/* + * fsync vnode op. Just call nfs_flush() with commit == 1. + */ +/* ARGSUSED */ +static int +nfs_fsync(ap) + struct vop_fsync_args /* { + struct vnodeop_desc *a_desc; + struct vnode * a_vp; + struct ucred * a_cred; + int a_waitfor; + struct proc * a_p; + } */ *ap; +{ + + return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); +} + +/* + * Flush all the blocks associated with a vnode. + * Walk through the buffer pool and push any dirty pages + * associated with the vnode. 
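+ * For NFSv3 with commit set, buffers marked B_DELWRI|B_NEEDCOMMIT
+ * are gathered first so that, when they all share a write
+ * credential, a single COMMIT rpc can cover the whole byte range.
+ * With waitfor == MNT_WAIT the routine also sleeps until every
+ * write it started has drained (v_numoutput reaches zero).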
+ */ +static int +nfs_flush(vp, cred, waitfor, p, commit) + register struct vnode *vp; + struct ucred *cred; + int waitfor; + struct proc *p; + int commit; +{ + register struct nfsnode *np = VTONFS(vp); + register struct buf *bp; + register int i; + struct buf *nbp; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; + int passone = 1; + u_quad_t off, endoff, toff; + struct ucred* wcred = NULL; + struct buf **bvec = NULL; +#ifndef NFS_COMMITBVECSIZ +#define NFS_COMMITBVECSIZ 20 +#endif + struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; + int bvecsize = 0, bveccount; + + if (nmp->nm_flag & NFSMNT_INT) + slpflag = PCATCH; + if (!commit) + passone = 0; + /* + * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the + * server, but nas not been committed to stable storage on the server + * yet. On the first pass, the byte range is worked out and the commit + * rpc is done. On the second pass, nfs_writebp() is called to do the + * job. + */ +again: + off = (u_quad_t)-1; + endoff = 0; + bvecpos = 0; + if (NFS_ISV3(vp) && commit) { + s = splbio(); + /* + * Count up how many buffers waiting for a commit. + */ + bveccount = 0; + for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = TAILQ_NEXT(bp, b_vnbufs); + if (BUF_REFCNT(bp) == 0 && + (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) + == (B_DELWRI | B_NEEDCOMMIT)) + bveccount++; + } + /* + * Allocate space to remember the list of bufs to commit. It is + * important to use M_NOWAIT here to avoid a race with nfs_write. + * If we can't get memory (for whatever reason), we will end up + * committing the buffers one-by-one in the loop below. + */ + if (bveccount > NFS_COMMITBVECSIZ) { + if (bvec != NULL && bvec != bvec_on_stack) + free(bvec, M_TEMP); + bvec = (struct buf **) + malloc(bveccount * sizeof(struct buf *), + M_TEMP, M_NOWAIT); + if (bvec == NULL) { + bvec = bvec_on_stack; + bvecsize = NFS_COMMITBVECSIZ; + } else + bvecsize = bveccount; + } else { + bvec = bvec_on_stack; + bvecsize = NFS_COMMITBVECSIZ; + } + for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = TAILQ_NEXT(bp, b_vnbufs); + if (bvecpos >= bvecsize) + break; + if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != + (B_DELWRI | B_NEEDCOMMIT) || + BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) + continue; + bremfree(bp); + /* + * Work out if all buffers are using the same cred + * so we can deal with them all with one commit. + * + * NOTE: we are not clearing B_DONE here, so we have + * to do it later on in this routine if we intend to + * initiate I/O on the bp. + */ + if (wcred == NULL) + wcred = bp->b_wcred; + else if (wcred != bp->b_wcred) + wcred = NOCRED; + bp->b_flags |= B_WRITEINPROG; + vfs_busy_pages(bp, 1); + + /* + * bp is protected by being locked, but nbp is not + * and vfs_busy_pages() may sleep. We have to + * recalculate nbp. + */ + nbp = TAILQ_NEXT(bp, b_vnbufs); + + /* + * A list of these buffers is kept so that the + * second loop knows which buffers have actually + * been committed. This is necessary, since there + * may be a race between the commit rpc and new + * uncommitted writes on the file. + */ + bvec[bvecpos++] = bp; + toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + + bp->b_dirtyoff; + if (toff < off) + off = toff; + toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); + if (toff > endoff) + endoff = toff; + } + splx(s); + } + if (bvecpos > 0) { + /* + * Commit data on the server, as required. 
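+ * The byte range [off, endoff) worked out above spans every
+ * buffer that was collected in bvec.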
+ * If all bufs are using the same wcred, then use that with + * one call for all of them, otherwise commit each one + * separately. + */ + if (wcred != NOCRED) + retv = nfs_commit(vp, off, (int)(endoff - off), + wcred, p); + else { + retv = 0; + for (i = 0; i < bvecpos; i++) { + off_t off, size; + bp = bvec[i]; + off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + + bp->b_dirtyoff; + size = (u_quad_t)(bp->b_dirtyend + - bp->b_dirtyoff); + retv = nfs_commit(vp, off, (int)size, + bp->b_wcred, p); + if (retv) break; + } + } + + if (retv == NFSERR_STALEWRITEVERF) + nfs_clearcommit(vp->v_mount); + + /* + * Now, either mark the blocks I/O done or mark the + * blocks dirty, depending on whether the commit + * succeeded. + */ + for (i = 0; i < bvecpos; i++) { + bp = bvec[i]; + bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG | B_CLUSTEROK); + if (retv) { + /* + * Error, leave B_DELWRI intact + */ + vfs_unbusy_pages(bp); + brelse(bp); + } else { + /* + * Success, remove B_DELWRI ( bundirty() ). + * + * b_dirtyoff/b_dirtyend seem to be NFS + * specific. We should probably move that + * into bundirty(). XXX + */ + s = splbio(); + vp->v_numoutput++; + bp->b_flags |= B_ASYNC; + bundirty(bp); + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); + bp->b_dirtyoff = bp->b_dirtyend = 0; + splx(s); + biodone(bp); + } + } + } + + /* + * Start/do any write(s) that are required. + */ +loop: + s = splbio(); + for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = TAILQ_NEXT(bp, b_vnbufs); + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { + if (waitfor != MNT_WAIT || passone) + continue; + error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL, + "nfsfsync", slpflag, slptimeo); + splx(s); + if (error == 0) + panic("nfs_fsync: inconsistent lock"); + if (error == ENOLCK) + goto loop; + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { + error = EINTR; + goto done; + } + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + goto loop; + } + if ((bp->b_flags & B_DELWRI) == 0) + panic("nfs_fsync: not dirty"); + if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { + BUF_UNLOCK(bp); + continue; + } + bremfree(bp); + if (passone || !commit) + bp->b_flags |= B_ASYNC; + else + bp->b_flags |= B_ASYNC | B_WRITEINPROG; + splx(s); + VOP_BWRITE(bp->b_vp, bp); + goto loop; + } + splx(s); + if (passone) { + passone = 0; + goto again; + } + if (waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + error = tsleep((caddr_t)&vp->v_numoutput, + slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); + if (error) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { + error = EINTR; + goto done; + } + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + } + if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) { + goto loop; + } + } + if (np->n_flag & NWRITEERR) { + error = np->n_error; + np->n_flag &= ~NWRITEERR; + } +done: + if (bvec != NULL && bvec != bvec_on_stack) + free(bvec, M_TEMP); + return (error); +} + +/* + * NFS advisory byte-level locks. + * Currently unsupported. + */ +static int +nfs_advlock(ap) + struct vop_advlock_args /* { + struct vnode *a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + } */ *ap; +{ + register struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * The following kludge is to allow diskless support to work + * until a real NFS lockd is implemented. Basically, just pretend + * that this is a local lock. + */ + return (lf_advlock(ap, &(np->n_lockf), np->n_size)); +} + +/* + * Print out the contents of an nfsnode. 
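+ * (This is the VOP_PRINT entry; it is normally reached through
+ * vprint() when the kernel reports on a vnode.)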
+ */ +static int +nfs_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + + printf("tag VT_NFS, fileid %ld fsid 0x%x", + np->n_vattr.va_fileid, np->n_vattr.va_fsid); + if (vp->v_type == VFIFO) + fifo_printinfo(vp); + printf("\n"); + return (0); +} + +/* + * Just call nfs_writebp() with the force argument set to 1. + * + * NOTE: B_DONE may or may not be set in a_bp on call. + */ +static int +nfs_bwrite(ap) + struct vop_bwrite_args /* { + struct vnode *a_bp; + } */ *ap; +{ + return (nfs_writebp(ap->a_bp, 1, curproc)); +} + +/* + * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless + * the force flag is one and it also handles the B_NEEDCOMMIT flag. We set + * B_CACHE if this is a VMIO buffer. + */ +int +nfs_writebp(bp, force, procp) + register struct buf *bp; + int force; + struct proc *procp; +{ + int s; + int oldflags = bp->b_flags; +#if 0 + int retv = 1; + off_t off; +#endif + + if (BUF_REFCNT(bp) == 0) + panic("bwrite: buffer is not locked???"); + + if (bp->b_flags & B_INVAL) { + brelse(bp); + return(0); + } + + bp->b_flags |= B_CACHE; + + /* + * Undirty the bp. We will redirty it later if the I/O fails. + */ + + s = splbio(); + bundirty(bp); + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); + + bp->b_vp->v_numoutput++; + curproc->p_stats->p_ru.ru_oublock++; + splx(s); + + vfs_busy_pages(bp, 1); + if (force) + bp->b_flags |= B_WRITEINPROG; + BUF_KERNPROC(bp); + VOP_STRATEGY(bp->b_vp, bp); + + if( (oldflags & B_ASYNC) == 0) { + int rtval = biowait(bp); + + if (oldflags & B_DELWRI) { + s = splbio(); + reassignbuf(bp, bp->b_vp); + splx(s); + } + + brelse(bp); + return (rtval); + } + + return (0); +} + +/* + * nfs special file access vnode op. + * Essentially just get vattr and then imitate iaccess() since the device is + * local to the client. + */ +static int +nfsspec_access(ap) + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vattr *vap; + register gid_t *gp; + register struct ucred *cred = ap->a_cred; + struct vnode *vp = ap->a_vp; + mode_t mode = ap->a_mode; + struct vattr vattr; + register int i; + int error; + + /* + * Disallow write attempts on filesystems mounted read-only; + * unless the file is a socket, fifo, or a block or character + * device resident on the filesystem. + */ + if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { + switch (vp->v_type) { + case VREG: + case VDIR: + case VLNK: + return (EROFS); + default: + break; + } + } + /* + * If you're the super-user, + * you always get access. + */ + if (cred->cr_uid == 0) + return (0); + vap = &vattr; + error = VOP_GETATTR(vp, vap, cred, ap->a_p); + if (error) + return (error); + /* + * Access check is based on only one of owner, group, public. + * If not owner, then check group. If not a member of the + * group, then check public access. + */ + if (cred->cr_uid != vap->va_uid) { + mode >>= 3; + gp = cred->cr_groups; + for (i = 0; i < cred->cr_ngroups; i++, gp++) + if (vap->va_gid == *gp) + goto found; + mode >>= 3; +found: + ; + } + error = (vap->va_mode & mode) == mode ? 0 : EACCES; + return (error); +} + +/* + * Read wrapper for special devices. + */ +static int +nfsspec_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set access flag. 
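+ * The NACC flag and saved atime are picked up by nfsspec_close(),
+ * which pushes the saved times back to the server with
+ * VOP_SETATTR() on the last close of a writable node.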
+ */ + np->n_flag |= NACC; + getnanotime(&np->n_atim); + return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); +} + +/* + * Write wrapper for special devices. + */ +static int +nfsspec_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set update flag. + */ + np->n_flag |= NUPD; + getnanotime(&np->n_mtim); + return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); +} + +/* + * Close wrapper for special devices. + * + * Update the times on the nfsnode then do device close. + */ +static int +nfsspec_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + struct vattr vattr; + + if (np->n_flag & (NACC | NUPD)) { + np->n_flag |= NCHG; + if (vp->v_usecount == 1 && + (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + VATTR_NULL(&vattr); + if (np->n_flag & NACC) + vattr.va_atime = np->n_atim; + if (np->n_flag & NUPD) + vattr.va_mtime = np->n_mtim; + (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); + } + } + return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); +} + +/* + * Read wrapper for fifos. + */ +static int +nfsfifo_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set access flag. + */ + np->n_flag |= NACC; + getnanotime(&np->n_atim); + return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); +} + +/* + * Write wrapper for fifos. + */ +static int +nfsfifo_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set update flag. + */ + np->n_flag |= NUPD; + getnanotime(&np->n_mtim); + return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); +} + +/* + * Close wrapper for fifos. + * + * Update the times on the nfsnode then do fifo close. + */ +static int +nfsfifo_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + struct vattr vattr; + struct timespec ts; + + if (np->n_flag & (NACC | NUPD)) { + getnanotime(&ts); + if (np->n_flag & NACC) + np->n_atim = ts; + if (np->n_flag & NUPD) + np->n_mtim = ts; + np->n_flag |= NCHG; + if (vp->v_usecount == 1 && + (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + VATTR_NULL(&vattr); + if (np->n_flag & NACC) + vattr.va_atime = np->n_atim; + if (np->n_flag & NUPD) + vattr.va_mtime = np->n_mtim; + (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); + } + } + return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); +} diff --git a/sys/nfs/nfsdiskless.h b/sys/nfs/nfsdiskless.h new file mode 100644 index 0000000..487e0bf --- /dev/null +++ b/sys/nfs/nfsdiskless.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsdiskless.h 8.2 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_NFSDISKLESS_H_ +#define _NFS_NFSDISKLESS_H_ + +/* + * Structure that must be initialized for a diskless nfs client. + * This structure is used by nfs_mountroot() to set up the root vnode, + * and to do a partial ifconfig(8) and route(8) so that the critical net + * interface can communicate with the server. + * The primary bootstrap is expected to fill in the appropriate fields before + * starting the kernel. Whether or not the swap area is nfs mounted is + * determined by the value in swdevt[0]. (equal to NODEV --> swap over nfs) + * Currently only works for AF_INET protocols. + * NB: All fields are stored in net byte order to avoid hassles with + * client/server byte ordering differences. + */ + +/* + * I have defined a new structure that can handle an NFS Version 3 file handle + * but the kernel still expects the old Version 2 one to be provided. The + * changes required in nfs_vfsops.c for using the new are documented there in + * comments. (I felt that breaking network booting code by changing this + * structure would not be prudent at this time, since almost all servers are + * still Version 2 anyhow.) 
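+ * That is also why the structure below carries each handle as an
+ * explicit length (swap_fhsize/root_fhsize) plus a buffer of up
+ * to NFSX_V3FHMAX bytes, instead of the fixed NFSX_V2FH arrays
+ * used by struct nfs_diskless further down.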
+ */ +struct nfsv3_diskless { + struct ifaliasreq myif; /* Default interface */ + struct sockaddr_in mygateway; /* Default gateway */ + struct nfs_args swap_args; /* Mount args for swap file */ + int swap_fhsize; /* Size of file handle */ + u_char swap_fh[NFSX_V3FHMAX]; /* Swap file's file handle */ + struct sockaddr_in swap_saddr; /* Address of swap server */ + char swap_hostnam[MNAMELEN]; /* Host name for mount pt */ + int swap_nblks; /* Size of server swap file */ + struct ucred swap_ucred; /* Swap credentials */ + struct nfs_args root_args; /* Mount args for root fs */ + int root_fhsize; /* Size of root file handle */ + u_char root_fh[NFSX_V3FHMAX]; /* File handle of root dir */ + struct sockaddr_in root_saddr; /* Address of root server */ + char root_hostnam[MNAMELEN]; /* Host name for mount pt */ + long root_time; /* Timestamp of root fs */ + char my_hostnam[MAXHOSTNAMELEN]; /* Client host name */ +}; + +/* + * Old arguments to mount NFS + */ +struct onfs_args { + struct sockaddr *addr; /* file server address */ + int addrlen; /* length of address */ + int sotype; /* Socket type */ + int proto; /* and Protocol */ + u_char *fh; /* File handle to be mounted */ + int fhsize; /* Size, in bytes, of fh */ + int flags; /* flags */ + int wsize; /* write size in bytes */ + int rsize; /* read size in bytes */ + int readdirsize; /* readdir size in bytes */ + int timeo; /* initial timeout in .1 secs */ + int retrans; /* times to retry send */ + int maxgrouplist; /* Max. size of group list */ + int readahead; /* # of blocks to readahead */ + int leaseterm; /* Term (sec) of lease */ + int deadthresh; /* Retrans threshold */ + char *hostname; /* server's name */ +}; + +struct nfs_diskless { + struct ifaliasreq myif; /* Default interface */ + struct sockaddr_in mygateway; /* Default gateway */ + struct onfs_args swap_args; /* Mount args for swap file */ + u_char swap_fh[NFSX_V2FH]; /* Swap file's file handle */ + struct sockaddr_in swap_saddr; /* Address of swap server */ + char swap_hostnam[MNAMELEN]; /* Host name for mount pt */ + int swap_nblks; /* Size of server swap file */ + struct ucred swap_ucred; /* Swap credentials */ + struct onfs_args root_args; /* Mount args for root fs */ + u_char root_fh[NFSX_V2FH]; /* File handle of root dir */ + struct sockaddr_in root_saddr; /* Address of root server */ + char root_hostnam[MNAMELEN]; /* Host name for mount pt */ + long root_time; /* Timestamp of root fs */ + char my_hostnam[MAXHOSTNAMELEN]; /* Client host name */ +}; + +#endif diff --git a/sys/nfs/nfsm_subs.h b/sys/nfs/nfsm_subs.h new file mode 100644 index 0000000..b13c009 --- /dev/null +++ b/sys/nfs/nfsm_subs.h @@ -0,0 +1,564 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsm_subs.h 8.2 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_NFSM_SUBS_H_ +#define _NFS_NFSM_SUBS_H_ + +struct ucred; +struct vnode; + +/* + * These macros do strange and peculiar things to mbuf chains for + * the assistance of the nfs code. To attempt to use them for any + * other purpose will be dangerous. (they make weird assumptions) + */ + +/* + * First define what the actual subs. return + */ +struct mbuf *nfsm_reqh __P((struct vnode *vp, u_long procid, int hsiz, + caddr_t *bposp)); +struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, + int auth_type, int auth_len, char *auth_str, + int verf_len, char *verf_str, + struct mbuf *mrest, int mrest_len, + struct mbuf **mbp, u_int32_t *xidp)); + +#define M_HASCL(m) ((m)->m_flags & M_EXT) +#define NFSMINOFF(m) \ + do { \ + if (M_HASCL(m)) \ + (m)->m_data = (m)->m_ext.ext_buf; \ + else if ((m)->m_flags & M_PKTHDR) \ + (m)->m_data = (m)->m_pktdat; \ + else \ + (m)->m_data = (m)->m_dat; \ + } while (0) +#define NFSMADV(m, s) \ + do { \ + (m)->m_data += (s); \ + } while (0) +#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \ + (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN)) + +/* + * Now for the macros that do the simple stuff and call the functions + * for the hard stuff. + * These macros use several vars. declared in nfsm_reqhead and these + * vars. must not be used elsewhere unless you are careful not to corrupt + * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries + * that may be used so long as the value is not expected to retained + * after a macro. + * I know, this is kind of dorkey, but it makes the actual op functions + * fairly clean and deals with the mess caused by the xdr discriminating + * unions. 
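+ *
+ * In practice a caller declares the usual set of locals that the
+ * macros expect: mreq, mrep, md, mb, mb2 (mbuf pointers), bpos,
+ * dpos, cp, cp2 (parse positions), tl, t1, t2 and error, plus an
+ * nfsmout: label to branch to on failure; nfs_lookitup() in
+ * nfs_vnops.c is a typical example of the full pattern.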
+ */ + +#define nfsm_build(a,c,s) \ + do { \ + if ((s) > M_TRAILINGSPACE(mb)) { \ + MGET(mb2, M_WAIT, MT_DATA); \ + if ((s) > MLEN) \ + panic("build > MLEN"); \ + mb->m_next = mb2; \ + mb = mb2; \ + mb->m_len = 0; \ + bpos = mtod(mb, caddr_t); \ + } \ + (a) = (c)(bpos); \ + mb->m_len += (s); \ + bpos += (s); \ + } while (0) + +#define nfsm_dissect(a, c, s) \ + do { \ + t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + if (t1 >= (s)) { \ + (a) = (c)(dpos); \ + dpos += (s); \ + } else if ((t1 = nfsm_disct(&md, &dpos, (s), t1, &cp2)) != 0){ \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } else { \ + (a) = (c)cp2; \ + } \ + } while (0) + +#define nfsm_fhtom(v, v3) \ + do { \ + if (v3) { \ + t2 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED; \ + if (t2 <= M_TRAILINGSPACE(mb)) { \ + nfsm_build(tl, u_int32_t *, t2); \ + *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize); \ + *(tl + ((t2>>2) - 2)) = 0; \ + bcopy((caddr_t)VTONFS(v)->n_fhp,(caddr_t)tl, \ + VTONFS(v)->n_fhsize); \ + } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, \ + (caddr_t)VTONFS(v)->n_fhp, \ + VTONFS(v)->n_fhsize)) != 0) { \ + error = t2; \ + m_freem(mreq); \ + goto nfsmout; \ + } \ + } else { \ + nfsm_build(cp, caddr_t, NFSX_V2FH); \ + bcopy((caddr_t)VTONFS(v)->n_fhp, cp, NFSX_V2FH); \ + } \ + } while (0) + +#define nfsm_srvfhtom(f, v3) \ + do { \ + if (v3) { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FH);\ + *tl++ = txdr_unsigned(NFSX_V3FH); \ + bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \ + } else { \ + nfsm_build(cp, caddr_t, NFSX_V2FH); \ + bcopy((caddr_t)(f), cp, NFSX_V2FH); \ + } \ + } while (0) + +#define nfsm_srvpostop_fh(f) \ + do { \ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_V3FH); \ + *tl++ = nfs_true; \ + *tl++ = txdr_unsigned(NFSX_V3FH); \ + bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \ + } while (0) + +#define nfsm_mtofh(d, v, v3, f) \ + do { \ + struct nfsnode *ttnp; nfsfh_t *ttfhp; int ttfhsize; \ + if (v3) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + (f) = fxdr_unsigned(int, *tl); \ + } else \ + (f) = 1; \ + if (f) { \ + nfsm_getfh(ttfhp, ttfhsize, (v3)); \ + if ((t1 = nfs_nget((d)->v_mount, ttfhp, ttfhsize, \ + &ttnp)) != 0) { \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + (v) = NFSTOV(ttnp); \ + } \ + if (v3) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (f) \ + (f) = fxdr_unsigned(int, *tl); \ + else if (fxdr_unsigned(int, *tl)) \ + nfsm_adv(NFSX_V3FATTR); \ + } \ + if (f) \ + nfsm_loadattr((v), (struct vattr *)0); \ + } while (0) + +#define nfsm_getfh(f, s, v3) \ + do { \ + if (v3) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(int, *tl)) <= 0 || \ + (s) > NFSX_V3FHMAX) { \ + m_freem(mrep); \ + error = EBADRPC; \ + goto nfsmout; \ + } \ + } else \ + (s) = NFSX_V2FH; \ + nfsm_dissect((f), nfsfh_t *, nfsm_rndup(s)); \ + } while (0) + +#define nfsm_loadattr(v, a) \ + do { \ + struct vnode *ttvp = (v); \ + if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, (a))) != 0) { \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + (v) = ttvp; \ + } while (0) + +#define nfsm_postop_attr(v, f) \ + do { \ + struct vnode *ttvp = (v); \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (((f) = fxdr_unsigned(int, *tl)) != 0) { \ + if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, \ + (struct vattr *)0)) != 0) { \ + error = t1; \ + (f) = 0; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + (v) = ttvp; \ + } \ + } while (0) + +/* Used as (f) for nfsm_wcc_data() */ +#define NFSV3_WCCRATTR 0 +#define NFSV3_WCCCHK 1 + +#define 
nfsm_wcc_data(v, f) \ + do { \ + int ttattrf, ttretf = 0; \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED); \ + if (f) \ + ttretf = (VTONFS(v)->n_mtime == \ + fxdr_unsigned(u_int32_t, *(tl + 2))); \ + } \ + nfsm_postop_attr((v), ttattrf); \ + if (f) { \ + (f) = ttretf; \ + } else { \ + (f) = ttattrf; \ + } \ + } while (0) + +/* If full is true, set all fields, otherwise just set mode and time fields */ +#define nfsm_v3attrbuild(a, full) \ + do { \ + if ((a)->va_mode != (mode_t)VNOVAL) { \ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + *tl = txdr_unsigned((a)->va_mode); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if ((full) && (a)->va_uid != (uid_t)VNOVAL) { \ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + *tl = txdr_unsigned((a)->va_uid); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if ((full) && (a)->va_gid != (gid_t)VNOVAL) { \ + nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + *tl = txdr_unsigned((a)->va_gid); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if ((full) && (a)->va_size != VNOVAL) { \ + nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + txdr_hyper((a)->va_size, tl); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if ((a)->va_atime.tv_sec != VNOVAL) { \ + if ((a)->va_atime.tv_sec != time_second) { \ + nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);\ + *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);\ + txdr_nfsv3time(&(a)->va_atime, tl); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \ + } \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \ + } \ + if ((a)->va_mtime.tv_sec != VNOVAL) { \ + if ((a)->va_mtime.tv_sec != time_second) { \ + nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);\ + *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);\ + txdr_nfsv3time(&(a)->va_mtime, tl); \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \ + } \ + } else { \ + nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \ + } \ + } while (0) + + +#define nfsm_strsiz(s,m) \ + do { \ + nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(int32_t,*tl)) > (m)) { \ + m_freem(mrep); \ + error = EBADRPC; \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_srvstrsiz(s,m) \ + do { \ + nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(int32_t,*tl)) > (m) || (s) <= 0) { \ + error = EBADRPC; \ + nfsm_reply(0); \ + } \ + } while (0) + +#define nfsm_srvnamesiz(s) \ + do { \ + nfsm_dissect(tl,u_int32_t *,NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(int32_t,*tl)) > NFS_MAXNAMLEN) \ + error = NFSERR_NAMETOL; \ + if ((s) <= 0) \ + error = EBADRPC; \ + if (error) \ + nfsm_reply(0); \ + } while (0) + +#define nfsm_mtouio(p,s) \ + do {\ + if ((s) > 0 && \ + (t1 = nfsm_mbuftouio(&md,(p),(s),&dpos)) != 0) { \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_uiotom(p,s) \ + do { \ + if ((t1 = nfsm_uiotombuf((p),&mb,(s),&bpos)) != 0) { \ + error = t1; \ + m_freem(mreq); \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_reqhead(v,a,s) \ + do { \ 
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos); \ + } while (0) + +#define nfsm_reqdone \ + do { \ + m_freem(mrep); \ + nfsmout: \ + } while (0) + +#define nfsm_rndup(a) (((a)+3)&(~0x3)) + +#define nfsm_request(v, t, p, c) \ + do { \ + if ((error = nfs_request((v), mreq, (t), (p), \ + (c), &mrep, &md, &dpos)) != 0) { \ + if (error & NFSERR_RETERR) \ + error &= ~NFSERR_RETERR; \ + else \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_strtom(a,s,m) \ + do {\ + if ((s) > (m)) { \ + m_freem(mreq); \ + error = ENAMETOOLONG; \ + goto nfsmout; \ + } \ + t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \ + if (t2 <= M_TRAILINGSPACE(mb)) { \ + nfsm_build(tl,u_int32_t *,t2); \ + *tl++ = txdr_unsigned(s); \ + *(tl+((t2>>2)-2)) = 0; \ + bcopy((const char *)(a), (caddr_t)tl, (s)); \ + } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, (a), (s))) != 0) { \ + error = t2; \ + m_freem(mreq); \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_srvdone \ + do { \ + nfsmout: \ + return (error); \ + } while (0) + +#define nfsm_reply(s) \ + do { \ + nfsd->nd_repstat = error; \ + if (error && !(nfsd->nd_flag & ND_NFSV3)) \ + (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \ + mrq, &mb, &bpos); \ + else \ + (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \ + mrq, &mb, &bpos); \ + if (mrep != NULL) { \ + m_freem(mrep); \ + mrep = NULL; \ + } \ + mreq = *mrq; \ + if (error && (!(nfsd->nd_flag & ND_NFSV3) || \ + error == EBADRPC)) { \ + error = 0; \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_writereply(s, v3) \ + do { \ + nfsd->nd_repstat = error; \ + if (error && !(v3)) \ + (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \ + &mreq, &mb, &bpos); \ + else \ + (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \ + &mreq, &mb, &bpos); \ + } while (0) + +#define nfsm_adv(s) \ + do { \ + t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + if (t1 >= (s)) { \ + dpos += (s); \ + } else if ((t1 = nfs_adv(&md, &dpos, (s), t1)) != 0) { \ + error = t1; \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + } while (0) + +#define nfsm_srvmtofh(f) \ + do { \ + int fhlen; \ + if (nfsd->nd_flag & ND_NFSV3) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + fhlen = fxdr_unsigned(int, *tl); \ + if (fhlen != 0 && fhlen != NFSX_V3FH) { \ + error = EBADRPC; \ + nfsm_reply(0); \ + } \ + } else { \ + fhlen = NFSX_V2FH; \ + } \ + if (fhlen != 0) { \ + nfsm_dissect(tl, u_int32_t *, fhlen); \ + bcopy((caddr_t)tl, (caddr_t)(f), fhlen); \ + } else {\ + bzero((caddr_t)(f), NFSX_V3FH); \ + } \ + } while (0) + +#define nfsm_clget \ + do { \ + if (bp >= be) { \ + if (mp == mb) \ + mp->m_len += bp-bpos; \ + MGET(mp, M_WAIT, MT_DATA); \ + MCLGET(mp, M_WAIT); \ + mp->m_len = NFSMSIZ(mp); \ + mp2->m_next = mp; \ + mp2 = mp; \ + bp = mtod(mp, caddr_t); \ + be = bp+mp->m_len; \ + } \ + tl = (u_int32_t *)bp; \ + } while (0) + +#define nfsm_srvfillattr(a, f) \ + do { \ + nfsm_srvfattr(nfsd, (a), (f)); \ + } while (0) + +#define nfsm_srvwcc_data(br, b, ar, a) \ + do { \ + nfsm_srvwcc(nfsd, (br), (b), (ar), (a), &mb, &bpos); \ + } while (0) + +#define nfsm_srvpostop_attr(r, a) \ + do { \ + nfsm_srvpostopattr(nfsd, (r), (a), &mb, &bpos); \ + } while (0) + +#define nfsm_srvsattr(a) \ + do { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + (a)->va_mode = nfstov_mode(*tl); \ + } \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + (a)->va_uid = fxdr_unsigned(uid_t, *tl); \ + } \ + 
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + (a)->va_gid = fxdr_unsigned(gid_t, *tl); \ + } \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + if (*tl == nfs_true) { \ + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + (a)->va_size = fxdr_hyper(tl); \ + } \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + switch (fxdr_unsigned(int, *tl)) { \ + case NFSV3SATTRTIME_TOCLIENT: \ + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + fxdr_nfsv3time(tl, &(a)->va_atime); \ + break; \ + case NFSV3SATTRTIME_TOSERVER: \ + getnanotime(&(a)->va_atime); \ + break; \ + }; \ + nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ + switch (fxdr_unsigned(int, *tl)) { \ + case NFSV3SATTRTIME_TOCLIENT: \ + nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); \ + fxdr_nfsv3time(tl, &(a)->va_mtime); \ + break; \ + case NFSV3SATTRTIME_TOSERVER: \ + getnanotime(&(a)->va_mtime); \ + break; \ + } \ + } while (0) + +#endif diff --git a/sys/nfs/nfsmount.h b/sys/nfs/nfsmount.h new file mode 100644 index 0000000..77ac7e3 --- /dev/null +++ b/sys/nfs/nfsmount.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsmount.h 8.3 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_NFSMOUNT_H_ +#define _NFS_NFSMOUNT_H_ + +/* + * Mount structure. + * One allocated on every NFS mount. + * Holds NFS specific information for mount. + */ +struct nfsmount { + int nm_flag; /* Flags for soft/hard... */ + int nm_state; /* Internal state flags */ + struct mount *nm_mountp; /* Vfs structure for this filesystem */ + int nm_numgrps; /* Max. 
size of groupslist */ + u_char nm_fh[NFSX_V3FHMAX]; /* File handle of root dir */ + int nm_fhsize; /* Size of root file handle */ + struct socket *nm_so; /* Rpc socket */ + int nm_sotype; /* Type of socket */ + int nm_soproto; /* and protocol */ + int nm_soflags; /* pr_flags for socket protocol */ + struct sockaddr *nm_nam; /* Addr of server */ + int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */ + int nm_retry; /* Max retries */ + int nm_srtt[4]; /* Timers for rpcs */ + int nm_sdrtt[4]; + int nm_sent; /* Request send count */ + int nm_cwnd; /* Request send window */ + int nm_timeouts; /* Request timeouts */ + int nm_deadthresh; /* Threshold of timeouts-->dead server*/ + int nm_rsize; /* Max size of read rpc */ + int nm_wsize; /* Max size of write rpc */ + int nm_readdirsize; /* Size of a readdir rpc */ + int nm_readahead; /* Num. of blocks to readahead */ + int nm_leaseterm; /* Term (sec) for NQNFS lease */ + int nm_acdirmin; /* Directory attr cache min lifetime */ + int nm_acdirmax; /* Directory attr cache max lifetime */ + int nm_acregmin; /* Reg file attr cache min lifetime */ + int nm_acregmax; /* Reg file attr cache max lifetime */ + CIRCLEQ_HEAD(, nfsnode) nm_timerhead; /* Head of lease timer queue */ + struct vnode *nm_inprog; /* Vnode in prog by nqnfs_clientd() */ + uid_t nm_authuid; /* Uid for authenticator */ + int nm_authtype; /* Authenticator type */ + int nm_authlen; /* and length */ + char *nm_authstr; /* Authenticator string */ + char *nm_verfstr; /* and the verifier */ + int nm_verflen; + u_char nm_verf[NFSX_V3WRITEVERF]; /* V3 write verifier */ + NFSKERBKEY_T nm_key; /* and the session key */ + int nm_numuids; /* Number of nfsuid mappings */ + TAILQ_HEAD(, nfsuid) nm_uidlruhead; /* Lists of nfsuid mappings */ + LIST_HEAD(, nfsuid) nm_uidhashtbl[NFS_MUIDHASHSIZ]; + TAILQ_HEAD(, buf) nm_bufq; /* async io buffer queue */ + short nm_bufqlen; /* number of buffers in queue */ + short nm_bufqwant; /* process wants to add to the queue */ + int nm_bufqiods; /* number of iods processing queue */ + u_int64_t nm_maxfilesize; /* maximum file size */ +}; + +#if defined(KERNEL) || defined(_KERNEL) +/* + * Convert mount ptr to nfsmount ptr. + */ +#define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data)) + +#endif /* KERNEL */ + +#endif diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h new file mode 100644 index 0000000..e654bc0 --- /dev/null +++ b/sys/nfs/nfsnode.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsnode.h 8.9 (Berkeley) 5/14/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_NFSNODE_H_ +#define _NFS_NFSNODE_H_ + +#if !defined(_NFS_NFS_H_) && !defined(KERNEL) +#include <nfs/nfs.h> +#endif + +/* + * Silly rename structure that hangs off the nfsnode until the name + * can be removed by nfs_inactive() + */ +struct sillyrename { + struct ucred *s_cred; + struct vnode *s_dvp; + long s_namlen; + char s_name[20]; +}; + +/* + * This structure is used to save the logical directory offset to + * NFS cookie mappings. + * The mappings are stored in a list headed + * by n_cookies, as required. + * There is one mapping for each NFS_DIRBLKSIZ bytes of directory information + * stored in increasing logical offset byte order. + */ +#define NFSNUMCOOKIES 31 + +struct nfsdmap { + LIST_ENTRY(nfsdmap) ndm_list; + int ndm_eocookie; + nfsuint64 ndm_cookies[NFSNUMCOOKIES]; +}; + +/* + * The nfsnode is the nfs equivalent to ufs's inode. Any similarity + * is purely coincidental. + * There is a unique nfsnode allocated for each active file, + * each current directory, each mounted-on file, text file, and the root. + * An nfsnode is 'named' by its file handle. (nget/nfs_node.c) + * If this structure exceeds 256 bytes (it is currently 256 using 4.4BSD-Lite + * type definitions), file handles of > 32 bytes should probably be split out + * into a separate MALLOC()'d data structure. (Reduce the size of nfsfh_t by + * changing the definition in nfsproto.h of NFS_SMALLFH.) + * NB: Hopefully the current order of the fields is such that everything will + * be well aligned and, therefore, tightly packed. + */ +struct nfsnode { + LIST_ENTRY(nfsnode) n_hash; /* Hash chain */ + CIRCLEQ_ENTRY(nfsnode) n_timer; /* Nqnfs timer chain */ + u_quad_t n_size; /* Current size of file */ + u_quad_t n_brev; /* Modify rev when cached */ + u_quad_t n_lrev; /* Modify rev for lease */ + struct vattr n_vattr; /* Vnode attribute cache */ + time_t n_attrstamp; /* Attr. cache timestamp */ + u_int32_t n_mode; /* ACCESS mode cache */ + uid_t n_modeuid; /* credentials having mode */ + time_t n_modestamp; /* mode cache timestamp */ + time_t n_mtime; /* Prev modify time. */ + time_t n_ctime; /* Prev create time. 
*/ + time_t n_expiry; /* Lease expiry time */ + nfsfh_t *n_fhp; /* NFS File Handle */ + struct vnode *n_vnode; /* associated vnode */ + struct lockf *n_lockf; /* Locking record of file */ + int n_error; /* Save write error value */ + union { + struct timespec nf_atim; /* Special file times */ + nfsuint64 nd_cookieverf; /* Cookie verifier (dir only) */ + } n_un1; + union { + struct timespec nf_mtim; + off_t nd_direof; /* Dir. EOF offset cache */ + } n_un2; + union { + struct sillyrename *nf_silly; /* Ptr to silly rename struct */ + LIST_HEAD(, nfsdmap) nd_cook; /* cookies */ + } n_un3; + short n_fhsize; /* size in bytes, of fh */ + short n_flag; /* Flag for locking.. */ + nfsfh_t n_fh; /* Small File Handle */ + struct lock n_rslock; +}; + +#define n_atim n_un1.nf_atim +#define n_mtim n_un2.nf_mtim +#define n_sillyrename n_un3.nf_silly +#define n_cookieverf n_un1.nd_cookieverf +#define n_direofoffset n_un2.nd_direof +#define n_cookies n_un3.nd_cook + +/* + * Flags for n_flag + */ +#define NFLUSHWANT 0x0001 /* Want wakeup from a flush in prog. */ +#define NFLUSHINPROG 0x0002 /* Avoid multiple calls to vinvalbuf() */ +#define NMODIFIED 0x0004 /* Might have a modified buffer in bio */ +#define NWRITEERR 0x0008 /* Flag write errors so close will know */ +#define NQNFSNONCACHE 0x0020 /* Non-cachable lease */ +#define NQNFSWRITE 0x0040 /* Write lease */ +#define NQNFSEVICTED 0x0080 /* Has been evicted */ +#define NACC 0x0100 /* Special file accessed */ +#define NUPD 0x0200 /* Special file updated */ +#define NCHG 0x0400 /* Special file times changed */ +#define NLOCKED 0x0800 /* node is locked */ +#define NWANTED 0x0100 /* someone wants to lock */ + +/* + * Convert between nfsnode pointers and vnode pointers + */ +#define VTONFS(vp) ((struct nfsnode *)(vp)->v_data) +#define NFSTOV(np) ((struct vnode *)(np)->n_vnode) + +/* + * Queue head for nfsiod's + */ +extern TAILQ_HEAD(nfs_bufq, buf) nfs_bufq; +extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; +extern struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; + +#if defined(KERNEL) || defined(_KERNEL) + +/* + * nfs_rslock - Attempt to obtain lock on nfsnode + * + * Attempt to obtain a lock on the passed nfsnode, returning ENOLCK + * if the lock could not be obtained due to our having to sleep. This + * function is generally used to lock around code that modifies an + * NFS file's size. In order to avoid deadlocks the lock + * should not be obtained while other locks are being held. 
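+ *
+ * In outline, a caller brackets the size change with:
+ *
+ *	if (nfs_rslock(np, p))
+ *		deal with ENOLCK (the lock was not obtained);
+ *	modify np->n_size as needed;
+ *	nfs_rsunlock(np, p);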
+ */ + +static __inline +int +nfs_rslock(struct nfsnode *np, struct proc *p) +{ + return(lockmgr(&np->n_rslock, LK_EXCLUSIVE | LK_CANRECURSE | LK_SLEEPFAIL, NULL, p)); +} + +static __inline +void +nfs_rsunlock(struct nfsnode *np, struct proc *p) +{ + (void)lockmgr(&np->n_rslock, LK_RELEASE, NULL, p); +} + +extern vop_t **fifo_nfsv2nodeop_p; +extern vop_t **nfsv2_vnodeop_p; +extern vop_t **spec_nfsv2nodeop_p; + +/* + * Prototypes for NFS vnode operations + */ +int nfs_getpages __P((struct vop_getpages_args *)); +int nfs_putpages __P((struct vop_putpages_args *)); +int nfs_write __P((struct vop_write_args *)); +int nqnfs_vop_lease_check __P((struct vop_lease_args *)); +int nfs_inactive __P((struct vop_inactive_args *)); +int nfs_reclaim __P((struct vop_reclaim_args *)); + +/* other stuff */ +int nfs_removeit __P((struct sillyrename *)); +int nfs_nget __P((struct mount *,nfsfh_t *,int,struct nfsnode **)); +nfsuint64 *nfs_getcookie __P((struct nfsnode *, off_t, int)); +void nfs_invaldir __P((struct vnode *)); + +#define nqnfs_lease_updatetime nfs_lease_updatetime + +#endif /* KERNEL */ + +#endif diff --git a/sys/nfs/nfsproto.h b/sys/nfs/nfsproto.h new file mode 100644 index 0000000..cbb2832 --- /dev/null +++ b/sys/nfs/nfsproto.h @@ -0,0 +1,439 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsproto.h 8.2 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + +#ifndef _NFS_NFSPROTO_H_ +#define _NFS_NFSPROTO_H_ + +/* + * nfs definitions as per the Version 2 and 3 specs + */ + +/* + * Constants as defined in the Sun NFS Version 2 and 3 specs. 
+ * "NFS: Network File System Protocol Specification" RFC1094 + * and in the "NFS: Network File System Version 3 Protocol + * Specification" + */ + +#define NFS_PORT 2049 +#define NFS_PROG 100003 +#define NFS_VER2 2 +#define NFS_VER3 3 +#define NFS_V2MAXDATA 8192 +#define NFS_MAXDGRAMDATA 16384 +#define NFS_MAXDATA 32768 +#define NFS_MAXPATHLEN 1024 +#define NFS_MAXNAMLEN 255 +#define NFS_MAXPKTHDR 404 +#define NFS_MAXPACKET (NFS_MAXPKTHDR + NFS_MAXDATA) +#define NFS_MINPACKET 20 +#define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ + +/* Stat numbers for rpc returns (version 2 and 3) */ +#define NFS_OK 0 +#define NFSERR_PERM 1 +#define NFSERR_NOENT 2 +#define NFSERR_IO 5 +#define NFSERR_NXIO 6 +#define NFSERR_ACCES 13 +#define NFSERR_EXIST 17 +#define NFSERR_XDEV 18 /* Version 3 only */ +#define NFSERR_NODEV 19 +#define NFSERR_NOTDIR 20 +#define NFSERR_ISDIR 21 +#define NFSERR_INVAL 22 /* Version 3 only */ +#define NFSERR_FBIG 27 +#define NFSERR_NOSPC 28 +#define NFSERR_ROFS 30 +#define NFSERR_MLINK 31 /* Version 3 only */ +#define NFSERR_NAMETOL 63 +#define NFSERR_NOTEMPTY 66 +#define NFSERR_DQUOT 69 +#define NFSERR_STALE 70 +#define NFSERR_REMOTE 71 /* Version 3 only */ +#define NFSERR_WFLUSH 99 /* Version 2 only */ +#define NFSERR_BADHANDLE 10001 /* The rest Version 3 only */ +#define NFSERR_NOT_SYNC 10002 +#define NFSERR_BAD_COOKIE 10003 +#define NFSERR_NOTSUPP 10004 +#define NFSERR_TOOSMALL 10005 +#define NFSERR_SERVERFAULT 10006 +#define NFSERR_BADTYPE 10007 +#define NFSERR_JUKEBOX 10008 +#define NFSERR_TRYLATER NFSERR_JUKEBOX +#define NFSERR_STALEWRITEVERF 30001 /* Fake return for nfs_commit() */ + +#define NFSERR_RETVOID 0x20000000 /* Return void, not error */ +#define NFSERR_AUTHERR 0x40000000 /* Mark an authentication error */ +#define NFSERR_RETERR 0x80000000 /* Mark an error return for V3 */ + +/* Sizes in bytes of various nfs rpc components */ +#define NFSX_UNSIGNED 4 + +/* specific to NFS Version 2 */ +#define NFSX_V2FH 32 +#define NFSX_V2FATTR 68 +#define NFSX_V2SATTR 32 +#define NFSX_V2COOKIE 4 +#define NFSX_V2STATFS 20 + +/* specific to NFS Version 3 */ +#define NFSX_V3FH (sizeof (fhandle_t)) /* size this server uses */ +#define NFSX_V3FHMAX 64 /* max. allowed by protocol */ +#define NFSX_V3FATTR 84 +#define NFSX_V3SATTR 60 /* max. all fields filled in */ +#define NFSX_V3SRVSATTR (sizeof (struct nfsv3_sattr)) +#define NFSX_V3POSTOPATTR (NFSX_V3FATTR + NFSX_UNSIGNED) +#define NFSX_V3WCCDATA (NFSX_V3POSTOPATTR + 8 * NFSX_UNSIGNED) +#define NFSX_V3COOKIEVERF 8 +#define NFSX_V3WRITEVERF 8 +#define NFSX_V3CREATEVERF 8 +#define NFSX_V3STATFS 52 +#define NFSX_V3FSINFO 48 +#define NFSX_V3PATHCONF 24 + +/* variants for both versions */ +#define NFSX_FH(v3) ((v3) ? (NFSX_V3FHMAX + NFSX_UNSIGNED) : \ + NFSX_V2FH) +#define NFSX_SRVFH(v3) ((v3) ? NFSX_V3FH : NFSX_V2FH) +#define NFSX_FATTR(v3) ((v3) ? NFSX_V3FATTR : NFSX_V2FATTR) +#define NFSX_PREOPATTR(v3) ((v3) ? (7 * NFSX_UNSIGNED) : 0) +#define NFSX_POSTOPATTR(v3) ((v3) ? (NFSX_V3FATTR + NFSX_UNSIGNED) : 0) +#define NFSX_POSTOPORFATTR(v3) ((v3) ? (NFSX_V3FATTR + NFSX_UNSIGNED) : \ + NFSX_V2FATTR) +#define NFSX_WCCDATA(v3) ((v3) ? NFSX_V3WCCDATA : 0) +#define NFSX_WCCORFATTR(v3) ((v3) ? NFSX_V3WCCDATA : NFSX_V2FATTR) +#define NFSX_SATTR(v3) ((v3) ? NFSX_V3SATTR : NFSX_V2SATTR) +#define NFSX_COOKIEVERF(v3) ((v3) ? NFSX_V3COOKIEVERF : 0) +#define NFSX_WRITEVERF(v3) ((v3) ? NFSX_V3WRITEVERF : 0) +#define NFSX_READDIR(v3) ((v3) ? (5 * NFSX_UNSIGNED) : \ + (2 * NFSX_UNSIGNED)) +#define NFSX_STATFS(v3) ((v3) ? 
NFSX_V3STATFS : NFSX_V2STATFS) + +/* nfs rpc procedure numbers (before version mapping) */ +#define NFSPROC_NULL 0 +#define NFSPROC_GETATTR 1 +#define NFSPROC_SETATTR 2 +#define NFSPROC_LOOKUP 3 +#define NFSPROC_ACCESS 4 +#define NFSPROC_READLINK 5 +#define NFSPROC_READ 6 +#define NFSPROC_WRITE 7 +#define NFSPROC_CREATE 8 +#define NFSPROC_MKDIR 9 +#define NFSPROC_SYMLINK 10 +#define NFSPROC_MKNOD 11 +#define NFSPROC_REMOVE 12 +#define NFSPROC_RMDIR 13 +#define NFSPROC_RENAME 14 +#define NFSPROC_LINK 15 +#define NFSPROC_READDIR 16 +#define NFSPROC_READDIRPLUS 17 +#define NFSPROC_FSSTAT 18 +#define NFSPROC_FSINFO 19 +#define NFSPROC_PATHCONF 20 +#define NFSPROC_COMMIT 21 + +/* And leasing (nqnfs) procedure numbers (must be last) */ +#define NQNFSPROC_GETLEASE 22 +#define NQNFSPROC_VACATED 23 +#define NQNFSPROC_EVICTED 24 + +#define NFSPROC_NOOP 25 +#define NFS_NPROCS 26 + +/* Actual Version 2 procedure numbers */ +#define NFSV2PROC_NULL 0 +#define NFSV2PROC_GETATTR 1 +#define NFSV2PROC_SETATTR 2 +#define NFSV2PROC_NOOP 3 +#define NFSV2PROC_ROOT NFSV2PROC_NOOP /* Obsolete */ +#define NFSV2PROC_LOOKUP 4 +#define NFSV2PROC_READLINK 5 +#define NFSV2PROC_READ 6 +#define NFSV2PROC_WRITECACHE NFSV2PROC_NOOP /* Obsolete */ +#define NFSV2PROC_WRITE 8 +#define NFSV2PROC_CREATE 9 +#define NFSV2PROC_REMOVE 10 +#define NFSV2PROC_RENAME 11 +#define NFSV2PROC_LINK 12 +#define NFSV2PROC_SYMLINK 13 +#define NFSV2PROC_MKDIR 14 +#define NFSV2PROC_RMDIR 15 +#define NFSV2PROC_READDIR 16 +#define NFSV2PROC_STATFS 17 + +/* + * Constants used by the Version 3 protocol for various RPCs + */ +#define NFSV3SATTRTIME_DONTCHANGE 0 +#define NFSV3SATTRTIME_TOSERVER 1 +#define NFSV3SATTRTIME_TOCLIENT 2 + +#define NFSV3ACCESS_READ 0x01 +#define NFSV3ACCESS_LOOKUP 0x02 +#define NFSV3ACCESS_MODIFY 0x04 +#define NFSV3ACCESS_EXTEND 0x08 +#define NFSV3ACCESS_DELETE 0x10 +#define NFSV3ACCESS_EXECUTE 0x20 + +#define NFSV3WRITE_UNSTABLE 0 +#define NFSV3WRITE_DATASYNC 1 +#define NFSV3WRITE_FILESYNC 2 + +#define NFSV3CREATE_UNCHECKED 0 +#define NFSV3CREATE_GUARDED 1 +#define NFSV3CREATE_EXCLUSIVE 2 + +#define NFSV3FSINFO_LINK 0x01 +#define NFSV3FSINFO_SYMLINK 0x02 +#define NFSV3FSINFO_HOMOGENEOUS 0x08 +#define NFSV3FSINFO_CANSETTIME 0x10 + +/* Conversion macros */ +#define vtonfsv2_mode(t,m) \ + txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \ + MAKEIMODE((t), (m))) +#define vtonfsv3_mode(m) txdr_unsigned((m) & ALLPERMS) +#define nfstov_mode(a) (fxdr_unsigned(u_int32_t, (a)) & ALLPERMS) +#define vtonfsv2_type(a) txdr_unsigned(nfsv2_type[((int32_t)(a))]) +#define vtonfsv3_type(a) txdr_unsigned(nfsv3_type[((int32_t)(a))]) +#define nfsv2tov_type(a) nv2tov_type[fxdr_unsigned(u_int32_t,(a))&0x7] +#define nfsv3tov_type(a) nv3tov_type[fxdr_unsigned(u_int32_t,(a))&0x7] + +/* File types */ +typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5, + NFSOCK=6, NFFIFO=7 } nfstype; + +/* Structs for common parts of the rpc's */ +/* + * File Handle (32 bytes for version 2), variable up to 64 for version 3. + * File Handles of up to NFS_SMALLFH in size are stored directly in the + * nfs node, whereas larger ones are malloc'd. (This never happens when + * NFS_SMALLFH is set to 64.) + * NFS_SMALLFH should be in the range of 32 to 64 and be divisible by 4. 
+ */ +#ifndef NFS_SMALLFH +#define NFS_SMALLFH 64 +#endif +union nfsfh { + fhandle_t fh_generic; + u_char fh_bytes[NFS_SMALLFH]; +}; +typedef union nfsfh nfsfh_t; + +struct nfsv2_time { + u_int32_t nfsv2_sec; + u_int32_t nfsv2_usec; +}; +typedef struct nfsv2_time nfstime2; + +struct nfsv3_time { + u_int32_t nfsv3_sec; + u_int32_t nfsv3_nsec; +}; +typedef struct nfsv3_time nfstime3; + +/* + * Quads are defined as arrays of 2 longs to ensure dense packing for the + * protocol and to facilitate xdr conversion. + */ +struct nfs_uquad { + u_int32_t nfsuquad[2]; +}; +typedef struct nfs_uquad nfsuint64; + +/* + * Used to convert between two u_longs and a u_quad_t. + */ +union nfs_quadconvert { + u_int32_t lval[2]; + u_quad_t qval; +}; +typedef union nfs_quadconvert nfsquad_t; + +/* + * NFS Version 3 special file number. + */ +struct nfsv3_spec { + u_int32_t specdata1; + u_int32_t specdata2; +}; +typedef struct nfsv3_spec nfsv3spec; + +/* + * File attributes and setable attributes. These structures cover both + * NFS version 2 and the version 3 protocol. Note that the union is only + * used so that one pointer can refer to both variants. These structures + * go out on the wire and must be densely packed, so no quad data types + * are used. (all fields are longs or u_longs or structures of same) + * NB: You can't do sizeof(struct nfs_fattr), you must use the + * NFSX_FATTR(v3) macro. + */ +struct nfs_fattr { + u_int32_t fa_type; + u_int32_t fa_mode; + u_int32_t fa_nlink; + u_int32_t fa_uid; + u_int32_t fa_gid; + union { + struct { + u_int32_t nfsv2fa_size; + u_int32_t nfsv2fa_blocksize; + u_int32_t nfsv2fa_rdev; + u_int32_t nfsv2fa_blocks; + u_int32_t nfsv2fa_fsid; + u_int32_t nfsv2fa_fileid; + nfstime2 nfsv2fa_atime; + nfstime2 nfsv2fa_mtime; + nfstime2 nfsv2fa_ctime; + } fa_nfsv2; + struct { + nfsuint64 nfsv3fa_size; + nfsuint64 nfsv3fa_used; + nfsv3spec nfsv3fa_rdev; + nfsuint64 nfsv3fa_fsid; + nfsuint64 nfsv3fa_fileid; + nfstime3 nfsv3fa_atime; + nfstime3 nfsv3fa_mtime; + nfstime3 nfsv3fa_ctime; + } fa_nfsv3; + } fa_un; +}; + +/* and some ugly defines for accessing union components */ +#define fa2_size fa_un.fa_nfsv2.nfsv2fa_size +#define fa2_blocksize fa_un.fa_nfsv2.nfsv2fa_blocksize +#define fa2_rdev fa_un.fa_nfsv2.nfsv2fa_rdev +#define fa2_blocks fa_un.fa_nfsv2.nfsv2fa_blocks +#define fa2_fsid fa_un.fa_nfsv2.nfsv2fa_fsid +#define fa2_fileid fa_un.fa_nfsv2.nfsv2fa_fileid +#define fa2_atime fa_un.fa_nfsv2.nfsv2fa_atime +#define fa2_mtime fa_un.fa_nfsv2.nfsv2fa_mtime +#define fa2_ctime fa_un.fa_nfsv2.nfsv2fa_ctime +#define fa3_size fa_un.fa_nfsv3.nfsv3fa_size +#define fa3_used fa_un.fa_nfsv3.nfsv3fa_used +#define fa3_rdev fa_un.fa_nfsv3.nfsv3fa_rdev +#define fa3_fsid fa_un.fa_nfsv3.nfsv3fa_fsid +#define fa3_fileid fa_un.fa_nfsv3.nfsv3fa_fileid +#define fa3_atime fa_un.fa_nfsv3.nfsv3fa_atime +#define fa3_mtime fa_un.fa_nfsv3.nfsv3fa_mtime +#define fa3_ctime fa_un.fa_nfsv3.nfsv3fa_ctime + +struct nfsv2_sattr { + u_int32_t sa_mode; + u_int32_t sa_uid; + u_int32_t sa_gid; + u_int32_t sa_size; + nfstime2 sa_atime; + nfstime2 sa_mtime; +}; + +/* + * NFS Version 3 sattr structure for the new node creation case. 
+ */ +struct nfsv3_sattr { + u_int32_t sa_modetrue; + u_int32_t sa_mode; + u_int32_t sa_uidfalse; + u_int32_t sa_gidfalse; + u_int32_t sa_sizefalse; + u_int32_t sa_atimetype; + nfstime3 sa_atime; + u_int32_t sa_mtimetype; + nfstime3 sa_mtime; +}; + +struct nfs_statfs { + union { + struct { + u_int32_t nfsv2sf_tsize; + u_int32_t nfsv2sf_bsize; + u_int32_t nfsv2sf_blocks; + u_int32_t nfsv2sf_bfree; + u_int32_t nfsv2sf_bavail; + } sf_nfsv2; + struct { + nfsuint64 nfsv3sf_tbytes; + nfsuint64 nfsv3sf_fbytes; + nfsuint64 nfsv3sf_abytes; + nfsuint64 nfsv3sf_tfiles; + nfsuint64 nfsv3sf_ffiles; + nfsuint64 nfsv3sf_afiles; + u_int32_t nfsv3sf_invarsec; + } sf_nfsv3; + } sf_un; +}; + +#define sf_tsize sf_un.sf_nfsv2.nfsv2sf_tsize +#define sf_bsize sf_un.sf_nfsv2.nfsv2sf_bsize +#define sf_blocks sf_un.sf_nfsv2.nfsv2sf_blocks +#define sf_bfree sf_un.sf_nfsv2.nfsv2sf_bfree +#define sf_bavail sf_un.sf_nfsv2.nfsv2sf_bavail +#define sf_tbytes sf_un.sf_nfsv3.nfsv3sf_tbytes +#define sf_fbytes sf_un.sf_nfsv3.nfsv3sf_fbytes +#define sf_abytes sf_un.sf_nfsv3.nfsv3sf_abytes +#define sf_tfiles sf_un.sf_nfsv3.nfsv3sf_tfiles +#define sf_ffiles sf_un.sf_nfsv3.nfsv3sf_ffiles +#define sf_afiles sf_un.sf_nfsv3.nfsv3sf_afiles +#define sf_invarsec sf_un.sf_nfsv3.nfsv3sf_invarsec + +struct nfsv3_fsinfo { + u_int32_t fs_rtmax; + u_int32_t fs_rtpref; + u_int32_t fs_rtmult; + u_int32_t fs_wtmax; + u_int32_t fs_wtpref; + u_int32_t fs_wtmult; + u_int32_t fs_dtpref; + nfsuint64 fs_maxfilesize; + nfstime3 fs_timedelta; + u_int32_t fs_properties; +}; + +struct nfsv3_pathconf { + u_int32_t pc_linkmax; + u_int32_t pc_namemax; + u_int32_t pc_notrunc; + u_int32_t pc_chownrestricted; + u_int32_t pc_caseinsensitive; + u_int32_t pc_casepreserving; +}; + +#endif diff --git a/sys/nfs/nfsrtt.h b/sys/nfs/nfsrtt.h new file mode 100644 index 0000000..4473086 --- /dev/null +++ b/sys/nfs/nfsrtt.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsrtt.h 8.2 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_NFSRTT_H_ +#define _NFS_NFSRTT_H_ + +/* + * Definitions for performance monitor. + * The client and server logging are turned on by setting the global + * constant "nfsrtton" to 1. + */ +#define NFSRTTLOGSIZ 128 + +/* + * Circular log of client side rpc activity. Each log entry is for one + * rpc filled in upon completion. (ie. in order of completion) + * The "pos" is the table index for the "next" entry, therefore the + * list goes from nfsrtt.rttl[pos] --> nfsrtt.rttl[pos - 1] in + * chronological order of completion. + */ +struct nfsrtt { + int pos; /* Position in array for next entry */ + struct rttl { + u_int32_t proc; /* NFS procedure number */ + int rtt; /* Measured round trip time */ + int rto; /* Round Trip Timeout */ + int sent; /* # rpcs in progress */ + int cwnd; /* Send window */ + int srtt; /* Ave Round Trip Time */ + int sdrtt; /* Ave mean deviation of RTT */ + fsid_t fsid; /* Fsid for mount point */ + struct timeval tstamp; /* Timestamp of log entry */ + } rttl[NFSRTTLOGSIZ]; +}; + +/* + * And definitions for server side performance monitor. + * The log organization is the same as above except it is filled in at the + * time the server sends the rpc reply. + */ + +/* + * Bits for the flags field. + */ +#define DRT_NQNFS 0x01 /* Rpc used Nqnfs protocol */ +#define DRT_TCP 0x02 /* Client used TCP transport */ +#define DRT_CACHEREPLY 0x04 /* Reply was from recent request cache */ +#define DRT_CACHEDROP 0x08 /* Rpc request dropped, due to recent reply */ +#define DRT_NFSV3 0x10 /* Rpc used NFS Version 3 */ + +/* + * Server log structure + * NB: ipadr == INADDR_ANY indicates a client using a non IP protocol. + * (ISO perhaps?) + */ +struct nfsdrt { + int pos; /* Position of next log entry */ + struct drt { + int flag; /* Bits as defined above */ + u_int32_t proc; /* NFS procedure number */ + u_int32_t ipadr; /* IP address of client */ + int resptime; /* Response time (usec) */ + struct timeval tstamp; /* Timestamp of log entry */ + } drt[NFSRTTLOGSIZ]; +}; + +#endif diff --git a/sys/nfs/nfsrvcache.h b/sys/nfs/nfsrvcache.h new file mode 100644 index 0000000..eec5850 --- /dev/null +++ b/sys/nfs/nfsrvcache.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsrvcache.h 8.3 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_NFSRVCACHE_H_ +#define _NFS_NFSRVCACHE_H_ + +#include <sys/queue.h> + +/* + * Definitions for the server recent request cache + */ + +#define NFSRVCACHESIZ 64 + +struct nfsrvcache { + TAILQ_ENTRY(nfsrvcache) rc_lru; /* LRU chain */ + LIST_ENTRY(nfsrvcache) rc_hash; /* Hash chain */ + u_int32_t rc_xid; /* rpc id number */ + union { + struct mbuf *ru_repmb; /* Reply mbuf list OR */ + int ru_repstat; /* Reply status */ + } rc_un; + union nethostaddr rc_haddr; /* Host address */ + u_int32_t rc_proc; /* rpc proc number */ + u_char rc_state; /* Current state of request */ + u_char rc_flag; /* Flag bits */ +}; + +#define rc_reply rc_un.ru_repmb +#define rc_status rc_un.ru_repstat +#define rc_inetaddr rc_haddr.had_inetaddr +#define rc_nam rc_haddr.had_nam + +/* Cache entry states */ +#define RC_UNUSED 0 +#define RC_INPROG 1 +#define RC_DONE 2 + +/* Return values */ +#define RC_DROPIT 0 +#define RC_REPLY 1 +#define RC_DOIT 2 +#define RC_CHECKIT 3 + +/* Flag bits */ +#define RC_LOCKED 0x01 +#define RC_WANTED 0x02 +#define RC_REPSTATUS 0x04 +#define RC_REPMBUF 0x08 +#define RC_NQNFS 0x10 +#define RC_INETADDR 0x20 +#define RC_NAM 0x40 + +#endif diff --git a/sys/nfs/nfsv2.h b/sys/nfs/nfsv2.h new file mode 100644 index 0000000..6f8f85e --- /dev/null +++ b/sys/nfs/nfsv2.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsv2.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#include <nfs/nfsproto.h> diff --git a/sys/nfs/nqnfs.h b/sys/nfs/nqnfs.h new file mode 100644 index 0000000..2ba78ae --- /dev/null +++ b/sys/nfs/nqnfs.h @@ -0,0 +1,218 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)nqnfs.h 8.3 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_NQNFS_H_ +#define _NFS_NQNFS_H_ + +/* + * Definitions for NQNFS (Not Quite NFS) cache consistency protocol. + */ + +/* Tunable constants */ +#define NQ_CLOCKSKEW 3 /* Clock skew factor (sec) */ +#define NQ_WRITESLACK 5 /* Delay for write cache flushing */ +#define NQ_MAXLEASE 60 /* Max lease duration (sec) */ +#define NQ_MINLEASE 5 /* Min lease duration (sec) */ +#define NQ_DEFLEASE 30 /* Default lease duration (sec) */ +#define NQ_RENEWAL 3 /* Time before expiry (sec) to renew */ +#define NQ_TRYLATERDEL 15 /* Initial try later delay (sec) */ +#define NQ_MAXNUMLEASE 2048 /* Upper bound on number of server leases */ +#define NQ_DEADTHRESH NQ_NEVERDEAD /* Default nm_deadthresh */ +#define NQ_NEVERDEAD 9 /* Greater than max. nm_timeouts */ +#define NQLCHSZ 256 /* Server hash table size */ + +#define NQNFS_PROG 300105 /* As assigned by Sun */ +#define NQNFS_VER3 3 +#define NQNFS_EVICTSIZ 156 /* Size of eviction request in bytes */ + +#if defined(KERNEL) || defined(_KERNEL) +/* + * Definitions used for saving the "last lease expires" time in Non-volatile + * RAM on the server. The default definitions below assume that NOVRAM is not + * available. + */ +#ifdef HASNVRAM +# undef HASNVRAM +#endif +#define NQSTORENOVRAM(t) +#define NQLOADNOVRAM(t) + +/* + * Defn and structs used on the server to maintain state for current leases. + * The list of host(s) that hold the lease are kept as nqhost structures. + * The first one lives in nqlease and any others are held in a linked + * list of nqm structures hanging off of nqlease. + * + * Each nqlease structure is chained into two lists. The first is a list + * ordered by increasing expiry time for nqsrv_timer() and the second is a chain + * hashed on lc_fh. 
+ */ +#define LC_MOREHOSTSIZ 10 + +struct nqhost { + union { + struct { + u_int16_t udp_flag; + u_int16_t udp_port; + union nethostaddr udp_haddr; + } un_udp; + struct { + u_int16_t connless_flag; + u_int16_t connless_spare; + union nethostaddr connless_haddr; + } un_connless; + struct { + u_int16_t conn_flag; + u_int16_t conn_spare; + struct nfssvc_sock *conn_slp; + } un_conn; + } lph_un; +}; +#define lph_flag lph_un.un_udp.udp_flag +#define lph_port lph_un.un_udp.udp_port +#define lph_haddr lph_un.un_udp.udp_haddr +#define lph_inetaddr lph_un.un_udp.udp_haddr.had_inetaddr +#define lph_claddr lph_un.un_connless.connless_haddr +#define lph_nam lph_un.un_connless.connless_haddr.had_nam +#define lph_slp lph_un.un_conn.conn_slp + +struct nqlease { + LIST_ENTRY(nqlease) lc_hash; /* Fhandle hash list */ + CIRCLEQ_ENTRY(nqlease) lc_timer; /* Timer queue list */ + time_t lc_expiry; /* Expiry time (sec) */ + struct nqhost lc_host; /* Host that got lease */ + struct nqm *lc_morehosts; /* Other hosts that share read lease */ + fsid_t lc_fsid; /* Fhandle */ + char lc_fiddata[MAXFIDSZ]; + struct vnode *lc_vp; /* Soft reference to associated vnode */ +}; +#define lc_flag lc_host.lph_un.un_udp.udp_flag + +/* lc_flag bits */ +#define LC_VALID 0x0001 /* Host address valid */ +#define LC_WRITE 0x0002 /* Write cache */ +#define LC_NONCACHABLE 0x0004 /* Non-cachable lease */ +#define LC_LOCKED 0x0008 /* Locked */ +#define LC_WANTED 0x0010 /* Lock wanted */ +#define LC_EXPIREDWANTED 0x0020 /* Want lease when expired */ +#define LC_UDP 0x0040 /* Host address for udp socket */ +#define LC_CLTP 0x0080 /* Host address for other connectionless */ +#define LC_LOCAL 0x0100 /* Host is server */ +#define LC_VACATED 0x0200 /* Host has vacated lease */ +#define LC_WRITTEN 0x0400 /* Recently wrote to the leased file */ +#define LC_SREF 0x0800 /* Holds a nfssvc_sock reference */ + +struct nqm { + struct nqm *lpm_next; + struct nqhost lpm_hosts[LC_MOREHOSTSIZ]; +}; + +/* + * Special value for slp for local server calls. + */ +#define NQLOCALSLP ((struct nfssvc_sock *) -1) + +/* + * Server side macros. + */ +#define nqsrv_getl(v, l) \ + (void) nqsrv_getlease((v), &nfsd->nd_duration, \ + ((nfsd->nd_flag & ND_LEASE) ? (nfsd->nd_flag & ND_LEASE) : \ + ((l) | ND_CHECK)), \ + slp, procp, nfsd->nd_nam, &cache, &frev, cred) + +/* + * Client side macros that check for a valid lease. + */ +#define NQNFS_CKINVALID(v, n, f) \ + ((time_second > (n)->n_expiry && \ + VFSTONFS((v)->v_mount)->nm_timeouts < VFSTONFS((v)->v_mount)->nm_deadthresh) \ + || ((f) == ND_WRITE && ((n)->n_flag & NQNFSWRITE) == 0)) + +#define NQNFS_CKCACHABLE(v, f) \ + ((time_second <= VTONFS(v)->n_expiry || \ + VFSTONFS((v)->v_mount)->nm_timeouts >= VFSTONFS((v)->v_mount)->nm_deadthresh) \ + && (VTONFS(v)->n_flag & NQNFSNONCACHE) == 0 && \ + ((f) == ND_READ || (VTONFS(v)->n_flag & NQNFSWRITE))) + +#define NQNFS_NEEDLEASE(v, p) \ + (time_second > VTONFS(v)->n_expiry ? \ + ((VTONFS(v)->n_flag & NQNFSEVICTED) ? 0 : nqnfs_piggy[p]) : \ + (((time_second + NQ_RENEWAL) > VTONFS(v)->n_expiry && \ + nqnfs_piggy[p]) ? \ + ((VTONFS(v)->n_flag & NQNFSWRITE) ? \ + ND_WRITE : nqnfs_piggy[p]) : 0)) + +/* + * List head for timer queue. + */ +extern CIRCLEQ_HEAD(nqtimerhead, nqlease) nqtimerhead; + +/* + * List head for the file handle hash table. + */ +#define NQFHHASH(f) \ + (&nqfhhashtbl[(*((u_int32_t *)(f))) & nqfhhash]) +extern LIST_HEAD(nqfhhashhead, nqlease) *nqfhhashtbl; +extern u_long nqfhhash; + +/* + * Nqnfs return status numbers. 
+ */ +#define NQNFS_EXPIRED 500 +#define NQNFS_TRYLATER 501 + +void nqnfs_lease_check __P((struct vnode *, struct proc *, struct ucred *, int)); +void nqnfs_lease_updatetime __P((int)); +int nqsrv_getlease __P((struct vnode *, u_int32_t *, int, + struct nfssvc_sock *, struct proc *, + struct sockaddr *, int *, u_quad_t *, + struct ucred *)); +int nqnfs_getlease __P((struct vnode *,int,struct ucred *,struct proc *)); +int nqnfs_callback __P((struct nfsmount *,struct mbuf *,struct mbuf *,caddr_t)); +int nqnfs_clientd __P((struct nfsmount *,struct ucred *,struct nfsd_cargs *,int,caddr_t,struct proc *)); +struct nfsnode; +void nqnfs_clientlease __P((struct nfsmount *, struct nfsnode *, int, int, time_t, u_quad_t)); +void nqnfs_serverd __P((void)); +int nqnfsrv_getlease __P((struct nfsrv_descript *, struct nfssvc_sock *, struct proc *, struct mbuf **)); +int nqnfsrv_vacated __P((struct nfsrv_descript *, struct nfssvc_sock *, struct proc *, struct mbuf **)); +#endif + +#endif diff --git a/sys/nfs/rpcv2.h b/sys/nfs/rpcv2.h new file mode 100644 index 0000000..525df09 --- /dev/null +++ b/sys/nfs/rpcv2.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)rpcv2.h 8.2 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_RPCV2_H_ +#define _NFS_RPCV2_H_ + +/* + * Definitions for Sun RPC Version 2, from + * "RPC: Remote Procedure Call Protocol Specification" RFC1057 + */ + +/* Version # */ +#define RPC_VER2 2 + +/* Authentication */ +#define RPCAUTH_NULL 0 +#define RPCAUTH_UNIX 1 +#define RPCAUTH_SHORT 2 +#define RPCAUTH_KERB4 4 +#define RPCAUTH_NQNFS 300000 +#define RPCAUTH_MAXSIZ 400 +#define RPCVERF_MAXSIZ 12 /* For Kerb, can actually be 400 */ +#define RPCAUTH_UNIXGIDS 16 + +/* + * Constants associated with authentication flavours. + */ +#define RPCAKN_FULLNAME 0 +#define RPCAKN_NICKNAME 1 + +/* Rpc Constants */ +#define RPC_CALL 0 +#define RPC_REPLY 1 +#define RPC_MSGACCEPTED 0 +#define RPC_MSGDENIED 1 +#define RPC_PROGUNAVAIL 1 +#define RPC_PROGMISMATCH 2 +#define RPC_PROCUNAVAIL 3 +#define RPC_GARBAGE 4 /* I like this one */ +#define RPC_MISMATCH 0 +#define RPC_AUTHERR 1 + +/* Authentication failures */ +#define AUTH_BADCRED 1 +#define AUTH_REJECTCRED 2 +#define AUTH_BADVERF 3 +#define AUTH_REJECTVERF 4 +#define AUTH_TOOWEAK 5 /* Give em wheaties */ + +/* Sizes of rpc header parts */ +#define RPC_SIZ 24 +#define RPC_REPLYSIZ 28 + +/* RPC Prog definitions */ +#define RPCPROG_MNT 100005 +#define RPCMNT_VER1 1 +#define RPCMNT_VER3 3 +#define RPCMNT_MOUNT 1 +#define RPCMNT_DUMP 2 +#define RPCMNT_UMOUNT 3 +#define RPCMNT_UMNTALL 4 +#define RPCMNT_EXPORT 5 +#define RPCMNT_NAMELEN 255 +#define RPCMNT_PATHLEN 1024 +#define RPCPROG_NFS 100003 + +/* + * Structures used for RPCAUTH_KERB4. + */ +struct nfsrpc_fullverf { + u_int32_t t1; + u_int32_t t2; + u_int32_t w2; +}; + +struct nfsrpc_fullblock { + u_int32_t t1; + u_int32_t t2; + u_int32_t w1; + u_int32_t w2; +}; + +struct nfsrpc_nickverf { + u_int32_t kind; + struct nfsrpc_fullverf verf; +}; + +/* + * and their sizes in bytes.. If sizeof (struct nfsrpc_xx) != these + * constants, well then things will break in mount_nfs and nfsd. + */ +#define RPCX_FULLVERF 12 +#define RPCX_FULLBLOCK 16 +#define RPCX_NICKVERF 16 + +#ifdef NFSKERB +XXX +#else +typedef u_char NFSKERBKEY_T[2]; +typedef u_char NFSKERBKEYSCHED_T[2]; +#endif +#define NFS_KERBSRV "rcmd" /* Kerberos Service for NFS */ +#define NFS_KERBTTL (30 * 60) /* Credential ttl (sec) */ +#define NFS_KERBCLOCKSKEW (5 * 60) /* Clock skew (sec) */ +#define NFS_KERBW1(t) (*((u_long *)(&((t).dat[((t).length + 3) & ~0x3])))) +#endif diff --git a/sys/nfs/xdr_subs.h b/sys/nfs/xdr_subs.h new file mode 100644 index 0000000..82ee38a --- /dev/null +++ b/sys/nfs/xdr_subs.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. 
+ * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)xdr_subs.h 8.3 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + + +#ifndef _NFS_XDR_SUBS_H_ +#define _NFS_XDR_SUBS_H_ + +/* + * Macros used for conversion to/from xdr representation by nfs... + * These use the MACHINE DEPENDENT routines ntohl, htonl + * As defined by "XDR: External Data Representation Standard" RFC1014 + * + * To simplify the implementation, we use ntohl/htonl even on big-endian + * machines, and count on them being `#define'd away. Some of these + * might be slightly more efficient as quad_t copies on a big-endian, + * but we cannot count on their alignment anyway. + */ + +#define fxdr_unsigned(t, v) ((t)ntohl((int32_t)(v))) +#define txdr_unsigned(v) (htonl((int32_t)(v))) + +#define fxdr_nfsv2time(f, t) \ +do { \ + (t)->tv_sec = ntohl(((struct nfsv2_time *)(f))->nfsv2_sec); \ + if (((struct nfsv2_time *)(f))->nfsv2_usec != 0xffffffff) \ + (t)->tv_nsec = 1000 * ntohl(((struct nfsv2_time *)(f))->nfsv2_usec); \ + else \ + (t)->tv_nsec = 0; \ +} while (0) +#define txdr_nfsv2time(f, t) \ +do { \ + ((struct nfsv2_time *)(t))->nfsv2_sec = htonl((f)->tv_sec); \ + if ((f)->tv_nsec != -1) \ + ((struct nfsv2_time *)(t))->nfsv2_usec = htonl((f)->tv_nsec / 1000); \ + else \ + ((struct nfsv2_time *)(t))->nfsv2_usec = 0xffffffff; \ +} while (0) + +#define fxdr_nfsv3time(f, t) \ +do { \ + (t)->tv_sec = ntohl(((struct nfsv3_time *)(f))->nfsv3_sec); \ + (t)->tv_nsec = ntohl(((struct nfsv3_time *)(f))->nfsv3_nsec); \ +} while (0) +#define txdr_nfsv3time(f, t) \ +do { \ + ((struct nfsv3_time *)(t))->nfsv3_sec = htonl((f)->tv_sec); \ + ((struct nfsv3_time *)(t))->nfsv3_nsec = htonl((f)->tv_nsec); \ +} while (0) + +#define fxdr_hyper(f) \ + ((((u_quad_t)ntohl(((u_int32_t *)(f))[0])) << 32) | \ + (u_quad_t)(ntohl(((u_int32_t *)(f))[1]))) +#define txdr_hyper(f, t) \ +do { \ + ((u_int32_t *)(t))[0] = htonl((u_int32_t)((f) >> 32)); \ + ((u_int32_t *)(t))[1] = htonl((u_int32_t)((f) & 0xffffffff)); \ +} while (0) + +#endif |
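The fxdr_hyper and txdr_hyper macros defined just above carry every 64-bit quantity in the Version 3 protocol as two big-endian 32-bit XDR words, most significant word first. The fragment below is a minimal userland sketch, not part of the commit: it copies those two macros verbatim out of xdr_subs.h and round-trips a value through them. The names main, buf and size are illustrative only, and the only assumption is that <sys/types.h> supplies the BSD u_int32_t and u_quad_t typedefs.

/*
 * Standalone sketch: the two hyper macros copied from xdr_subs.h above,
 * exercised from ordinary userland C to show the wire layout.
 */
#include <sys/types.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>

#define fxdr_hyper(f) \
	((((u_quad_t)ntohl(((u_int32_t *)(f))[0])) << 32) | \
	(u_quad_t)(ntohl(((u_int32_t *)(f))[1])))
#define txdr_hyper(f, t) \
do { \
	((u_int32_t *)(t))[0] = htonl((u_int32_t)((f) >> 32)); \
	((u_int32_t *)(t))[1] = htonl((u_int32_t)((f) & 0xffffffff)); \
} while (0)

int
main(void)
{
	u_int32_t buf[2];			/* wire form: two XDR words */
	u_quad_t size = 0x0123456789abcdefULL;	/* e.g. an NFSv3 file size */

	txdr_hyper(size, buf);			/* host 64-bit -> wire */
	assert(fxdr_hyper(buf) == size);	/* wire -> host round-trips */

	/* Most significant word travels first, each word big-endian. */
	printf("wire words: %08x %08x\n",
	    (unsigned)ntohl(buf[0]), (unsigned)ntohl(buf[1]));
	return (0);
}

This fixed two-word layout is also what the 8-byte constants NFSX_V3COOKIEVERF and NFSX_V3WRITEVERF and the nfsuint64 fields of nfs_fattr and nfs_statfs in nfsproto.h count on, which is why those sizes are hard-coded rather than derived from host quad types.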