diff options
author | alfred <alfred@FreeBSD.org> | 2003-11-14 20:54:10 +0000 |
---|---|---|
committer | alfred <alfred@FreeBSD.org> | 2003-11-14 20:54:10 +0000 |
commit | 5b076fe9da6e7bfd69aca6e09e64d75d72477b92 (patch) | |
tree | 551663ad558af360faf7b32b0b3e67a667de012c | |
parent | af7f62665d5a48d63d7f4c984c6511297dbb645c (diff) | |
download | FreeBSD-src-5b076fe9da6e7bfd69aca6e09e64d75d72477b92.zip FreeBSD-src-5b076fe9da6e7bfd69aca6e09e64d75d72477b92.tar.gz |
University of Michigan's Citi NFSv4 kernel client code.
Submitted by: Jim Rees <rees@umich.edu>
37 files changed, 11249 insertions, 22 deletions
diff --git a/sys/conf/files b/sys/conf/files index 2482345..08dfcab 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1562,6 +1562,15 @@ nfsclient/nfs_nfsiod.c optional nfsclient nfsclient/nfs_vfsops.c optional nfsclient nfsclient/nfs_vnops.c optional nfsclient nfsclient/nfs_lock.c optional nfsclient +nfs4client/nfs4_socket.c optional nfsclient +nfs4client/nfs4_vfsops.c optional nfsclient +nfs4client/nfs4_vnops.c optional nfsclient +nfs4client/nfs4_subs.c optional nfsclient +nfs4client/nfs4_vfs_subs.c optional nfsclient +nfs4client/nfs4_vn_subs.c optional nfsclient +nfs4client/nfs4_dev.c optional nfsclient +nfs4client/nfs4_idmap.c optional nfsclient +rpc/rpcclnt.c optional nfsclient nfsserver/nfs_serv.c optional nfsserver nfsserver/nfs_srvsock.c optional nfsserver nfsserver/nfs_srvcache.c optional nfsserver diff --git a/sys/nfs/nfsproto.h b/sys/nfs/nfsproto.h index 8f62ab1..4c8a090 100644 --- a/sys/nfs/nfsproto.h +++ b/sys/nfs/nfsproto.h @@ -55,17 +55,19 @@ #define NFS_PROG 100003 #define NFS_VER2 2 #define NFS_VER3 3 +#define NFS_VER4 4 + #define NFS_V2MAXDATA 8192 #define NFS_MAXDGRAMDATA 16384 #define NFS_MAXDATA 32768 #define NFS_MAXPATHLEN 1024 #define NFS_MAXNAMLEN 255 -#define NFS_MAXPKTHDR 404 +#define NFS_MAXPKTHDR 404 /* XXXv4 this needs to be adjust for v4 */ #define NFS_MAXPACKET (NFS_MAXPKTHDR + NFS_MAXDATA) #define NFS_MINPACKET 20 #define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ -/* Stat numbers for rpc returns (version 2 and 3) */ +/* Stat numbers for rpc returns (version 2, 3 and 4) */ #define NFS_OK 0 #define NFSERR_PERM 1 #define NFSERR_NOENT 2 @@ -88,7 +90,7 @@ #define NFSERR_STALE 70 #define NFSERR_REMOTE 71 /* Version 3 only */ #define NFSERR_WFLUSH 99 /* Version 2 only */ -#define NFSERR_BADHANDLE 10001 /* The rest Version 3 only */ +#define NFSERR_BADHANDLE 10001 /* The rest Version 3, 4 only */ #define NFSERR_NOT_SYNC 10002 #define NFSERR_BAD_COOKIE 10003 #define NFSERR_NOTSUPP 10004 @@ -97,8 +99,48 @@ #define 
NFSERR_BADTYPE 10007 #define NFSERR_JUKEBOX 10008 #define NFSERR_TRYLATER NFSERR_JUKEBOX +#define NFSERR_SAME 10009 /* The rest Version 4 only */ +#define NFSERR_DENIED 10010 +#define NFSERR_EXPIRED 10011 +#define NFSERR_LOCKED 10012 +#define NFSERR_GRACE 10013 +#define NFSERR_FHEXPIRED 10014 +#define NFSERR_SHARDE_DENIED 10015 +#define NFSERR_WRONGSEC 10016 +#define NFSERR_CLID_INUSE 10017 +#define NFSERR_RESOURCE 10018 +#define NFSERR_MOVED 10019 +#define NFSERR_NOFILEHANDLE 10020 +#define NFSERR_MINOR_VERS_MISMATCH 10021 +#define NFSERR_STALE_CLIENTID 10022 +#define NFSERR_STALE_STATEID 10023 +#define NFSERR_OLD_STATEID 10024 +#define NFSERR_BAD_STATEID 10025 +#define NFSERR_BAD_SEQID 10026 +#define NFSERR_NOT_SAME 10027 +#define NFSERR_LOCK_RANGE 10028 +#define NFSERR_SYMLINK 10029 +#define NFSERR_READDIR_NOSPC 10030 +#define NFSERR_LEASE_MOVED 10031 +#define NFSERR_ATTRNOTSUPP 10032 +#define NFSERR_NO_GRACE 10033 +#define NFSERR_RECLAIM_BAD 10034 +#define NFSERR_RECLAIM_CONFLICT 10035 +#define NFSERR_BADXDR 10036 +#define NFSERR_LOCKS_HELD 10037 +#define NFSERR_OPENMODE 10038 +#define NFSERR_BADOWNER 10039 +#define NFSERR_BADCHAR 10040 +#define NFSERR_BADNAME 10041 +#define NFSERR_BAD_RANGE 10042 +#define NFSERR_LOCK_NOTSUPP 10043 +#define NFSERR_OP_ILLEGAL 10044 +#define NFSERR_DEADLOCK 10045 +#define NFSERR_FILE_OPEN 10046 #define NFSERR_STALEWRITEVERF 30001 /* Fake return for nfs_commit() */ + + #define NFSERR_RETVOID 0x20000000 /* Return void, not error */ #define NFSERR_AUTHERR 0x40000000 /* Mark an authentication error */ #define NFSERR_RETERR 0x80000000 /* Mark an error return for V3 */ @@ -128,6 +170,11 @@ #define NFSX_V3FSINFO 48 #define NFSX_V3PATHCONF 24 +/* specific to NFS Version 4 */ +#define NFSX_V4VERF 8 +#define NFSX_V4FH 128 +#define NFSX_V4STATEID 16 + /* variants for both versions */ #define NFSX_FH(v3) ((v3) ? 
(NFSX_V3FHMAX + NFSX_UNSIGNED) : \ NFSX_V2FH) @@ -193,6 +240,48 @@ #define NFSV2PROC_READDIR 16 #define NFSV2PROC_STATFS 17 +/* Version 4 procedure numbers */ +#define NFSV4PROC_NULL 0 +#define NFSV4PROC_COMPOUND 1 + +/* Version 4 operation numbers */ +#define NFSV4OP_ACCESS 3 +#define NFSV4OP_CLOSE 4 +#define NFSV4OP_COMMIT 5 +#define NFSV4OP_CREATE 6 +#define NFSV4OP_DELEGPURGE 7 +#define NFSV4OP_DELEGRETURN 8 +#define NFSV4OP_GETATTR 9 +#define NFSV4OP_GETFH 10 +#define NFSV4OP_LINK 11 +#define NFSV4OP_LOCK 12 +#define NFSV4OP_LOCKT 13 +#define NFSV4OP_LOCKU 14 +#define NFSV4OP_LOOKUP 15 +#define NFSV4OP_LOOKUPP 16 +#define NFSV4OP_NVERIFY 17 +#define NFSV4OP_OPEN 18 +#define NFSV4OP_OPENATTR 19 +#define NFSV4OP_OPEN_CONFIRM 20 +#define NFSV4OP_OPEN_DOWNGRADE 21 +#define NFSV4OP_PUTFH 22 +#define NFSV4OP_PUTPUBFH 23 +#define NFSV4OP_PUTROOTFH 24 +#define NFSV4OP_READ 25 +#define NFSV4OP_READDIR 26 +#define NFSV4OP_READLINK 27 +#define NFSV4OP_REMOVE 28 +#define NFSV4OP_RENAME 29 +#define NFSV4OP_RENEW 30 +#define NFSV4OP_RESTOREFH 31 +#define NFSV4OP_SAVEFH 32 +#define NFSV4OP_SECINFO 33 +#define NFSV4OP_SETATTR 34 +#define NFSV4OP_SETCLIENTID 35 +#define NFSV4OP_SETCLIENTID_CONFIRM 36 +#define NFSV4OP_VERIFY 37 +#define NFSV4OP_WRITE 38 + /* * Constants used by the Version 3 protocol for various RPCs */ @@ -220,11 +309,65 @@ #define NFSV3FSINFO_HOMOGENEOUS 0x08 #define NFSV3FSINFO_CANSETTIME 0x10 +/* + * Constants used by the Version 4 protocol for various RPCs + */ + +#define NFSV4ACCESS_READ 0x01 +#define NFSV4ACCESS_LOOKUP 0x02 +#define NFSV4ACCESS_MODIFY 0x04 +#define NFSV4ACCESS_EXTEND 0x08 +#define NFSV4ACCESS_DELETE 0x10 +#define NFSV4ACCESS_EXECUTE 0x20 + +#define NFSV4OPENRES_MLOCK 0x01 +#define NFSV4OPENRES_CONFIRM 0x02 + +#define NFSV4OPENSHARE_ACCESS_READ 0x01 +#define NFSV4OPENSHARE_ACCESS_WRITE 0x02 +#define NFSV4OPENSHARE_ACCESS_BOTH 0x03 +#define NFSV4OPENSHARE_DENY_NONE 0x00 +#define NFSV4OPENSHARE_DENY_READ 0x01 +#define 
NFSV4OPENSHARE_DENY_WRITE 0x02 +#define NFSV4OPENSHARE_DENY_BOTH 0x03 + /* File types */ -typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5, - NFSOCK=6, NFFIFO=7 } nfstype; +typedef enum { + NFNON=0, + NFREG=1, + NFDIR=2, + NFBLK=3, + NFCHR=4, + NFLNK=5, + NFSOCK=6, + NFFIFO=7, + NFATTRDIR = 8, + NFNAMEDATTR = 9, + NFBAD = 10, +} nfstype; + +/* NFSv4 claim type */ +typedef enum { + NCLNULL = 0, + NCLPREV = 1, + NCLDELEGCUR = 2, + NCLDELEGPREV = 3, +} nfsv4cltype; + +/* Other NFSv4 types */ +typedef enum { + NSHUNSTABLE = 0, + NSHDATASYNC = 1, + NSHFILESYNC = 2, +} nfsv4stablehow; + +typedef enum { OTNOCREATE = 0, OTCREATE = 1 } nfsv4opentype; +typedef enum { CMUNCHECKED = 0, CMGUARDED = 1, CMEXCLUSIVE = 2 } nfsv4createmode; +typedef enum { THSERVERTIME = 0, THCLIENTTIME = 1 } nfsv4timehow; +typedef enum { ODNONE = 0, ODREAD = 1, ODWRITE = 2 } nfsv4opendelegtype; /* Structs for common parts of the rpc's */ + /* * File Handle (32 bytes for version 2), variable up to 64 for version 3. * File Handles of up to NFS_SMALLFH in size are stored directly in the @@ -233,7 +376,7 @@ typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5, * NFS_SMALLFH should be in the range of 32 to 64 and be divisible by 4. */ #ifndef NFS_SMALLFH -#define NFS_SMALLFH 64 +#define NFS_SMALLFH 128 #endif union nfsfh { fhandle_t fh_generic; @@ -281,6 +424,22 @@ struct nfsv3_spec { typedef struct nfsv3_spec nfsv3spec; /* + * NFS Version 4 bitmap. + */ +struct nfsv4_bitmap { + uint32_t bmlen; + uint32_t *bmval; +}; +typedef struct nfsv4_bitmap nfsv4bitmap; + +struct nfsv4_changeinfo { + u_int ciatomic; + uint64_t cibefore; + uint64_t ciafter; +}; +typedef struct nfsv4_changeinfo nfsv4changeinfo; + +/* * File attributes and setable attributes. These structures cover both * NFS version 2 and the version 3 protocol. Note that the union is only * used so that one pointer can refer to both variants. 
These structures @@ -339,6 +498,126 @@ struct nfs_fattr { #define fa3_mtime fa_un.fa_nfsv3.nfsv3fa_mtime #define fa3_ctime fa_un.fa_nfsv3.nfsv3fa_ctime +struct nfsv4_fattr { + u_int fa4_valid; + nfstype fa4_type; + off_t fa4_size; + uint64_t fa4_fsid_major; + uint64_t fa4_fsid_minor; + uint64_t fa4_fileid; + mode_t fa4_mode; + nlink_t fa4_nlink; + uid_t fa4_uid; + gid_t fa4_gid; + uint32_t fa4_rdev_major; + uint32_t fa4_rdev_minor; + struct timespec fa4_atime; + struct timespec fa4_ctime; + struct timespec fa4_mtime; + uint64_t fa4_maxread; + uint64_t fa4_maxwrite; + uint64_t fa4_ffree; + uint64_t fa4_ftotal; + uint32_t fa4_maxname; + uint64_t fa4_savail; + uint64_t fa4_sfree; + uint64_t fa4_stotal; + uint64_t fa4_changeid; + uint32_t fa4_lease_time; + uint64_t fa4_maxfilesize; +}; + +/* Flags for fa4_valid */ +#define FA4V_SIZE 0x00000001 +#define FA4V_FSID 0x00000002 +#define FA4V_FILEID 0x00000004 +#define FA4V_MODE 0x00000008 +#define FA4V_NLINK 0x00000010 +#define FA4V_UID 0x00000020 +#define FA4V_GID 0x00000040 +#define FA4V_RDEV 0x00000080 +#define FA4V_ATIME 0x00000100 +#define FA4V_CTIME 0x00000200 +#define FA4V_MTIME 0x00000400 +#define FA4V_MAXREAD 0x00000800 +#define FA4V_MAXWRITE 0x00001000 +#define FA4V_TYPE 0x00002000 +#define FA4V_FFREE 0x00004000 +#define FA4V_FTOTAL 0x00008000 +#define FA4V_MAXNAME 0x00010000 +#define FA4V_SAVAIL 0x00020000 +#define FA4V_SFREE 0x00040000 +#define FA4V_STOTAL 0x00080000 +#define FA4V_CHANGEID 0x00100000 +#define FA4V_LEASE_TIME 0x00200000 +#define FA4V_MAXFILESIZE 0x00400000 +#define FA4V_ACL 0x00800000 + +/* Offsets into bitmask */ +#define FA4_SUPPORTED_ATTRS 0 +#define FA4_TYPE 1 +#define FA4_FH_EXPIRE_TYPE 2 +#define FA4_CHANGE 3 +#define FA4_SIZE 4 +#define FA4_LINK_SUPPORT 5 +#define FA4_SYMLINK_SUPPORT 6 +#define FA4_NAMED_ATTR 7 +#define FA4_FSID 8 +#define FA4_UNIQUE_HANDLES 9 +#define FA4_LEASE_TIME 10 +#define FA4_RDATTR_ERROR 11 +#define FA4_ACL 12 +#define FA4_ACLSUPPORT 13 +#define FA4_ARCHIVE 14 
+#define FA4_CANSETTIME 15 +#define FA4_CASE_INSENSITIVE 16 +#define FA4_CASE_PRESERVING 17 +#define FA4_CHOWN_RESTRICTED 18 +#define FA4_FILEHANDLE 19 +#define FA4_FILEID 20 +#define FA4_FILES_AVAIL 21 +#define FA4_FILES_FREE 22 +#define FA4_FILES_TOTAL 23 +#define FA4_FS_LOCATIONS 24 +#define FA4_HIDDEN 25 +#define FA4_HOMOGENEOUS 26 +#define FA4_MAXFILESIZE 27 +#define FA4_MAXLINK 28 +#define FA4_MAXNAME 29 +#define FA4_MAXREAD 30 +#define FA4_MAXWRITE 31 +#define FA4_MIMETYPE 32 +#define FA4_MODE 33 +#define FA4_NO_TRUNC 34 +#define FA4_NUMLINKS 35 +#define FA4_OWNER 36 +#define FA4_OWNER_GROUP 37 +#define FA4_QUOTA_HARD 38 +#define FA4_QUOTA_SOFT 39 +#define FA4_QUOTA_USED 40 +#define FA4_RAWDEV 41 +#define FA4_SPACE_AVAIL 42 +#define FA4_SPACE_FREE 43 +#define FA4_SPACE_TOTAL 44 +#define FA4_SPACE_USED 45 +#define FA4_SYSTEM 46 +#define FA4_TIME_ACCESS 47 +#define FA4_TIME_ACCESS_SET 48 +#define FA4_TIME_BACKUP 49 +#define FA4_TIME_CREATE 50 +#define FA4_TIME_DELTA 51 +#define FA4_TIME_METADATA 52 +#define FA4_TIME_MODIFY 53 +#define FA4_TIME_MODIFY_SET 54 +#define FA4_ATTR_MAX 55 + +/* Macros for v4 fattr manipulation */ +#define FA4_SET(n, p) ((p)[(n)/32] |= (1 << ((n) % 32))) +#define FA4_CLR(n, p) ((p)[(n)/32] &= ~(1 << ((n) % 32))) +#define FA4_ISSET(n, p) ((p)[(n)/32] & (1 << ((n) % 32))) +#define FA4_ZERO(p) bzero((p), 8) +#define FA4_SKIP(p) ((p) += 2) + struct nfsv2_sattr { u_int32_t sa_mode; u_int32_t sa_uid; diff --git a/sys/nfs/rpcv2.h b/sys/nfs/rpcv2.h index 977a8d3..e65d87b 100644 --- a/sys/nfs/rpcv2.h +++ b/sys/nfs/rpcv2.h @@ -53,6 +53,7 @@ #define RPCAUTH_NULL 0 #define RPCAUTH_UNIX 1 #define RPCAUTH_SHORT 2 +#define RPCAUTH_KERB4 4 #define RPCAUTH_MAXSIZ 400 #define RPCVERF_MAXSIZ 12 /* For Kerb, can actually be 400 */ #define RPCAUTH_UNIXGIDS 16 @@ -63,19 +64,28 @@ #define RPCAKN_FULLNAME 0 #define RPCAKN_NICKNAME 1 -/* Rpc Constants */ +/* msg type */ #define RPC_CALL 0 #define RPC_REPLY 1 + +/* reply status */ #define RPC_MSGACCEPTED 0 
#define RPC_MSGDENIED 1 + +/* accepted status */ +#define RPC_SUCCESS 0 #define RPC_PROGUNAVAIL 1 #define RPC_PROGMISMATCH 2 #define RPC_PROCUNAVAIL 3 #define RPC_GARBAGE 4 /* I like this one */ +#define RPC_SYSTEMERR 5 + +/* rejected status */ #define RPC_MISMATCH 0 #define RPC_AUTHERR 1 /* Authentication failures */ +#define AUTH_OK 0 #define AUTH_BADCRED 1 #define AUTH_REJECTCRED 2 #define AUTH_BADVERF 3 diff --git a/sys/nfs4client/nfs4.h b/sys/nfs4client/nfs4.h new file mode 100644 index 0000000..148c224 --- /dev/null +++ b/sys/nfs4client/nfs4.h @@ -0,0 +1,260 @@ +/* $FreeBSD$ */ +/* $Id: nfs4.h,v 1.25 2003/11/05 14:58:58 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. 
+ */ + +#ifndef _NFS4CLIENT_NFS4_H +#define _NFS4CLIENT_NFS4_H + +#define NFS4_USE_RPCCLNT + +#define NFS4_MINOR_VERSION 0 +#define NFS_PORT 2049 +#define NFS4_DEF_FILE_IO_BUFFER_SIZE 4096 +#define NFS4_MAX_FILE_IO_BUFFER_SIZE 32768 +#define NFS4_DEF_MAXFILESIZE 0xffffffff +#define NFS4_SUPER_MAGIC 0xF00BA4 + +#define NFS4FS_SILLY_RENAME 1 +#define NFS4FS_STRICT_LOCKING 1 +#define NFS4FS_RETRY_OLD_STATEID 1 +#define NFS4FS_MIN_LEASE (1 * hz) +#define NFS4FS_DEFAULT_LEASE (30 * hz) +#define NFS4FS_MAX_LEASE (120 * hz) +#define NFS4FS_RETRY_MIN_DELAY (hz >> 4) +#define NFS4FS_RETRY_MAX_DELAY (hz << 3) +#define NFS4FS_SEMAPHORE_DELAY (hz >> 4) +#define NFS4FS_GRACE_DELAY (hz * 5) +#define NFS4FS_OLD_STATEID_DELAY (hz >> 3) +#define NFS4FS_OP_MAX 10 + + +#define NFS4_BUFSIZE 8192 +#define NFS4FS_MAX_IOV 10 +#define NFS4_SETCLIENTID_MAXTRIES 5 +#define NFS4_READDIR_MAXTRIES 5 +#define NFS4_MAXIO 4 +#define NFS4_MAX_REQUEST_SOFT 192 +#define NFS4_MAX_REQUEST_HARD 256 +#define NFS4_MAXCOMMIT 64 +#define NFS4_READ_DELAY (2 * HZ) +#define NFS4_WRITEBACK_DELAY (5 * HZ) +#define NFS4_WRITEBACK_LOCKDELAY (60 * HZ) +#define NFS4_COMMIT_DELAY (5 * HZ) +#define RPC_SLACK_SPACE 512 + + +struct nfs4_compound { + char *tag; + + int req_nops; + uint32_t *req_nopsp; + uint32_t *req_seqidp; + uint32_t *req_stateidp[NFS4_MAXIO]; + uint32_t req_nstateid; + + u_int seqidused; + + int rep_status; + int rep_nops; + + struct nfs4_fctx *fcp; + + struct vnode *curvp; + struct vnode *savevp; + + struct nfsmount *nmp; +}; + +struct nfs4_fdata { + struct nfsnode *fd_n; + pid_t fd_pid; +}; + +struct nfs4_oparg_putfh { + /* filled in by caller */ +/* struct dentry *dentry;*/ + + /* filled in by setup routine */ +/* nfs_opnum4 op;*/ + uint32_t fh_len; + nfsfh_t fh_val; + int nlookups; +}; + +struct nfs4_oparg_getattr { + struct vnode *vp; + nfsv4bitmap *bm; + struct nfsv4_fattr fa; +}; + +struct nfs4_oparg_getfh { + uint32_t fh_len; + nfsfh_t fh_val; + struct vnode *vp; +}; + +struct 
nfs4_oparg_lookup { + const char *name; + uint32_t namelen; + struct vnode *vp; +}; + +struct nfs4_oparg_setclientid { + struct nfsmount *np; + uint32_t namelen; + char *name; + char *cb_netid; + uint32_t cb_netidlen; + char *cb_univaddr; + uint32_t cb_univaddrlen; + uint32_t cb_prog; + + uint64_t clientid; + u_char verf[NFSX_V4VERF]; +}; + +struct nfs4_oparg_access { + uint32_t mode; + uint32_t rmode; + uint32_t supported; +}; + +struct nfs4_oparg_open { + uint32_t flags; + uint32_t rflags; + + nfsv4cltype ctype; + struct vattr *vap; + struct componentname *cnp; + + struct nfs4_fctx *fcp; + + char stateid[NFSX_V4STATEID]; +}; + +struct nfs4_oparg_read { + uint64_t off; + uint32_t maxcnt; + uint32_t eof; + uint32_t retlen; + struct uio *uiop; + struct nfs4_fctx *fcp; +}; + +struct nfs4_oparg_write { + uint64_t off; + uint32_t stable; + uint32_t cnt; + uint32_t retlen; + uint32_t committed; + struct uio *uiop; + u_char wverf[NFSX_V4VERF]; + struct nfs4_fctx *fcp; +}; + +struct nfs4_oparg_commit { + uint32_t len; + off_t start; + + u_char verf[NFSX_V4VERF]; +}; + +struct nfs4_oparg_readdir { + uint32_t cnt; + nfsv4bitmap *bm; + uint64_t cookie; + u_char verf[NFSX_V4VERF]; +}; + +struct nfs4_oparg_create { + nfstype type; + char *linktext; + char *name; + uint32_t namelen; + struct vattr *vap; +}; + +struct nfs4_oparg_rename { + const char *fname; + uint32_t fnamelen; + const char *tname; + uint32_t tnamelen; +}; + +struct nfs4_oparg_link { + const char *name; + uint32_t namelen; +}; + +/* + * Lockowner + */ +struct nfs4_lowner { + uint32_t lo_cnt; + uint32_t lo_seqid; + uint32_t lo_id; +}; + +#define NFS4_SEQIDMUTATINGERROR(err) \ +(((err) != NFSERR_STALE_CLIENTID) && \ + ((err) != NFSERR_BAD_SEQID) && \ + ((err) != NFSERR_STALE_STATEID) && \ + ((err) != NFSERR_BAD_STATEID)) + +/* Standard bitmasks */ +extern nfsv4bitmap nfsv4_fsinfobm; +extern nfsv4bitmap nfsv4_fsattrbm; +extern nfsv4bitmap nfsv4_getattrbm; +extern nfsv4bitmap nfsv4_readdirbm; + +vfs_init_t 
nfs4_init; +vfs_uninit_t nfs4_uninit; + +uint32_t nfs_v4fileid4_to_fileid(uint64_t); + +int nfs4_readrpc(struct vnode *, struct uio *, struct ucred *); +int nfs4_writerpc(struct vnode *, struct uio *, struct ucred *, int *, + int *); +int nfs4_commit(struct vnode *vp, u_quad_t offset, int cnt, + struct ucred *cred, struct thread *td); +int nfs4_readdirrpc(struct vnode *, struct uio *, struct ucred *); +int nfs4_readlinkrpc(struct vnode *, struct uio *, struct ucred *); +int nfs4_sigintr(struct nfsmount *, struct nfsreq *, struct thread *); +int nfs4_writebp(struct buf *, int, struct thread *); +int nfs4_request(struct vnode *, struct mbuf *, int, struct thread *, + struct ucred *, struct mbuf **, struct mbuf **, caddr_t *); +int nfs4_request_mnt(struct nfsmount *, struct mbuf *, int, struct thread *, + struct ucred *, struct mbuf **, struct mbuf **, caddr_t *); +int nfs4_connect(struct nfsmount *); +void nfs4_disconnect(struct nfsmount *); +void nfs4_safedisconnect(struct nfsmount *); +int nfs4_nmcancelreqs(struct nfsmount *); + +void nfs_v4initcompound(struct nfs4_compound *); +int nfs_v4postop(struct nfs4_compound *, int); +int nfs_v4handlestatus(int, struct nfs4_compound *); + +#endif /* _NFS4CLIENT_NFS4_H */ diff --git a/sys/nfs4client/nfs4_dev.c b/sys/nfs4client/nfs4_dev.c new file mode 100644 index 0000000..09c9d57 --- /dev/null +++ b/sys/nfs4client/nfs4_dev.c @@ -0,0 +1,451 @@ +/* $FreeBSD$ */ +/* $Id: nfs4_dev.c,v 1.10 2003/11/05 14:58:59 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. 
if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/queue.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/poll.h> +#include <sys/mutex.h> +#include <sys/stat.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/wait.h> +#include <sys/signalvar.h> + +#include <nfs4client/nfs4_dev.h> + +#ifdef NFS4DEVVERBOSE +#define NFS4DEV_DEBUG(X...) printf(X) +#else +#define NFS4DEV_DEBUG(X...) +#endif + +#define NFS4DEV_NAME "nfs4" +#define CDEV_MAJOR 29 /* XXX where are these numbers assigned!?!? 
*/ +#define CDEV_MINOR 1 + +MALLOC_DEFINE(M_NFS4DEV, "NFS4 dev", "NFS4 device"); + +struct nfs4dev_upcall { + /* request msg */ + struct nfs4dev_msg up_reqmsg; + size_t up_reqmsglen; + + /* reply (payload only) */ + caddr_t up_rep; + size_t * up_replen; + + int up_copied; /* non-zero when reply has been copied to + '*up_rep' */ + + int up_error; /* non-zero if an error occured */ + + TAILQ_ENTRY(nfs4dev_upcall) up_entry; +}; + + +#define nfs4dev_upcall_get(MP) MALLOC((MP), struct nfs4dev_upcall *, sizeof(struct nfs4dev_upcall), M_NFS4DEV, M_WAITOK | M_ZERO) + +#define nfs4dev_upcall_put(MP) FREE((MP), M_NFS4DEV) + +static int nfs4dev_nopen = 0; +static struct thread * nfs4dev_reader = NULL; +static dev_t nfs4device = 0; +static struct mtx nfs4dev_daemon_mtx; + +static int nfs4dev_xid = 0; +/* queue of pending upcalls */ +TAILQ_HEAD(, nfs4dev_upcall) nfs4dev_newq; +static struct mtx nfs4dev_newq_mtx; + +/* queue of upcalls waiting for replys */ +TAILQ_HEAD(, nfs4dev_upcall) nfs4dev_waitq; +static struct mtx nfs4dev_waitq_mtx; + +/* dev hooks */ +static d_open_t nfs4dev_open; +static d_close_t nfs4dev_close; +static d_ioctl_t nfs4dev_ioctl; +static d_poll_t nfs4dev_poll; + +static struct cdevsw nfs4dev_cdevsw = { + .d_open = nfs4dev_open, + .d_close = nfs4dev_close, + .d_ioctl = nfs4dev_ioctl, + .d_poll = nfs4dev_poll, + .d_name = NFS4DEV_NAME, + .d_maj = CDEV_MAJOR +}; + +static int nfs4dev_reply(caddr_t); +static int nfs4dev_request(caddr_t); + +/* Userland requests a new operation to service */ +static int +nfs4dev_request(caddr_t addr) +{ + struct nfs4dev_upcall * u; + struct nfs4dev_msg * m = (struct nfs4dev_msg *) addr; + + mtx_lock(&nfs4dev_newq_mtx); + + if (TAILQ_EMPTY(&nfs4dev_newq)) { + mtx_unlock(&nfs4dev_newq_mtx); + return EAGAIN; + } + + u = TAILQ_FIRST(&nfs4dev_newq); + TAILQ_REMOVE(&nfs4dev_newq, u, up_entry); + mtx_unlock(&nfs4dev_newq_mtx); + + bcopy(&u->up_reqmsg, m, sizeof(struct nfs4dev_msg)); + + mtx_lock(&nfs4dev_waitq_mtx); + 
TAILQ_INSERT_TAIL(&nfs4dev_waitq, u, up_entry);
+	mtx_unlock(&nfs4dev_waitq_mtx);
+
+	return 0;
+}
+
+/*
+ * Userland hands back the answer to an earlier upcall.
+ *
+ * 'addr' is the struct nfs4dev_msg passed to the ioctl.  The message is
+ * validated (version, type, length), then matched by xid against the
+ * upcalls parked on nfs4dev_waitq.  A matched upcall is always unlinked
+ * from the queue before its sleeper is woken, either with the payload
+ * copied into up_rep (up_copied set) or with up_error set.
+ *
+ * Returns 0 on success, EINVAL for a malformed message, EIO when no
+ * waiting upcall has this xid or the type does not match, the daemon's
+ * msg_error if it reported one, or EFAULT when the reply is larger than
+ * the caller's buffer.
+ */
+static int
+nfs4dev_reply(caddr_t addr)
+{
+	struct nfs4dev_upcall * u;
+	struct nfs4dev_msg * m = (struct nfs4dev_msg *) addr;
+	int error;
+
+	if (m->msg_vers != NFS4DEV_VERSION) {
+		printf("nfs4dev version mismatch\n");
+		return EINVAL;
+	}
+
+	if (m->msg_type > NFS4DEV_MAX_TYPE) {
+		NFS4DEV_DEBUG("nfs4dev: unsupported message type\n");
+		return EINVAL;
+	}
+
+	if (m->msg_len == 0 || m->msg_len > NFS4DEV_MSG_MAX_DATALEN) {
+		NFS4DEV_DEBUG("bad message length\n");
+		return EINVAL;
+	}
+
+	/* match the reply with a request */
+	mtx_lock(&nfs4dev_waitq_mtx);
+	TAILQ_FOREACH(u, &nfs4dev_waitq, up_entry) {
+		if (m->msg_xid == u->up_reqmsg.msg_xid)
+			break;
+	}
+
+	if (u == NULL) {
+		/*
+		 * Nothing is waiting for this xid.  There is no upcall to
+		 * fail or wake, so just report the error to the daemon.
+		 */
+		mtx_unlock(&nfs4dev_waitq_mtx);
+		NFS4DEV_DEBUG("nfs4dev msg op: %d xid: %x not found.\n",
+		    m->msg_type, m->msg_xid);
+		return EIO;
+	}
+
+	/*
+	 * Unlink before waking: the sleeper frees the upcall as soon as it
+	 * runs, so it must never be woken while still on the queue.
+	 */
+	TAILQ_REMOVE(&nfs4dev_waitq, u, up_entry);
+	mtx_unlock(&nfs4dev_waitq_mtx);
+
+	if (m->msg_type != u->up_reqmsg.msg_type) {
+		NFS4DEV_DEBUG("nfs4dev: op type mismatch!\n");
+		error = EIO;
+		goto bad;
+	}
+
+	if (m->msg_error) {
+		error = m->msg_error;
+		goto bad;
+	}
+
+	if (m->msg_len > *u->up_replen) {
+		error = EFAULT;
+		goto bad;
+	}
+
+	bcopy(m->msg_data, u->up_rep, m->msg_len);
+	*u->up_replen = m->msg_len;
+
+	/* msg_len is non-zero here, so this also marks the reply as done */
+	u->up_copied = m->msg_len;
+	wakeup(u);
+
+	return 0;
+bad:
+	/* 'u' is a valid, already unlinked upcall on every path to here */
+	u->up_error = error;
+	wakeup(u);
+	return error;
+}
+
+/*
+ * Set up the xid seed, both upcall queues and their mutexes, the daemon
+ * state mutex, and create the "nfs4" device node.
+ */
+void
+nfs4dev_init(void)
+{
+	nfs4dev_xid = arc4random();
+	TAILQ_INIT(&nfs4dev_newq);
+	TAILQ_INIT(&nfs4dev_waitq);
+	mtx_init(&nfs4dev_newq_mtx, "nfs4dev newq", NULL, MTX_DEF);
+	mtx_init(&nfs4dev_waitq_mtx, "nfs4dev waitq", NULL, MTX_DEF);
+
+	mtx_init(&nfs4dev_daemon_mtx, "nfs4dev state", NULL, MTX_DEF);
+
+	nfs4device = make_dev(&nfs4dev_cdevsw, CDEV_MINOR, (uid_t)0, (gid_t)0,
+	    S_IRUSR | S_IWUSR, "nfs4");
+}
+
+void
+nfs4dev_uninit(void)
+{
+	struct proc * dead = NULL;
+
+	mtx_lock(&nfs4dev_daemon_mtx);
+	if (nfs4dev_nopen) {
+
if (nfs4dev_reader == NULL) {
+			NFS4DEV_DEBUG("nfs4dev uninit(): unregistered reader\n");
+		} else {
+			dead = nfs4dev_reader->td_proc;
+		}
+	}
+	mtx_unlock(&nfs4dev_daemon_mtx);
+
+	if (dead != NULL) {
+		NFS4DEV_DEBUG("nfs4dev_uninit(): you forgot to kill attached daemon (pid: %u)\n",
+		    dead->p_pid);
+		PROC_LOCK(dead);
+		psignal(dead, SIGTERM);
+		PROC_UNLOCK(dead);
+	}
+
+	/* XXX moot? */
+	nfs4dev_purge();
+
+	/*
+	 * Remove the device node before tearing down the mutexes: the
+	 * open/close/ioctl/poll entry points all take these locks, so the
+	 * node must go first.
+	 */
+	destroy_dev(nfs4device);
+
+	mtx_destroy(&nfs4dev_newq_mtx);
+	mtx_destroy(&nfs4dev_waitq_mtx);
+	mtx_destroy(&nfs4dev_daemon_mtx);
+}
+
+/* device interface functions */
+
+/*
+ * Open the upcall device.  Only one opener is allowed at a time; the
+ * opening thread is recorded as the reader.  Returns ENODEV for a foreign
+ * dev, EBUSY if already open.
+ */
+static int
+nfs4dev_open(dev_t dev, int flags, int fmt, d_thread_t *td)
+{
+	if (dev != nfs4device)
+		return ENODEV;
+
+	mtx_lock(&nfs4dev_daemon_mtx);
+	if (nfs4dev_nopen) {
+		mtx_unlock(&nfs4dev_daemon_mtx);
+		return EBUSY;
+	}
+
+	nfs4dev_nopen++;
+	nfs4dev_reader = curthread;
+	mtx_unlock(&nfs4dev_daemon_mtx);
+
+	return (0);
+}
+
+/*
+ * Close the upcall device: clear the reader and fail every upcall still
+ * waiting for a reply with EINTR.  Returns ENODEV for a foreign dev,
+ * ENOENT if the device was not open.
+ */
+static int
+nfs4dev_close(dev_t dev, int flags, int fmt, d_thread_t *td)
+{
+	struct nfs4dev_upcall * u;
+
+	if (dev != nfs4device)
+		return ENODEV;
+
+	mtx_lock(&nfs4dev_daemon_mtx);
+	if (!nfs4dev_nopen) {
+		mtx_unlock(&nfs4dev_daemon_mtx);
+		return ENOENT;
+	}
+
+	nfs4dev_nopen--;
+	nfs4dev_reader = NULL;
+	mtx_unlock(&nfs4dev_daemon_mtx);
+
+	mtx_lock(&nfs4dev_waitq_mtx);
+
+	while (!TAILQ_EMPTY(&nfs4dev_waitq)) {
+		u = TAILQ_FIRST(&nfs4dev_waitq);
+		TAILQ_REMOVE(&nfs4dev_waitq, u, up_entry);
+		u->up_error = EINTR;
+		wakeup(u);
+	}
+
+	mtx_unlock(&nfs4dev_waitq_mtx);
+
+	return 0;
+}
+
+/*
+ * ioctl entry point: NFS4DEVIOCGET fetches the next pending upcall,
+ * NFS4DEVIOCPUT delivers a reply.  The calling thread becomes the
+ * recorded reader.  Returns ENODEV for a foreign dev, EFAULT for a NULL
+ * data pointer, EOPNOTSUPP for an unknown command.
+ */
+static int
+nfs4dev_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
+{
+	int error;
+
+	if (dev != nfs4device)
+		return ENODEV;
+
+	if (data == NULL)
+		return EFAULT;
+
+	/* nfs4dev_reader is protected by the daemon mutex everywhere else */
+	mtx_lock(&nfs4dev_daemon_mtx);
+	if (nfs4dev_reader != curthread)
+		nfs4dev_reader = curthread;
+	mtx_unlock(&nfs4dev_daemon_mtx);
+
+	switch (cmd) {
+	case NFS4DEVIOCGET:
+		error = nfs4dev_request(data);
+		break;
+	case NFS4DEVIOCPUT:
+		error = nfs4dev_reply(data);
+		break;
+	default:
+		NFS4DEV_DEBUG("nfs4dev_ioctl: unkown ioctl cmd %d\n",
(int)cmd); + error = EOPNOTSUPP; + break; + } + + return error; +} + +static int +nfs4dev_poll(dev_t dev, int events, struct thread *td) +{ + int revents; + + if (dev != nfs4device) + return EINVAL; + + mtx_lock(&nfs4dev_daemon_mtx); + if (nfs4dev_nopen == 0) { + mtx_unlock(&nfs4dev_daemon_mtx); + return 0; + } + mtx_unlock(&nfs4dev_daemon_mtx); + + revents = 0; + + /* check readable data */ + mtx_lock(&nfs4dev_newq_mtx); + if (!TAILQ_EMPTY(&nfs4dev_newq)) + revents |= POLLIN; + mtx_unlock(&nfs4dev_newq_mtx); + + mtx_lock(&nfs4dev_waitq_mtx); + if (!TAILQ_EMPTY(&nfs4dev_waitq)) + revents |= POLLOUT; + mtx_unlock(&nfs4dev_waitq_mtx); + + return revents; +} + +int +nfs4dev_call(uint32_t type, caddr_t req_data, size_t req_len, caddr_t rep_data, size_t * rep_lenp) +{ + struct nfs4dev_upcall * u; + int error = 0; + unsigned int xtmp; + + mtx_lock(&nfs4dev_daemon_mtx); + if (nfs4dev_nopen == 0) { + mtx_unlock(&nfs4dev_daemon_mtx); + return EINVAL; + } + mtx_unlock(&nfs4dev_daemon_mtx); + + if (type > NFS4DEV_MAX_TYPE) + return EOPNOTSUPP; + + NFS4DEV_DEBUG("upcall %d/%d:%d\n", type, req_len, *rep_lenp); + + nfs4dev_upcall_get(u); + + u->up_error = 0; + u->up_rep = rep_data; + u->up_replen = rep_lenp; + u->up_copied = 0; + + u->up_reqmsg.msg_vers = NFS4DEV_VERSION; + /* XXX efficient copying */ + bcopy(req_data, u->up_reqmsg.msg_data, req_len); + u->up_reqmsg.msg_len = req_len; + + mtx_lock(&nfs4dev_newq_mtx); + + /* get new XID */ + while ((xtmp = arc4random() % 256) == 0); + nfs4dev_xid += xtmp; + u->up_reqmsg.msg_xid = nfs4dev_xid; + + TAILQ_INSERT_TAIL(&nfs4dev_newq, u, up_entry); + mtx_unlock(&nfs4dev_newq_mtx); + + + NFS4DEV_DEBUG("nfs4dev op: %d xid: %x sleeping\n", u->up_reqmsg.msg_type, u->up_reqmsg.msg_xid); + + do { + tsleep(u, PLOCK, "nfs4dev", 0); + } while (u->up_copied == 0 && u->up_error == 0); + + /* upcall now removed from the queue */ + + NFS4DEV_DEBUG("nfs4dev prog: %d xid: %x continues...\n", + u->up_reqmsg.msg_type, u->up_reqmsg.msg_xid); + + if 
(u->up_error) { + error = u->up_error; + NFS4DEV_DEBUG("nfs4dev prog: %d xid: %x error: %d\n", + u->up_reqmsg.msg_type, u->up_reqmsg.msg_xid, u->up_error); + goto out; + } + +out: + nfs4dev_upcall_put(u); + return error; +} + +void +nfs4dev_purge(void) +{ + struct nfs4dev_upcall * u; + + mtx_lock(&nfs4dev_newq_mtx); + while (!TAILQ_EMPTY(&nfs4dev_newq)) { + u = TAILQ_FIRST(&nfs4dev_newq); + TAILQ_REMOVE(&nfs4dev_newq, u, up_entry); + u->up_error = EINTR; + wakeup(u); + } + mtx_unlock(&nfs4dev_newq_mtx); + + mtx_lock(&nfs4dev_waitq_mtx); + while (!TAILQ_EMPTY(&nfs4dev_waitq)) { + u = TAILQ_FIRST(&nfs4dev_waitq); + TAILQ_REMOVE(&nfs4dev_waitq, u, up_entry); + u->up_error = EINTR; + wakeup(u); + } + mtx_unlock(&nfs4dev_waitq_mtx); +} diff --git a/sys/nfs4client/nfs4_dev.h b/sys/nfs4client/nfs4_dev.h new file mode 100644 index 0000000..218f1de --- /dev/null +++ b/sys/nfs4client/nfs4_dev.h @@ -0,0 +1,65 @@ +/* $FreeBSD$ */ +/* $Id: nfs4_dev.h,v 1.3 2003/11/05 14:58:59 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. 
the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +#ifndef _NFS3_DEV_H_ +#define _NFS4_DEV_H_ + +#include <sys/ioccom.h> + + +/* type of upcall */ +#define NFS4DEV_TYPE_IDMAP 0 +#define NFS4DEV_TYPE_GSS 1 +#define NFS4DEV_MAX_TYPE 1 + +struct nfs4dev_msg { + unsigned int msg_vers; + unsigned int msg_type; + unsigned int msg_xid; + unsigned int msg_error; + + #define NFS4DEV_MSG_MAX_DATALEN 350 + size_t msg_len; + uint8_t msg_data[NFS4DEV_MSG_MAX_DATALEN]; +}; + +#define NFS4DEV_VERSION (0x3 << 16 | sizeof(struct nfs4dev_msg)) + +/* ioctl commands */ +#define NFS4DEVIOCGET _IOR('A', 0x200, struct nfs4dev_msg) +#define NFS4DEVIOCPUT _IOW('A', 0x201, struct nfs4dev_msg) + +#ifdef _KERNEL +int nfs4dev_call(uint32_t type, caddr_t req_data, size_t req_len, caddr_t rep_datap, size_t * rep_lenp); + +void nfs4dev_purge(void); + +void nfs4dev_init(void); +void nfs4dev_uninit(void); +#endif + +#endif /* _NFS4_DEV_H_ */ diff --git a/sys/nfs4client/nfs4_idmap.c b/sys/nfs4client/nfs4_idmap.c new file mode 100644 index 0000000..1b932e6 --- /dev/null +++ b/sys/nfs4client/nfs4_idmap.c @@ -0,0 +1,513 @@ +/* $FreeBSD$ */ +/* $Id: nfs4_idmap.c,v 1.4 2003/11/05 14:58:59 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. 
if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +/* TODO: + * o validate ascii + * */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/lockmgr.h> +#include <sys/fnv_hash.h> +#include <sys/proc.h> +#include <sys/syscall.h> +#include <sys/sysent.h> +#include <sys/libkern.h> + +#include <rpc/rpcclnt.h> + +#include <nfs4client/nfs4_dev.h> +#include <nfs4client/nfs4_idmap.h> + + +#ifdef IDMAPVERBOSE +#define IDMAP_DEBUG(X...) printf(X); +#else +#define IDMAP_DEBUG(X...) 
+#endif + +#define IDMAP_HASH_SIZE 37 + +MALLOC_DEFINE(M_IDMAP, "idmap", "idmap"); + +#define idmap_entry_get(ID) MALLOC((ID), struct idmap_entry, sizeof(struct idmap_entry), M_IDMAP, M_WAITOK | M_ZERO) +#define idmap_entry_put(ID) FREE((ID), M_IDMAP) + + + +struct idmap_entry { + struct idmap_msg id_info; + + TAILQ_ENTRY(idmap_entry) id_entry_id; + TAILQ_ENTRY(idmap_entry) id_entry_name; +}; + +struct idmap_hash { + TAILQ_HEAD(, idmap_entry) hash_name[IDMAP_HASH_SIZE]; + TAILQ_HEAD(, idmap_entry) hash_id[IDMAP_HASH_SIZE]; + + struct lock hash_lock; +}; + +#define IDMAP_RLOCK(lock) lockmgr(lock, LK_SHARED, NULL, curthread) +#define IDMAP_WLOCK(lock) lockmgr(lock, LK_EXCLUSIVE, NULL, curthread) +#define IDMAP_UNLOCK(lock) lockmgr(lock, LK_RELEASE, NULL, curthread) + + +static struct idmap_hash idmap_uid_hash; +static struct idmap_hash idmap_gid_hash; + +static struct idmap_entry * idmap_name_lookup(uint32_t, char *); +static struct idmap_entry * idmap_id_lookup(uint32_t, ident_t); +static int idmap_upcall_name(uint32_t, char *, struct idmap_entry **); +static int idmap_upcall_id(uint32_t , ident_t, struct idmap_entry ** ); +static int idmap_add(struct idmap_entry *); + +static int +idmap_upcall_name(uint32_t type, char * name, struct idmap_entry ** found) +{ + int error; + struct idmap_entry * e; + size_t len, siz; + + if (type > IDMAP_MAX_TYPE || type == 0) { + IDMAP_DEBUG("bad type %d\n", type); + return EINVAL; /* XXX */ + } + + if (name == NULL || (len = strlen(name)) == 0 || len > IDMAP_MAXNAMELEN) { + IDMAP_DEBUG("idmap_upcall_name: bad name\n"); + return EFAULT; /* XXX */ + } + + MALLOC(e, struct idmap_entry *, sizeof(struct idmap_entry), M_IDMAP, + M_WAITOK | M_ZERO); + + e->id_info.id_type = type; + bcopy(name, e->id_info.id_name, len); + e->id_info.id_namelen = len; + + + siz = sizeof(struct idmap_msg); + error = nfs4dev_call(NFS4DEV_TYPE_IDMAP, (caddr_t)&e->id_info, siz, + (caddr_t)&e->id_info, &siz); + + if (error) { + IDMAP_DEBUG("error %d in 
nfs4dev_upcall()\n", error); + *found = NULL; + return error; + } + + if (siz != sizeof(struct idmap_msg)) { + IDMAP_DEBUG("bad size of returned message\n"); + *found = NULL; + return EFAULT; + } + + + *found = e; + return 0; +} + +static int +idmap_upcall_id(uint32_t type, ident_t id, struct idmap_entry ** found) +{ + int error; + struct idmap_entry * e; + size_t siz; + + if (type > IDMAP_MAX_TYPE) + panic("bad type"); /* XXX */ + + MALLOC(e, struct idmap_entry *, sizeof(struct idmap_entry), M_IDMAP, + M_WAITOK | M_ZERO); + + e->id_info.id_type = type; + e->id_info.id_namelen = 0; /* should already */ + e->id_info.id_id = id; + + siz = sizeof(struct idmap_msg); + error = nfs4dev_call(NFS4DEV_TYPE_IDMAP, (caddr_t)&e->id_info, siz, + (caddr_t)&e->id_info, &siz); + + if (error) { + IDMAP_DEBUG("error %d in nfs4dev_upcall()\n", error); + *found = NULL; + return error; + } + + if (siz != sizeof(struct idmap_msg)) { + IDMAP_DEBUG("bad size of returned message\n"); + *found = NULL; + return EFAULT; + } + + *found = e; + return 0; +} + +static int +idmap_hashf(struct idmap_entry *e, uint32_t * hval_id, uint32_t * hval_name) +{ + switch (e->id_info.id_type) { + case IDMAP_TYPE_UID: + *hval_id = e->id_info.id_id.uid % IDMAP_HASH_SIZE; + break; + case IDMAP_TYPE_GID: + *hval_id = e->id_info.id_id.gid % IDMAP_HASH_SIZE; + break; + default: + /* XXX yikes! 
*/ + panic("hashf: bad type!"); + break; + } + + if (e->id_info.id_namelen == 0) + /* XXX */ panic("hashf: bad name"); + + *hval_name = fnv_32_str(e->id_info.id_name, FNV1_32_INIT) % IDMAP_HASH_SIZE; + return 0; +} + +static int +idmap_add(struct idmap_entry * e) +{ + struct idmap_hash * hash; + uint32_t hval_id, hval_name; + + if (e == NULL) + panic("idmap_add null"); + + if (e->id_info.id_namelen == 0) + panic("idmap_add name of len 0"); + + switch (e->id_info.id_type) { + case IDMAP_TYPE_UID: + hash = &idmap_uid_hash; + break; + case IDMAP_TYPE_GID: + hash = &idmap_gid_hash; + break; + default: + /* XXX yikes */ + panic("idmap add: bad type!"); + break; + } + + if (idmap_hashf(e, &hval_id, &hval_name) != 0) { + IDMAP_DEBUG("idmap_hashf fails!\n"); + return -1; + } + + IDMAP_WLOCK(&hash->hash_lock); + + TAILQ_INSERT_TAIL(&hash->hash_id[hval_id], e, id_entry_id); + TAILQ_INSERT_TAIL(&hash->hash_name[hval_name], e, id_entry_name); + + IDMAP_UNLOCK(&hash->hash_lock); + + return 0; +} + +/* + * Find the cached entry for a numeric uid/gid. Returns the entry, or NULL + * when no entry with that id is in the table. Panics on an unknown type. + */ +static struct idmap_entry * +idmap_id_lookup(uint32_t type, ident_t id) +{ + struct idmap_hash * hash; + uint32_t hval; + struct idmap_entry * e; + + switch (type) { + case IDMAP_TYPE_UID: + hash = &idmap_uid_hash; + hval = id.uid % IDMAP_HASH_SIZE; + break; + case IDMAP_TYPE_GID: + hash = &idmap_gid_hash; + hval = id.gid % IDMAP_HASH_SIZE; + break; + default: + /* XXX yikes */ + panic("lookup: bad type!"); + break; + } + + + IDMAP_RLOCK(&hash->hash_lock); + + /* hash_id buckets are linked through id_entry_id (see idmap_add), not id_entry_name */ + TAILQ_FOREACH(e, &hash->hash_id[hval], id_entry_id) { + if ((type == IDMAP_TYPE_UID && e->id_info.id_id.uid == id.uid)|| + (type == IDMAP_TYPE_GID && e->id_info.id_id.gid == id.gid)) { + IDMAP_UNLOCK(&hash->hash_lock); + return e; + } + } + + IDMAP_UNLOCK(&hash->hash_lock); + return NULL; +} + +static struct idmap_entry * +idmap_name_lookup(uint32_t type, char * name) +{ + struct idmap_hash * hash; + uint32_t hval; + struct idmap_entry * e; + size_t len; + + switch (type) { + case IDMAP_TYPE_UID: + hash = &idmap_uid_hash; +
break; + case IDMAP_TYPE_GID: + hash = &idmap_gid_hash; + break; + default: + /* XXX yikes */ + panic("lookup: bad type!"); + break; + } + + len = strlen(name); + + if (len == 0 || len > IDMAP_MAXNAMELEN) { + IDMAP_DEBUG("bad name length %d\n", len); + return NULL; + } + + hval = fnv_32_str(name, FNV1_32_INIT) % IDMAP_HASH_SIZE; + + IDMAP_RLOCK(&hash->hash_lock); + + TAILQ_FOREACH(e, &hash->hash_name[hval], id_entry_name) { + if ((strlen(e->id_info.id_name) == strlen(name)) && strncmp(e->id_info.id_name, name, strlen(name)) == 0) { + IDMAP_UNLOCK(&hash->hash_lock); + return e; + } + } + + IDMAP_UNLOCK(&hash->hash_lock); + return NULL; +} + +void +idmap_init(void) +{ + unsigned int i; + + for (i=0; i<IDMAP_HASH_SIZE; i++) { + TAILQ_INIT(&idmap_uid_hash.hash_name[i]); + TAILQ_INIT(&idmap_uid_hash.hash_id[i]); + + TAILQ_INIT(&idmap_gid_hash.hash_name[i]); + TAILQ_INIT(&idmap_gid_hash.hash_id[i]); + } + + lockinit(&idmap_uid_hash.hash_lock, PLOCK, "idmap uid hash table", 0,0); + lockinit(&idmap_gid_hash.hash_lock, PLOCK, "idmap gid hash table", 0,0); + +} + +void idmap_uninit(void) +{ + struct idmap_entry * e; + int i; + + lockdestroy(&idmap_uid_hash.hash_lock); + lockdestroy(&idmap_gid_hash.hash_lock); + + for (i=0; i<IDMAP_HASH_SIZE; i++) { + while(!TAILQ_EMPTY(&idmap_uid_hash.hash_name[i])) { + e = TAILQ_FIRST(&idmap_uid_hash.hash_name[i]); + TAILQ_REMOVE(&idmap_uid_hash.hash_name[i], e, id_entry_name); + TAILQ_REMOVE(&idmap_uid_hash.hash_id[i], e, id_entry_id); + FREE(e, M_IDMAP); + } + + while(!TAILQ_EMPTY(&idmap_gid_hash.hash_name[i])) { + e = TAILQ_FIRST(&idmap_gid_hash.hash_name[i]); + TAILQ_REMOVE(&idmap_gid_hash.hash_name[i], e, id_entry_name); + TAILQ_REMOVE(&idmap_gid_hash.hash_id[i], e, id_entry_id); + FREE(e, M_IDMAP); + } + + } +} + +int +idmap_uid_to_name(uid_t uid, char ** name, size_t * len) +{ + struct idmap_entry * e; + int error = 0; + ident_t id; + + id.uid = uid; + + + if ((e = idmap_id_lookup(IDMAP_TYPE_UID, id)) == NULL) { + if ((error = 
idmap_upcall_id(IDMAP_TYPE_UID, id, &e)) != 0) { + IDMAP_DEBUG("error in upcall\n"); + return error; + } + + if (e == NULL) { + IDMAP_DEBUG("no error from upcall, but no data returned\n"); + return EFAULT; + } + + if (idmap_add(e) != 0) { + IDMAP_DEBUG("idmap_add failed\n"); + FREE(e, M_IDMAP); + } + } + + *name = e->id_info.id_name; + *len = e->id_info.id_namelen; + return 0; +} + +int +idmap_gid_to_name(gid_t gid, char ** name, size_t * len) +{ + struct idmap_entry * e; + int error = 0; + ident_t id; + + id.gid = gid; + + + if ((e = idmap_id_lookup(IDMAP_TYPE_GID, id)) == NULL) { + if ((error = idmap_upcall_id(IDMAP_TYPE_GID, (ident_t)id, &e))) { + IDMAP_DEBUG("error in upcall\n"); + return error; + } + + if (e == NULL) { + IDMAP_DEBUG("no error from upcall, but no data returned\n"); + return EFAULT; + } + + if (idmap_add(e) != 0) { + IDMAP_DEBUG("idmap_add failed\n"); + FREE(e, M_IDMAP); + } + } + + *name = e->id_info.id_name; + *len = e->id_info.id_namelen; + return 0; +} + +int +idmap_name_to_uid(char * name, size_t len, uid_t * id) +{ + struct idmap_entry * e; + int error = 0; + char * namestr; + + if (name == NULL ) + return EFAULT; + + if (len == 0 || len > IDMAP_MAXNAMELEN) { + IDMAP_DEBUG("idmap_name_to_uid: bad len\n"); + return EINVAL; + } + + /* XXX hack */ + MALLOC(namestr, char *, len + 1, M_TEMP, M_WAITOK); + bcopy(name, namestr, len); + namestr[len] = '\0'; + + + if ((e = idmap_name_lookup(IDMAP_TYPE_UID, namestr)) == NULL) { + if ((error = idmap_upcall_name(IDMAP_TYPE_UID, namestr, &e))) { + FREE(namestr, M_TEMP); + return error; + } + + if (e == NULL) { + IDMAP_DEBUG("no error from upcall, but no data returned\n"); + FREE(namestr, M_TEMP); + return EFAULT; + } + + if (idmap_add(e) != 0) { + IDMAP_DEBUG("idmap_add failed\n"); + FREE(e, M_IDMAP); + } + } + + *id = e->id_info.id_id.uid; + FREE(namestr, M_TEMP); + return 0; +} + +int +idmap_name_to_gid(char * name, size_t len, gid_t * id) +{ + struct idmap_entry * e; + int error = 0; + + char * 
namestr; + + if (name == NULL ) + return EFAULT; + + if (len == 0 || len > IDMAP_MAXNAMELEN) { + IDMAP_DEBUG("idmap_name_to_uid: bad len\n"); + return EINVAL; + } + + /* XXX hack */ + MALLOC(namestr, char *, len + 1, M_TEMP, M_WAITOK); + bcopy(name, namestr, len); + namestr[len] = '\0'; + + + if ((e = idmap_name_lookup(IDMAP_TYPE_GID, namestr)) == NULL) { + if ((error = idmap_upcall_name(IDMAP_TYPE_GID, namestr, &e)) != 0) { + IDMAP_DEBUG("error in upcall\n"); + FREE(namestr, M_TEMP); + return error; + } + + if (e == NULL) { + IDMAP_DEBUG("no error from upcall, but no data returned\n"); + FREE(namestr, M_TEMP); + return EFAULT; + } + + if (idmap_add(e) != 0) { + IDMAP_DEBUG("idmap_add failed\n"); + FREE(e, M_IDMAP); + } + } + + *id = e->id_info.id_id.gid; + FREE(namestr, M_TEMP); + return 0; +} diff --git a/sys/nfs4client/nfs4_idmap.h b/sys/nfs4client/nfs4_idmap.h new file mode 100644 index 0000000..86cd01b --- /dev/null +++ b/sys/nfs4client/nfs4_idmap.h @@ -0,0 +1,64 @@ +/* $FreeBSD$ */ +/* $Id: nfs4_idmap.h,v 1.2 2003/11/05 14:58:59 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. 
+ * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +#ifndef __NFS4_IDMAP_H__ +#define __NFS4_IDMAP_H__ + +#define IDMAP_TYPE_GID 1 +#define IDMAP_TYPE_UID 2 +#define IDMAP_MAX_TYPE 2 + +typedef union { + uid_t uid; + gid_t gid; +} ident_t; + +#define IDMAP_MAXNAMELEN 249 +struct idmap_msg { + uint32_t id_type; + ident_t id_id; + size_t id_namelen; + char id_name[IDMAP_MAXNAMELEN + 1]; +}; + + +#ifdef _KERNEL +MALLOC_DECLARE(M_IDMAP); + +void idmap_init(void); +void idmap_uninit(void); + +int idmap_gid_to_name(gid_t id, char ** name, size_t * len); +int idmap_uid_to_name(uid_t id, char ** name, size_t * len); + +int idmap_name_to_gid(char *, size_t len, gid_t *); +int idmap_name_to_uid(char *, size_t len, uid_t *); + +#endif /* _KERNEL */ + + +#endif /* __NFS4_IDMAP_H__ */ diff --git a/sys/nfs4client/nfs4_socket.c b/sys/nfs4client/nfs4_socket.c new file mode 100644 index 0000000..f997d52 --- /dev/null +++ b/sys/nfs4client/nfs4_socket.c @@ -0,0 +1,340 @@ +/* $FreeBSD$ */ +/* $Id: nfs_socket.c,v 1.12 2003/11/05 14:59:01 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any
advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +/* + * Copyright (c) 1989, 1991, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * Socket operations for use by nfs + */ + +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/protosw.h> +#include <sys/signalvar.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/syslog.h> +#include <sys/vnode.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> + +#include <rpc/rpcclnt.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfsclient/nfs.h> +#include <nfs4client/nfs4.h> +#include <nfs/xdr_subs.h> +#include <nfsclient/nfsm_subs.h> +#include <nfsclient/nfsmount.h> + +#ifdef NFS4_USE_RPCCLNT +#include <rpc/rpcclnt.h> +#include <rpc/rpcm_subs.h> +#endif + +#ifdef NFS4_USE_RPCCLNT +static struct rpc_program nfs_program = { + NFS_PROG, NFS_VER4, "NFSv4" +}; +#endif + + +int +nfs4_connect(struct nfsmount *nmp) +{ + struct rpcclnt * rpc = &nmp->nm_rpcclnt; + struct rpc_auth * auth; + int flag = 0; + int error; + + /* XXX hack! */ +#ifdef __OpenBSD__ + struct proc * td = curproc; +#else + struct thread * td = curthread; +#endif + + MALLOC(auth, struct rpc_auth *, sizeof(struct rpc_auth), M_TEMP, M_WAITOK); + auth->auth_type = RPCAUTH_UNIX; + + /* translate nfs flags -> rpcclnt flags */ + if (nmp->nm_flag & NFSMNT_SOFT) + flag |= RPCCLNT_SOFT; + + if (nmp->nm_flag & NFSMNT_INT) + flag |= RPCCLNT_INT; + + if (nmp->nm_flag & NFSMNT_NOCONN) + flag |= RPCCLNT_NOCONN; + + if (nmp->nm_flag & NFSMNT_DUMBTIMR) + flag |= RPCCLNT_DUMBTIMR; + + /* rpc->rc_servername = nmp->nm_mountp->mnt_stat.f_mntfromname; */ + + error = rpcclnt_setup(rpc, &nfs_program, nmp->nm_nam, nmp->nm_sotype, + nmp->nm_soproto, auth, + /* XXX: check nmp->nm_flag to make sure these are set */ + (nmp->nm_rsize > nmp->nm_readdirsize) ? 
nmp->nm_rsize : nmp->nm_readdirsize, + nmp->nm_wsize, flag); + + /* set deadthresh, timeo, retry */ + rpc->rc_deadthresh = nmp->nm_deadthresh; + rpc->rc_timeo = nmp->nm_timeo; + rpc->rc_retry = nmp->nm_retry; + + + if (error) + return error; + + return rpcclnt_connect(rpc, td); +} + +/* + * NFS disconnect. Clean up and unlink. + */ +void +nfs4_disconnect(struct nfsmount *nmp) +{ + rpcclnt_disconnect(&nmp->nm_rpcclnt); +} + +void +nfs4_safedisconnect(struct nfsmount *nmp) +{ + rpcclnt_safedisconnect(&nmp->nm_rpcclnt); +} + +/* + * nfs_request - goes something like this + * - fill in request struct + * - links it into list + * - calls nfs_send() for first transmit + * - calls nfs_receive() to get reply + * - break down rpc header and return with nfs reply pointed to + * by mrep or error + * nb: always frees up mreq mbuf list + */ +/* XXX overloaded before */ +#define NQ_TRYLATERDEL 15 /* Initial try later delay (sec) */ + +int +nfs4_request(struct vnode *vp, struct mbuf *mrest, int procnum, + struct thread *td, struct ucred *cred, struct mbuf **mrp, + struct mbuf **mdp, caddr_t *dposp) +{ + int error; + u_int32_t *tl; + struct nfsmount * nmp = VFSTONFS(vp->v_mount); + struct rpcclnt * clnt = &nmp->nm_rpcclnt; + struct mbuf *md, *mrep; + caddr_t dpos; + struct rpc_reply reply; + + if ((error = rpcclnt_request(clnt, mrest, procnum, td, cred, + &reply)) != 0) { + goto out; + } + + /* XXX: don't free mrest if an error occured, to allow caller to retry*/ + m_freem(mrest); + mrep = reply.mrep; + md = reply.result_md; + dpos = reply.result_dpos; + + tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); + if (*tl != 0) { + error = fxdr_unsigned(int, *tl); + + #if 0 + if ((nmp->nm_flag & NFSMNT_NFSV3) && + error == NFSERR_TRYLATER) { + m_freem(mrep); + error = 0; + waituntil = time_second + trylater_delay; + while (time_second < waituntil) + (void) tsleep(&lbolt, PSOCK, "nqnfstry", 0); + trylater_delay *= nfs_backoff[trylater_cnt]; + if (trylater_cnt < NFS_NBACKOFF - 1) + 
trylater_cnt++; + goto tryagain; + } + #endif + + /* + ** If the File Handle was stale, invalidate the + ** lookup cache, just in case. + **/ + if (error == ESTALE) + cache_purge(vp); + goto out; + } + + *mrp = mrep; + *mdp = md; + *dposp = dpos; + return (0); +nfsmout: +out: + m_freem(reply.mrep); + *mrp = NULL; + *mdp = NULL; + return (error); +} + + +int +nfs4_request_mnt(struct nfsmount *nmp, struct mbuf *mrest, int procnum, + struct thread *td, struct ucred *cred, struct mbuf **mrp, + struct mbuf **mdp, caddr_t *dposp) +{ + int error; + u_int32_t *tl; + struct rpcclnt * clnt = &nmp->nm_rpcclnt; + struct mbuf *md, *mrep; + caddr_t dpos; + struct rpc_reply reply; + + if ((error = rpcclnt_request(clnt, mrest, procnum, td, cred, + &reply)) != 0) { + goto out; + } + + /* XXX: don't free mrest if an error occured, to allow caller to retry*/ + m_freem(mrest); + mrep = reply.mrep; + md = reply.result_md; + dpos = reply.result_dpos; + + tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); + if (*tl != 0) { + error = fxdr_unsigned(int, *tl); +#if 0 + if ((nmp->nm_flag & NFSMNT_NFSV3) && + error == NFSERR_TRYLATER) { + m_freem(mrep); + error = 0; + waituntil = time_second + trylater_delay; + while (time_second < waituntil) + (void) tsleep(&lbolt, PSOCK, "nqnfstry", 0); + trylater_delay *= nfs_backoff[trylater_cnt]; + if (trylater_cnt < NFS_NBACKOFF - 1) + trylater_cnt++; + goto tryagain; + } +#endif + goto out; + } + + *mrp = mrep; + *mdp = md; + *dposp = dpos; + return (0); +nfsmout: +out: + m_freem(reply.mrep); + *mrp = NULL; + *mdp = NULL; + return (error); +} + + +/* + * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and + * wait for all requests to complete. This is used by forced unmounts + * to terminate any outstanding RPCs. + */ +int +nfs4_nmcancelreqs(nmp) + struct nfsmount *nmp; +{ + return rpcclnt_cancelreqs(&nmp->nm_rpcclnt); +} + +/* + * Test for a termination condition pending on the process. + * This is used for NFSMNT_INT mounts. 
+ */ +int +nfs4_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td) +{ + if (rep != NULL) { + printf("nfs_sigintr: attempting to use nfsreq != NULL\n"); + return EINTR; + } + return rpcclnt_sigintr(&nmp->nm_rpcclnt, NULL, td); +} diff --git a/sys/nfs4client/nfs4_subs.c b/sys/nfs4client/nfs4_subs.c new file mode 100644 index 0000000..22ccfa8 --- /dev/null +++ b/sys/nfs4client/nfs4_subs.c @@ -0,0 +1,1352 @@ +/* $FreeBSD$ */ +/* $Id: nfs4_subs.c,v 1.52 2003/11/05 14:58:59 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. 
+ */ + +#include <sys/cdefs.h> + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/malloc.h> +#include <sys/sysent.h> +#include <sys/syscall.h> +#include <sys/sysproto.h> +#include <sys/fcntl.h> + +#include <machine/stdarg.h> + +#include <vm/vm.h> +#include <vm/vm_object.h> +#include <vm/vm_extern.h> +#include <vm/uma.h> + +#include <rpc/rpcclnt.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfsclient/nfs.h> +#include <nfs4client/nfs4.h> +#include <nfsclient/nfsnode.h> +#include <nfsclient/nfsmount.h> +#include <nfs/xdr_subs.h> +#include <nfsclient/nfsm_subs.h> + +#include <nfs4client/nfs4_dev.h> +#include <nfs4client/nfs4_idmap.h> +#include <nfs4client/nfs4m_subs.h> + +#include <netinet/in.h> + +#define NFSM_DISSECT(s) do { \ + tl = nfsm_dissect_xx((s), md, dpos); \ + if (tl == NULL) { \ + printf("NFSM_DISSECT error; allocation (%s/%d) (%s:%d)\n", #s, s, __FILE__, __LINE__); \ + return (EBADRPC); \ + } \ +} while (0) + +#define NFSM_ADV(s) do { \ + t1 = nfsm_adv_xx((s), md, dpos); \ + if (t1 != 0) { \ + printf("NFSM_ADV error; allocation (%s/%d) (%s:%d)\n", #s, s, __FILE__, __LINE__); \ + return (EBADRPC); \ + } \ +} while (0) + +#define NFSM_MTOTIME(t) do { \ + NFSM_DISSECT(3 * NFSX_UNSIGNED); \ + (t).tv_sec = fxdr_hyper(tl); \ + tl += 2; \ + (t).tv_nsec = fxdr_unsigned(long, *tl++); \ +} while (0) + +static uint32_t __fsinfo_bm[2], __fsattr_bm[2], __getattr_bm[2], __readdir_bm[2]; + +nfsv4bitmap nfsv4_fsinfobm = { 2, __fsinfo_bm }; +nfsv4bitmap nfsv4_fsattrbm = { 2, __fsattr_bm }; +nfsv4bitmap nfsv4_getattrbm = { 2, __getattr_bm }; +nfsv4bitmap nfsv4_readdirbm = { 2, __readdir_bm }; + +/* Helper routines */ +int nfsm_v4build_attrs_xx(struct vattr *, struct mbuf **, caddr_t *); +int 
nfsm_v4dissect_changeinfo_xx(nfsv4changeinfo *, struct mbuf **, caddr_t *); + +void +nfsm_v4init(void) +{ + + /* Set up bitmasks */ + FA4_SET(FA4_FSID, __fsinfo_bm); + FA4_SET(FA4_MAXREAD, __fsinfo_bm); + FA4_SET(FA4_MAXWRITE, __fsinfo_bm); + FA4_SET(FA4_LEASE_TIME, __fsinfo_bm); + + FA4_SET(FA4_FSID, __fsattr_bm); + FA4_SET(FA4_FILES_FREE, __fsattr_bm); + FA4_SET(FA4_FILES_TOTAL, __fsattr_bm); + FA4_SET(FA4_SPACE_AVAIL, __fsattr_bm); + FA4_SET(FA4_SPACE_FREE, __fsattr_bm); + FA4_SET(FA4_SPACE_TOTAL, __fsattr_bm); + + FA4_SET(FA4_TYPE, __getattr_bm); + FA4_SET(FA4_FSID, __getattr_bm); + FA4_SET(FA4_SIZE, __getattr_bm); + FA4_SET(FA4_MODE, __getattr_bm); + FA4_SET(FA4_RAWDEV, __getattr_bm); + FA4_SET(FA4_NUMLINKS, __getattr_bm); + FA4_SET(FA4_OWNER, __getattr_bm); + FA4_SET(FA4_OWNER_GROUP, __getattr_bm); + FA4_SET(FA4_FILEID, __getattr_bm); + FA4_SET(FA4_TIME_MODIFY, __getattr_bm); + FA4_SET(FA4_TIME_ACCESS, __getattr_bm); +/* FA4_SET(FA4_TIME_CREATE, __getattr_bm);*/ + + FA4_SET(FA4_TYPE, __readdir_bm); + FA4_SET(FA4_FSID, __readdir_bm); + FA4_SET(FA4_FILEID, __readdir_bm); +} + +/* + * Util + */ + +uint32_t +nfs_v4fileid4_to_fileid(uint64_t fid) +{ + return ((uint32_t)((fid >> 32) | fid)); +} + +void +nfs_v4initcompound(struct nfs4_compound *cp) +{ + bzero(cp, sizeof(*cp)); +} + +/* + * Build/dissect XDR buffer with a format string. + * + * u - unsigned + * h - hyper + * s - stringlength, string + * k - skip length (bytes) + * a - arraylength, componentlenght, array + * o - opaque fix length + * O - opaque var length in bytes + */ + +void +nfsm_buildf_xx(struct mbuf **mb, caddr_t *bpos, char *fmt, ...) 
+{ + uint32_t *tl, t1, len, uval; + uint64_t hval; + va_list args; + char *p, *which; + + va_start(args, fmt); + for (which = fmt; *which != '\0'; which++) + switch (*which) { + case 'u': /* Unsigned */ + tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); + uval = va_arg(args, uint32_t); + *tl++ = txdr_unsigned(uval); + break; + case 'h': /* Hyper */ + tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); + hval = va_arg(args, uint64_t); + txdr_hyper(hval, tl); + break; + case 'o': /* Fixed-length opaque */ + len = va_arg(args, uint32_t); + p = va_arg(args, char *); + tl = nfsm_build_xx(nfsm_rndup(len), mb, bpos); + bcopy(p, tl, len); + break; + case 'O': /* Variable-length opaque */ + case 's': /* String */ + len = va_arg(args, uint32_t); + p = va_arg(args, char *); + t1 = nfsm_strtom_xx(p, len, len, mb, bpos); + break; + case 'k': /* Skip */ + len = va_arg(args, uint32_t); + nfsm_build_xx(nfsm_rndup(len), mb, bpos); + break; + default: + panic("Invalid buildf string %s[%c]", fmt, *which); + break; + } + va_end(args); +} + +int +nfsm_dissectf_xx(struct mbuf **md, caddr_t *dpos, char *fmt, ...) 
+{ + uint32_t *tl, t1, len, *uval; + uint64_t *hval; + va_list args; + char *p, *which; + + va_start(args, fmt); + for (which = fmt; *which != '\0'; which++) + switch (*which) { + case 'u': /* Unsigned */ + tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); + if (tl == NULL) + return (EBADRPC); + uval = va_arg(args, uint32_t *); + *uval = fxdr_unsigned(uint32_t, *tl++); + break; + case 'h': /* Hyper */ + tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); + if (tl == NULL) + return (EBADRPC); + hval = va_arg(args, uint64_t *); + *hval = fxdr_hyper(tl); + break; + case 'o': /* Fixed-length opaque */ + len = va_arg(args, uint32_t); + p = va_arg(args, void *); + tl = nfsm_dissect_xx(nfsm_rndup(len), md, dpos); + if (tl == NULL) + return (EBADRPC); + bcopy(tl, p, len); + break; + case 'O': /* Variable-length opaque */ + case 's': /* String */ + len = va_arg(args, uint32_t); + p = va_arg(args, char *); + tl = nfsm_dissect_xx(nfsm_rndup(len), md, dpos); + if (tl == NULL) + return (EBADRPC); + bcopy(tl, p, len); + break; + case 'k': /* Skip bytes */ + len = va_arg(args, uint32_t); + t1 = nfsm_adv_xx(nfsm_rndup(len), md, dpos); + break; + default: + panic("Invalid dissectf string %s[%c]", fmt, *which); + break; + } + va_end(args); + + return (0); +} + +/* + * XXX - There are a few problems with the way the postops are places + * in the code. Ideally, they should be taken care of immediately, as + * to avoid uneceesary waits for mutexes, but then we would be + * introducing even more complexity by having to handle two separate + * cases. Also, since they are placed at the end of the vnops', there + * may be operations which sleep in between, further extending this + * wait. It is conceivable that there is a deadlock condition there, + * too. + * + * Also, for vnops that do multiple operations, it's inconvenient + * since on error, individual decoding will got nfsmout. 
+ */ + +int +nfs_v4postop(struct nfs4_compound *cp, int status) +{ + struct nfs4_fctx *fcp = cp->fcp; + + /* + * XXX does the previous result need to be stores with the + * lockowner? ack, spec is unclear .. + */ + + if (fcp != NULL) + if (cp->seqidused < cp->rep_nops || + (cp->seqidused + 1 == cp->rep_nops && + NFS4_SEQIDMUTATINGERROR(status))) + fcp->lop->lo_seqid++; + + return (status); +} + +int +nfs_v4handlestatus(int status, struct nfs4_compound *cp) +{ + return (status); +} + +/* + * Initial setup of compound. + */ + +int +nfsm_v4build_compound_xx(struct nfs4_compound *cp, char *tag, + struct mbuf **mb, caddr_t *bpos) +{ + uint32_t t1, *tl, siz; + + /* Tag */ + siz = strlen(tag); + t1 = nfsm_rndup(siz) + NFSX_UNSIGNED; + if (t1 <= M_TRAILINGSPACE(*mb)) { + tl = nfsm_build_xx(t1, mb, bpos); + *tl++ = txdr_unsigned(siz); + *(tl + ((t1 >> 2) - 2)) = 0; + bcopy(tag, tl, siz); + } else { + t1 = nfsm_strtmbuf(mb, bpos, (const char *)tag, siz); + if (t1 != 0) + return (t1); + } + + /* Minor version and argarray*/ + tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); + *tl++ = txdr_unsigned(NFS4_MINOR_VERSION); + /* Save for backfill */ + cp->req_nopsp = tl; + *tl = txdr_unsigned(0); + + cp->curvp = NULL; + cp->savevp = NULL; + + return (0); +} + +/* + * XXX + * - backfill for stateid, and such + */ +int +nfsm_v4build_finalize_xx(struct nfs4_compound *cp, struct mbuf **mb, caddr_t *bpos) +{ + *cp->req_nopsp = txdr_unsigned(cp->req_nops); + + return (0); +} + +int +nfsm_v4build_putfh_xx(struct nfs4_compound *cp, struct vnode *vp, + struct mbuf **mb, caddr_t *bpos) +{ + uint32_t t1; + + /* Op */ + nfsm_buildf_xx(mb, bpos, "u", NFSV4OP_PUTFH); + + /* FH */ + t1 = nfsm_fhtom_xx(vp, 1, mb, bpos); + if (t1 != 0) + return (t1); + + cp->req_nops++; + cp->curvp = vp; + + return (0); +} + +int +nfsm_v4build_putfh_nv_xx(struct nfs4_compound *cp, struct nfs4_oparg_getfh *gfh, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "uuo", + NFSV4OP_PUTFH, + gfh->fh_len, + 
gfh->fh_len, + &gfh->fh_val); + + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_simple_xx(struct nfs4_compound *cp, uint32_t op, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "u", op); + + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_getattr_xx(struct nfs4_compound *cp, struct nfs4_oparg_getattr *ga, + struct mbuf **mb, caddr_t *bpos) +{ + int i; + + /* Op + bitmap length + bitmap */ + nfsm_buildf_xx(mb, bpos, "uu", NFSV4OP_GETATTR, ga->bm->bmlen); + for (i = 0; i < ga->bm->bmlen; i++) + nfsm_buildf_xx(mb, bpos, "u", ga->bm->bmval[i]); + + ga->vp = cp->curvp; + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_setattr_xx(struct nfs4_compound *cp, struct vattr *vap, + struct nfs4_fctx *fcp, struct mbuf **mb, caddr_t *bpos) +{ + int error; + + nfsm_buildf_xx(mb, bpos, "uo", + NFSV4OP_SETATTR, + NFSX_V4STATEID, fcp->stateid); + error = nfsm_v4build_attrs_xx(vap, mb, bpos); + if (error == 0) + cp->req_nops++; + + return (error); +} + +int +nfsm_v4build_getfh_xx(struct nfs4_compound *cp, struct nfs4_oparg_getfh *gfh, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "u", NFSV4OP_GETFH); + + gfh->vp = cp->curvp; + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_lookup_xx(struct nfs4_compound *cp, struct nfs4_oparg_lookup *l, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "us", NFSV4OP_LOOKUP, l->namelen, l->name); + + cp->curvp = l->vp; + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_setclientid_xx(struct nfs4_compound *cp, + struct nfs4_oparg_setclientid *sci, struct mbuf **mb, caddr_t *bpos) +{ + struct timeval tv; + + microtime(&tv); + + nfsm_buildf_xx(mb, bpos, "uuusussu", + NFSV4OP_SETCLIENTID, + tv.tv_sec, tv.tv_usec, + sci->namelen, sci->name, + sci->cb_prog, + sci->cb_netidlen, sci->cb_netid, + sci->cb_univaddrlen, sci->cb_univaddr, + 0xCA11BACC); + + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_setclientid_confirm_xx(struct nfs4_compound *cp, + struct 
nfs4_oparg_setclientid *sci, struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "uho", + NFSV4OP_SETCLIENTID_CONFIRM, + sci->clientid, + sizeof(sci->verf), sci->verf); + + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_open_xx(struct nfs4_compound *cp, struct nfs4_oparg_open *op, + struct mbuf **mb, caddr_t *bpos) +{ + int error = 0; + struct nfs4_lowner *lop = op->fcp->lop; + + nfsm_buildf_xx(mb, bpos, "uuuuhuu", + NFSV4OP_OPEN, + lop->lo_seqid, + op->flags & O_ACCMODE, + NFSV4OPENSHARE_DENY_NONE, + cp->nmp->nm_clientid, + 4, lop->lo_id); + + if (op->flags & O_CREAT) { + nfsm_buildf_xx(mb, bpos, "u", OTCREATE); + /* openflag4: mode */ + nfsm_buildf_xx(mb, bpos, "u", CMUNCHECKED); + /* openflag4: createattrs... */ + if (op->vap != NULL) { + if (op->flags & O_TRUNC) + op->vap->va_size = 0; + error = nfsm_v4build_attrs_xx(op->vap, mb, bpos); + if (error != 0) + return (error); + } else + nfsm_buildf_xx(mb, bpos, "uu", 0, 0); + } else + nfsm_buildf_xx(mb, bpos, "u", OTNOCREATE); + + nfsm_buildf_xx(mb, bpos, "us", op->ctype, + op->cnp->cn_namelen, op->cnp->cn_nameptr); + + cp->seqidused = cp->req_nops++; + cp->fcp = op->fcp; + + return (error); +} + +/* + * XXX + * - Wait on recovery + */ +int +nfsm_v4build_open_confirm_xx(struct nfs4_compound *cp, struct nfs4_oparg_open *op, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "uou", + NFSV4OP_OPEN_CONFIRM, + NFSX_V4STATEID, op->fcp->stateid, + op->fcp->lop->lo_seqid); + + cp->seqidused = cp->req_nops++; + cp->fcp = op->fcp; + + return (0); +} + +/* + * XXX + * - Wait on recovery + */ +int +nfsm_v4build_close_xx(struct nfs4_compound *cp, struct nfs4_fctx *fcp, + struct mbuf **mb, caddr_t *bpos) +{ + struct nfs4_lowner *lop = fcp->lop; + + nfsm_buildf_xx(mb, bpos, "uuo", + NFSV4OP_CLOSE, + lop->lo_seqid, + NFSX_V4STATEID, fcp->stateid); + + cp->seqidused = cp->req_nops++; + cp->fcp = fcp; + + return (0); +} + +int +nfsm_v4build_access_xx(struct nfs4_compound *cp, struct 
nfs4_oparg_access *acc, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "uu", NFSV4OP_ACCESS, acc->mode); + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_read_xx(struct nfs4_compound *cp, struct nfs4_oparg_read *r, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "uohu", + NFSV4OP_READ, + NFSX_V4STATEID, r->fcp->stateid, + r->off, + r->maxcnt); + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_write_xx(struct nfs4_compound *cp, struct nfs4_oparg_write *w, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "uohuu", + NFSV4OP_WRITE, + NFSX_V4STATEID, w->fcp->stateid, + w->off, + w->stable, + w->cnt); + cp->req_nops++; + return (nfsm_uiotombuf(w->uiop, mb, w->cnt, bpos)); +} + +int +nfsm_v4build_commit_xx(struct nfs4_compound *cp, struct nfs4_oparg_commit *c, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "uhu", NFSV4OP_COMMIT, c->start, c->len); + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_readdir_xx(struct nfs4_compound *cp, struct nfs4_oparg_readdir *r, + struct mbuf **mb, caddr_t *bpos) +{ + int i; + + nfsm_buildf_xx(mb, bpos, "uhouuu", + NFSV4OP_READDIR, + r->cookie, + sizeof(r->verf), r->verf, + r->cnt >> 4, /* meaningless "dircount" field */ + r->cnt, + r->bm->bmlen); + + for (i = 0; i < r->bm->bmlen; i++) + nfsm_buildf_xx(mb, bpos, "u", r->bm->bmval[i]); + + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_renew_xx(struct nfs4_compound *cp, uint64_t cid, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "uh", NFSV4OP_RENEW, cid); + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_create_xx(struct nfs4_compound *cp, struct nfs4_oparg_create *c, + struct mbuf **mb, caddr_t *bpos) +{ + uint32_t t1; + + nfsm_buildf_xx(mb, bpos, "uu", NFSV4OP_CREATE, c->type); + + if (c->type == NFLNK) + /* XXX strlen */ + nfsm_buildf_xx(mb, bpos, "s", strlen(c->linktext), c->linktext); + else if (c->type == NFCHR || c->type == NFBLK) + nfsm_buildf_xx(mb, 
bpos, "uu", + umajor(c->vap->va_rdev), uminor(c->vap->va_rdev)); + + /* Name */ + nfsm_buildf_xx(mb, bpos, "s", c->namelen, c->name); + + /* Attributes */ + t1 = nfsm_v4build_attrs_xx(c->vap, mb, bpos); + if (t1 != 0) + return (t1); + + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_rename_xx(struct nfs4_compound *cp, struct nfs4_oparg_rename *r, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "uss", NFSV4OP_RENAME, r->fnamelen, r->fname, + r->tnamelen, r->tname); + + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_link_xx(struct nfs4_compound *cp, struct nfs4_oparg_link *l, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "us", NFSV4OP_LINK, l->namelen, l->name); + + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_remove_xx(struct nfs4_compound *cp, const char *name, u_int namelen, + struct mbuf **mb, caddr_t *bpos) +{ + nfsm_buildf_xx(mb, bpos, "us", NFSV4OP_REMOVE, namelen, name); + + cp->req_nops++; + + return (0); +} + +int +nfsm_v4build_attrs_xx(struct vattr *vap, struct mbuf **mb, caddr_t *bpos) +{ + uint32_t *tl, *attrlenp, *bmvalp, siz, len; + + tl = nfsm_build_xx(4 * NFSX_UNSIGNED, mb, bpos); + + *tl++ = txdr_unsigned(2); /* bitmap length */ + bmvalp = tl; + bzero(bmvalp, 8); + tl += 2; + attrlenp = tl; + + len = 0; + if (vap->va_size != VNOVAL) { + tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); + FA4_SET(FA4_SIZE, bmvalp); + txdr_hyper(vap->va_size, tl); tl += 2; + len += 2 * NFSX_UNSIGNED; + } + if (vap->va_mode != (u_short)VNOVAL) { + tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); + FA4_SET(FA4_MODE, bmvalp); + *tl++ = txdr_unsigned(vap->va_mode); + len += NFSX_UNSIGNED; + } + if (vap->va_uid != VNOVAL) { + int error; + char *name; + error = idmap_uid_to_name(vap->va_uid, &name, &siz); + if (error || name == NULL || siz == 0) { + /* XXX */ + siz = sizeof("nobody") - 1; + tl = nfsm_build_xx(NFSX_UNSIGNED + nfsm_rndup(siz), mb, + bpos); + *tl++ = txdr_unsigned(siz); + bcopy("nobody", tl, siz); + 
len += NFSX_UNSIGNED + nfsm_rndup(siz); + } else { + tl = nfsm_build_xx(NFSX_UNSIGNED + nfsm_rndup(siz), mb, + bpos); + *tl++ = txdr_unsigned(siz); + bcopy(name, tl, siz); + len += NFSX_UNSIGNED + nfsm_rndup(siz); + } + FA4_SET(FA4_OWNER, bmvalp); + } + if (vap->va_gid != VNOVAL) { + int error; + char *name; + error = idmap_gid_to_name(vap->va_gid, &name, &siz); + if (error || name == NULL || siz == 0) { + /* XXX */ + siz = sizeof("nogroup") - 1; + tl = nfsm_build_xx(NFSX_UNSIGNED + nfsm_rndup(siz), mb, + bpos); + *tl++ = txdr_unsigned(siz); + bcopy("nogroup", tl, siz); + len += NFSX_UNSIGNED + nfsm_rndup(siz); + } else { + tl = nfsm_build_xx(NFSX_UNSIGNED + nfsm_rndup(siz), mb, + bpos); + *tl++ = txdr_unsigned(siz); + bcopy(name, tl, siz); + len += NFSX_UNSIGNED + nfsm_rndup(siz); + } + FA4_SET(FA4_OWNER_GROUP, bmvalp); + } + if (vap->va_atime.tv_sec != VNOVAL) { + uint64_t val = vap->va_atime.tv_sec; + tl = nfsm_build_xx(4 * NFSX_UNSIGNED, mb, bpos); + FA4_SET(FA4_TIME_ACCESS_SET, bmvalp); + *tl++ = txdr_unsigned(THCLIENTTIME); + txdr_hyper(val, tl); tl += 2; + *tl++ = txdr_unsigned(vap->va_atime.tv_nsec); + len += 4 * NFSX_UNSIGNED; + } + if (vap->va_mtime.tv_sec != VNOVAL) { + uint64_t val = vap->va_mtime.tv_sec; + tl = nfsm_build_xx(4 * NFSX_UNSIGNED, mb, bpos); + FA4_SET(FA4_TIME_MODIFY_SET, bmvalp); + *tl++ = txdr_unsigned(THCLIENTTIME); + txdr_hyper(val, tl); tl += 2; + *tl++ = txdr_unsigned(vap->va_mtime.tv_nsec); + len += 4 * NFSX_UNSIGNED; + } + + bmvalp[0] = txdr_unsigned(bmvalp[0]); + bmvalp[1] = txdr_unsigned(bmvalp[1]); + + *attrlenp = txdr_unsigned(len); + + return (0); +} + +int +nfsm_v4dissect_compound_xx(struct nfs4_compound *cp, struct mbuf **md, caddr_t *dpos) +{ + uint32_t taglen, t1, *tl; + + tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); + if (tl == NULL) + return (EBADRPC); + + /* Reply status is handled by the RPC code */ + + taglen = fxdr_unsigned(uint32_t, *tl++); + t1 = nfsm_adv_xx(nfsm_rndup(taglen), md, dpos); + if (t1 != 0) + return 
(EBADRPC); + + tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); + if (tl == NULL) + return (EBADRPC); + + cp->rep_nops = fxdr_unsigned(uint32_t, *tl++); + + return (0); +} + +int +nfsm_v4dissect_simple_xx(struct nfs4_compound *cp, uint32_t op, + uint32_t skipbytes, struct mbuf **md, caddr_t *dpos) +{ + uint32_t t1, dop, status; + + t1 = nfsm_dissectf_xx(md, dpos, "uu", &dop, &status); + if (t1 != 0) + return (t1); + + if (dop != op || status != 0) + return (EBADRPC); + + if (skipbytes > 0) + NFSM_ADV(nfsm_rndup(skipbytes)); + + return (0); +} + +int +nfsm_v4dissect_getattr_xx(struct nfs4_compound *cp, struct nfs4_oparg_getattr *ga, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t *tl; + + tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); + if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_GETATTR || + *tl++ != 0) + return (EBADRPC); + + return (nfsm_v4dissect_attrs_xx(&ga->fa, md, dpos)); +} + +int +nfsm_v4dissect_setattr_xx(struct nfs4_compound *cp, struct mbuf **md, caddr_t *dpos) +{ + uint32_t t1, op, bmlen, status; + + t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); + if (t1 != 0) + return (t1); + + if (op != NFSV4OP_SETATTR || status != 0) + return (EBADRPC); + + t1 = nfsm_dissectf_xx(md, dpos, "u", &bmlen); + if (t1 != 0) + return (t1); + + return (nfsm_dissectf_xx(md, dpos, "k", bmlen << 2)); +} + +int +nfsm_v4dissect_getfh_xx(struct nfs4_compound *cp, struct nfs4_oparg_getfh *gfh, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t *tl, len, xdrlen; + + tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); + if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_GETFH) + return (EBADRPC); + + if (*tl++ != 0) + return (EBADRPC); + + NFSM_DISSECT(NFSX_UNSIGNED); + len = fxdr_unsigned(uint32_t, *tl++); + if (len > NFSX_V4FH) + return (EBADRPC); + + /* XXX integrate this into nfs_mtofh()? 
*/ + + gfh->fh_len = len; + xdrlen = nfsm_rndup(len); + + NFSM_DISSECT(xdrlen); + bcopy(tl, &gfh->fh_val, xdrlen); + + return (0); +} + +int +nfsm_v4dissect_setclientid_xx(struct nfs4_compound *cp, + struct nfs4_oparg_setclientid *sci, struct mbuf **md, caddr_t *dpos) +{ + uint32_t *tl; + + tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); + if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_SETCLIENTID) + return (EBADRPC); + + /* Handle NFS4ERR_CLID_INUSE specially */ + if (*tl++ != 0) + return (EBADRPC); + + NFSM_DISSECT(2 * NFSX_UNSIGNED); + sci->clientid = fxdr_hyper(tl); + + NFSM_DISSECT(nfsm_rndup(NFSX_V4VERF)); + bcopy(tl, sci->verf, NFSX_V4VERF); + + return (0); +} + +int +nfsm_v4dissect_close_xx(struct nfs4_compound *cp, struct nfs4_fctx *fcp, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t *tl, t1; + + tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); + if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_CLOSE || + *tl++ != 0) + return (EBADRPC); + + /* Copy stateid */ + t1 = nfsm_dissectf_xx(md, dpos, "o", NFSX_V4STATEID, fcp->stateid); + if (t1 != 0) + return (t1); + + return (0); +} + +int +nfsm_v4dissect_access_xx(struct nfs4_compound *cp, struct nfs4_oparg_access *acc, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t *tl; + + tl = nfsm_dissect_xx(4 * NFSX_UNSIGNED, md, dpos); + if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_ACCESS || + *tl++ != 0) + return (EBADRPC); + + acc->supported = fxdr_unsigned(uint32_t, *tl++); + acc->rmode = fxdr_unsigned(uint32_t, *tl++); + + return (0); +} + +int +nfsm_v4dissect_open_xx(struct nfs4_compound *cp, struct nfs4_oparg_open *op, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t *tl, t1, bmlen, delegtype = ODNONE; + int error = 0; + nfsv4changeinfo cinfo; + struct nfs4_fctx *fcp = op->fcp; + + tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); + if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_OPEN || + *tl++ != 0) + return (EBADRPC); + + t1 = nfsm_dissectf_xx(md, dpos, 
"o", NFSX_V4STATEID, fcp->stateid); + if (t1 != 0) + return (t1); + + error = nfsm_v4dissect_changeinfo_xx(&cinfo, md, dpos); + if (error != 0) + goto nfsmout; + + NFSM_DISSECT(2 * NFSX_UNSIGNED); + + op->rflags = fxdr_unsigned(uint32_t, *tl++); + bmlen = fxdr_unsigned(uint32_t, *tl++); + if (bmlen > 2) { + error = EBADRPC; + goto nfsmout; + } + + /* Skip */ + NFSM_ADV(nfsm_rndup(bmlen << 2)); + + NFSM_DISSECT(NFSX_UNSIGNED); + delegtype = fxdr_unsigned(uint32_t, *tl++); + switch (delegtype) { + case ODREAD: + case ODWRITE: + printf("nfs4: client delegation not yet supported\n"); + error = EOPNOTSUPP; + goto nfsmout; + break; + case ODNONE: + default: + break; + } + + nfsmout: + return (error); +} + +int +nfsm_v4dissect_open_confirm_xx(struct nfs4_compound *cp, struct nfs4_oparg_open *op, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t *tl; + + tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); + if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_OPEN_CONFIRM || + *tl++ != 0) + return (EBADRPC); + + return nfsm_dissectf_xx(md, dpos, "o", NFSX_V4STATEID, op->fcp->stateid); +} + +int +nfsm_v4dissect_read_xx(struct nfs4_compound *cp, struct nfs4_oparg_read *r, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t op, status, t1; + + t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); + if (t1 != 0) + return (t1); + + if (op != NFSV4OP_READ || status != 0) + return (EBADRPC); + + t1 = nfsm_dissectf_xx(md, dpos, "uu", &r->eof, &r->retlen); + if (t1 != 0) + return (t1); + + return (nfsm_mbuftouio(md, r->uiop, r->retlen, dpos)); +} + +int +nfsm_v4dissect_write_xx(struct nfs4_compound *cp, struct nfs4_oparg_write *w, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t op, status, t1; + + t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); + if (t1 != 0) + return (t1); + + if (op != NFSV4OP_WRITE || status != 0) + return (EBADRPC); + + return (nfsm_dissectf_xx(md, dpos, "uuo", &w->retlen, &w->committed, + NFSX_V4VERF, w->wverf)); +} + +int 
+nfsm_v4dissect_commit_xx(struct nfs4_compound *cp, struct nfs4_oparg_commit *c, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t t1, op, status; + + t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); + if (t1 != 0) + return (t1); + + if (op != NFSV4OP_COMMIT || status != 0) + return (EBADRPC); + + return (nfsm_dissectf_xx(md, dpos, "o", NFSX_V4VERF, c->verf)); +} + +int +nfsm_v4dissect_create_xx(struct nfs4_compound *cp, struct nfs4_oparg_create *c, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t t1, *tl, op, status, bmlen; + nfsv4changeinfo ci; + + t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); + if (t1 != 0) + return (t1); + + if (op != NFSV4OP_CREATE || status != 0) + return (EBADRPC); + + /* Just throw this away for now */ + t1 = nfsm_v4dissect_changeinfo_xx(&ci, md, dpos); + if (t1 != 0) + return (t1); + + /* Throw this away too */ + NFSM_DISSECT(NFSX_UNSIGNED); + bmlen = fxdr_unsigned(uint32_t, *tl++); + NFSM_DISSECT(bmlen * NFSX_UNSIGNED); + tl += bmlen; + + return 0; +} + +int +nfsm_v4dissect_readlink_xx(struct nfs4_compound *cp, struct uio *uiop, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t t1, *tl, op, status, linklen; + + t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); + if (t1 != 0) + return (t1); + + if (op != NFSV4OP_READLINK || status != 0) + return (EBADRPC); + + /* Do this one manually for careful checking of sizes. 
*/ + NFSM_DISSECT(NFSX_UNSIGNED); + linklen = fxdr_unsigned(uint32_t, *tl++); + if (linklen <= 0) + return (EBADRPC); + + return (nfsm_mbuftouio(md, uiop, MIN(linklen, uiop->uio_resid), dpos)); +} + +int +nfsm_v4dissect_changeinfo_xx(nfsv4changeinfo *ci, + struct mbuf **md, caddr_t *dpos) +{ + uint32_t *tl; + + NFSM_DISSECT(5 * NFSX_UNSIGNED); + + ci->ciatomic = fxdr_unsigned(uint32_t, *tl++); + ci->cibefore = fxdr_hyper(tl); tl += 2; + ci->ciafter = fxdr_hyper(tl); tl += 2; + + return (0); +} + +int +nfsm_v4dissect_attrs_xx(struct nfsv4_fattr *fa, struct mbuf **md, caddr_t *dpos) +{ + uint32_t t1, *tl, bmlen, bmval[2], attrlen, len = 0; + + /* Bitmap length + value */ + NFSM_DISSECT(NFSX_UNSIGNED); + + bmlen = fxdr_unsigned(uint32_t, *tl++); + if (bmlen > 2) + return (EBADRPC); + + if (bmlen == 0) + return (0); + + NFSM_DISSECT(nfsm_rndup(bmlen << 2) + NFSX_UNSIGNED); + + bmval[0] = bmlen > 0 ? fxdr_unsigned(uint32_t, *tl++) : 0; + bmval[1] = bmlen > 1 ? fxdr_unsigned(uint32_t, *tl++) : 0; + + /* Attribute length */ + attrlen = fxdr_unsigned(uint32_t, *tl++); + + /* + * XXX check for correct (<=) attributes mask return from + * server. need to pass this in. 
+ */ + + if (FA4_ISSET(FA4_TYPE, bmval)) { + /* overflow check */ + NFSM_DISSECT(NFSX_UNSIGNED); + fa->fa4_type = fxdr_unsigned(uint32_t, *tl++); + fa->fa4_valid |= FA4V_TYPE; + len += NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_CHANGE, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_changeid = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_CHANGEID; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_SIZE, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_size = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_SIZE; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_FSID, bmval)) { + /* Two hypers on the wire: major, then minor */ + NFSM_DISSECT(4 * NFSX_UNSIGNED); + fa->fa4_fsid_major = fxdr_hyper(tl); tl += 2; + fa->fa4_fsid_minor = fxdr_hyper(tl); + /* Flag the fsid itself; FA4V_SIZE here would mark fa4_size valid */ + fa->fa4_valid |= FA4V_FSID; + len += 4 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_LEASE_TIME, bmval)) { + NFSM_DISSECT(NFSX_UNSIGNED); + fa->fa4_lease_time = fxdr_unsigned(uint32_t, *tl++); + fa->fa4_valid |= FA4V_LEASE_TIME; + len += NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_FILEID, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_fileid = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_FILEID; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_FILES_FREE, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_ffree = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_FFREE; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_FILES_TOTAL, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_ftotal = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_FTOTAL; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_MAXFILESIZE, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_maxfilesize = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_MAXFILESIZE; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_MAXNAME, bmval)) { + NFSM_DISSECT(NFSX_UNSIGNED); + fa->fa4_maxname = fxdr_unsigned(uint32_t, *tl++); + fa->fa4_valid |= FA4V_MAXNAME; + len += NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_MAXREAD, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_maxread = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_MAXREAD; + len
+= 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_MAXWRITE, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_maxwrite = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_MAXWRITE; + len += 2 * NFSX_UNSIGNED; + } + + if (FA4_ISSET(FA4_MODE, bmval)) { + NFSM_DISSECT(NFSX_UNSIGNED); + fa->fa4_mode = fxdr_unsigned(mode_t, *tl++); + fa->fa4_valid |= FA4V_MODE; + len += NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_NUMLINKS, bmval)) { + NFSM_DISSECT(NFSX_UNSIGNED); + fa->fa4_nlink = fxdr_unsigned(nlink_t, *tl++); + fa->fa4_valid |= FA4V_NLINK; + len += NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_OWNER, bmval)) { + uint32_t ownerlen; + int error; + + NFSM_DISSECT(NFSX_UNSIGNED); + + ownerlen = fxdr_unsigned(uint32_t, *tl++); + NFSM_DISSECT(nfsm_rndup(ownerlen)); + error = idmap_name_to_uid((char *)tl, ownerlen, &fa->fa4_uid); + if (error) + fa->fa4_uid = -2; + fa->fa4_valid |= FA4V_UID; + len += NFSX_UNSIGNED + nfsm_rndup(ownerlen); + } + if (FA4_ISSET(FA4_OWNER_GROUP, bmval)) { + uint32_t ownergrouplen; + int error; + + NFSM_DISSECT(NFSX_UNSIGNED); + ownergrouplen = fxdr_unsigned(uint32_t, *tl++); + NFSM_DISSECT(nfsm_rndup(ownergrouplen)); + error = idmap_name_to_gid((char *)tl, ownergrouplen, &fa->fa4_gid); + if (error) + fa->fa4_gid = -2; + fa->fa4_valid |= FA4V_GID; + len += NFSX_UNSIGNED + nfsm_rndup(ownergrouplen); + } + if (FA4_ISSET(FA4_RAWDEV, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_rdev_major = fxdr_unsigned(uint32_t, *tl++); + fa->fa4_rdev_minor = fxdr_unsigned(uint32_t, *tl++); + fa->fa4_valid |= FA4V_RDEV; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_SPACE_AVAIL, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_savail = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_SAVAIL; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_SPACE_FREE, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + fa->fa4_sfree = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_SFREE; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_SPACE_TOTAL, bmval)) { + NFSM_DISSECT(2 * NFSX_UNSIGNED); + 
fa->fa4_stotal = fxdr_hyper(tl); + fa->fa4_valid |= FA4V_STOTAL; + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_SPACE_USED, bmval)) { + NFSM_ADV(2 * NFSX_UNSIGNED); + len += 2 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_TIME_ACCESS, bmval)) { + NFSM_MTOTIME(fa->fa4_atime); + fa->fa4_valid |= FA4V_ATIME; + len += 3 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_TIME_CREATE, bmval)) { + NFSM_MTOTIME(fa->fa4_ctime); + fa->fa4_valid |= FA4V_CTIME; + len += 3 * NFSX_UNSIGNED; + } + if (FA4_ISSET(FA4_TIME_MODIFY, bmval)) { + NFSM_MTOTIME(fa->fa4_mtime); + fa->fa4_valid |= FA4V_MTIME; + len += 3 * NFSX_UNSIGNED; + } + + if (len != attrlen) + return (EBADRPC); + + return (0); +} diff --git a/sys/nfs4client/nfs4_vfs.h b/sys/nfs4client/nfs4_vfs.h new file mode 100644 index 0000000..18772be3 --- /dev/null +++ b/sys/nfs4client/nfs4_vfs.h @@ -0,0 +1,34 @@ +/* $FreeBSD$ */ +/* $Id: nfs4_vfs.h,v 1.4 2003/11/05 14:59:00 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. 
the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +#ifndef _NFS4CLIENT_NFS4_VFS_H +#define _NFS4CLIENT_NFS4_VFS_H + +void nfs4_vfsop_fsinfo(struct nfsv4_fattr *, struct nfsmount *); +void nfs4_vfsop_statfs(struct nfsv4_fattr *, struct statfs *, struct mount *); + +#endif /* _NFS4CLIENT_NFS4_VFS_H */ diff --git a/sys/nfs4client/nfs4_vfs_subs.c b/sys/nfs4client/nfs4_vfs_subs.c new file mode 100644 index 0000000..ecd8b1d --- /dev/null +++ b/sys/nfs4client/nfs4_vfs_subs.c @@ -0,0 +1,150 @@ +/* $FreeBSD$ */ +/* $Id: nfs4_vfs_subs.c,v 1.5 2003/11/05 14:59:00 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. 
the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/limits.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sockio.h> +#include <sys/vnode.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/uma.h> + +#include <net/if.h> +#include <net/route.h> +#include <netinet/in.h> + +#include <rpc/rpcclnt.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfsclient/nfs.h> +#include <nfsclient/nfsmount.h> +#include <nfs/xdr_subs.h> +#include <nfsclient/nfsm_subs.h> +#include <nfsclient/nfsdiskless.h> + +/* NFSv4 */ +#include <nfs4client/nfs4.h> +#include <nfs4client/nfs4m_subs.h> +#include <nfs4client/nfs4_vfs.h> + +#include <nfsclient/nfsnode.h> + +static int nfs_iosize(struct nfsmount *nmp); + +static int +nfs_iosize(struct nfsmount *nmp) +{ + int iosize; + + /* + * Calculate the size used for io buffers. Use the larger + * of the two sizes to minimise nfs requests but make sure + * that it is at least one VM page to avoid wasting buffer + * space. 
+ */ + iosize = max(nmp->nm_rsize, nmp->nm_wsize); + if (iosize < PAGE_SIZE) iosize = PAGE_SIZE; + return iosize; +} + +/* + * Folds server-reported attributes into the mount: the fsid, the read, + * readdir and write transfer sizes (only ever lowered, and rounded down + * to a block multiple when that leaves a non-zero value) and the lease time. + * Only attributes flagged in fap->fa4_valid are used. + */ +void +nfs4_vfsop_fsinfo(struct nfsv4_fattr *fap, struct nfsmount *nmp) +{ + /* + * 64-bit on purpose: fa4_maxread/fa4_maxwrite are decoded with + * fxdr_hyper(), and a 32-bit local would truncate limits of 4GB + * and above into small or zero transfer sizes. + */ + uint64_t max; + + if (fap->fa4_valid & FA4V_FSID) { + nmp->nm_fsid.val[0] = fap->fa4_fsid_major; + nmp->nm_fsid.val[1] = fap->fa4_fsid_minor; + } + if (fap->fa4_valid & FA4V_MAXREAD) { + max = fap->fa4_maxread; + /* A zero limit is ignored rather than zeroing the sizes */ + if (max != 0 && max < nmp->nm_rsize) { + nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); + if (nmp->nm_rsize == 0) + nmp->nm_rsize = max; + } + if (max != 0 && max < nmp->nm_readdirsize) { + nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); + if (nmp->nm_readdirsize == 0) + nmp->nm_readdirsize = max; + } + } + if (fap->fa4_valid & FA4V_MAXWRITE) { + max = fap->fa4_maxwrite; + if (max != 0 && max < nmp->nm_wsize) { + nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); + if (nmp->nm_wsize == 0) + nmp->nm_wsize = max; + } + } + if (fap->fa4_valid & FA4V_LEASE_TIME) + nmp->nm_lease_time = fap->fa4_lease_time; + + /* nmp->nm_flag |= NFSMNT_GOTFSINFO; */ +} + +void +nfs4_vfsop_statfs(struct nfsv4_fattr *fap, struct statfs *sbp, struct mount *mp) +{ + struct nfsmount *nmp = VFSTONFS(mp); + + sbp->f_flags = nmp->nm_flag; + sbp->f_iosize = nfs_iosize(nmp); + sbp->f_bsize = NFS_FABLKSIZE; + + if (fap->fa4_valid & FA4V_FSID) { + sbp->f_fsid.val[0] = fap->fa4_fsid_major; + sbp->f_fsid.val[1] = fap->fa4_fsid_minor; + } + + sbp->f_ffree = fap->fa4_valid & FA4V_FFREE ? fap->fa4_ffree : 0; + /* sbp->f_ftotal = fa->fa4_valid & FA4_FTOTAL ? fa->fa4_ftotal : 0; */ + sbp->f_bavail = fap->fa4_valid & FA4V_SAVAIL ? + fap->fa4_savail / NFS_FABLKSIZE : 500000; + sbp->f_bfree = fap->fa4_valid & FA4V_SFREE ? + fap->fa4_sfree / NFS_FABLKSIZE : 500000; + sbp->f_blocks = fap->fa4_valid & FA4V_STOTAL ?
+ fap->fa4_stotal / NFS_FABLKSIZE : 1000000; + + if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } +} diff --git a/sys/nfs4client/nfs4_vfsops.c b/sys/nfs4client/nfs4_vfsops.c new file mode 100644 index 0000000..fe168f8 --- /dev/null +++ b/sys/nfs4client/nfs4_vfsops.c @@ -0,0 +1,938 @@ +/* $FreeBSD$ */ +/* $Id: nfs_vfsops.c,v 1.38 2003/11/05 14:59:01 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +/* + * Copyright (c) 1989, 1993, 1995 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_bootp.h" +#include "opt_nfsroot.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/limits.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sockio.h> +#include <sys/sysctl.h> +#include <sys/unistd.h> +#include <sys/vnode.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/uma.h> + +#include <net/if.h> +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_var.h> + +#include <rpc/rpcclnt.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfsclient/nfs.h> +#include <nfs4client/nfs4.h> +#include <nfsclient/nfsnode.h> +#include <nfsclient/nfsmount.h> +#include <nfs/xdr_subs.h> +#include <nfsclient/nfsm_subs.h> +#include <nfsclient/nfsdiskless.h> + +#include <nfs4client/nfs4m_subs.h> +#include <nfs4client/nfs4_vfs.h> + +#include <nfs4client/nfs4_dev.h> +#include <nfs4client/nfs4_idmap.h> + +SYSCTL_NODE(_vfs, OID_AUTO, nfs4, CTLFLAG_RW, 0, "NFS4 filesystem"); +SYSCTL_STRUCT(_vfs_nfs4, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, + &nfsstats, nfsstats, "S,nfsstats"); +#ifdef NFS_DEBUG +int nfs_debug; +SYSCTL_INT(_vfs_nfs4, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, ""); +#endif + +static void nfs_decode_args(struct nfsmount *nmp, struct nfs_args *argp); +static void nfs4_daemon(void *arg); +static int mountnfs(struct nfs_args *, struct mount *, + struct sockaddr *, char *, char *, struct vnode **, + struct ucred *cred); +static int nfs4_do_setclientid(struct nfsmount *nmp, struct ucred *cred); +static vfs_mount_t nfs_mount; +static vfs_unmount_t nfs_unmount; +static vfs_root_t nfs_root; +static vfs_statfs_t nfs_statfs; +static vfs_sync_t nfs_sync; + +/* + * nfs vfs operations. 
+ */
+static struct vfsops nfs_vfsops = {
+	.vfs_init =		nfs4_init,
+	.vfs_mount =		nfs_mount,
+	.vfs_root =		nfs_root,
+	.vfs_statfs =		nfs_statfs,
+	.vfs_sync =		nfs_sync,
+	.vfs_uninit =		nfs4_uninit,
+	.vfs_unmount =		nfs_unmount,
+};
+VFS_SET(nfs_vfsops, nfs4, VFCF_NETWORK);	/* registered under the name "nfs4" */
+
+/* So that loader and kldload(2) can find us, wherever we are.. */
+MODULE_VERSION(nfs4, 1);
+
+void		nfsargs_ntoh(struct nfs_args *);
+
+#ifdef NFS4DISKLESS
+/*
+ * This structure must be filled in by a primary bootstrap or bootstrap
+ * server for a diskless/dataless machine. It is initialized below just
+ * to ensure that it is allocated to initialized data (.data not .bss).
+ */
+struct nfs_diskless nfs_diskless = { { { 0 } } };
+struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
+int nfs_diskless_valid = 0;
+
+SYSCTL_INT(_vfs_nfs4, OID_AUTO, diskless_valid, CTLFLAG_RD,
+	&nfs_diskless_valid, 0, "");
+
+SYSCTL_STRING(_vfs_nfs4, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
+	nfsv3_diskless.root_hostnam, 0, "");
+
+SYSCTL_OPAQUE(_vfs_nfs4, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
+	&nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
+	"%Ssockaddr_in", "");
+
+SYSCTL_STRING(_vfs_nfs4, OID_AUTO, diskless_swappath, CTLFLAG_RD,
+	nfsv3_diskless.swap_hostnam, 0, "");
+
+SYSCTL_OPAQUE(_vfs_nfs4, OID_AUTO, diskless_swapaddr, CTLFLAG_RD,
+	&nfsv3_diskless.swap_saddr, sizeof nfsv3_diskless.swap_saddr,
+	"%Ssockaddr_in","");
+
+/* NOTE(review): nfs_mountdiskless() is declared but not defined in this part of the file. */
+static int	nfs_mountdiskless(char *, char *, int,
+		    struct sockaddr_in *, struct nfs_args *,
+		    struct thread *, struct vnode **, struct mount **);
+static void	nfs_convert_diskless(void);
+static void	nfs_convert_oargs(struct nfs_args *args,
+		    struct onfs_args *oargs);
+
+static void
+nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
+{
+	/*
+	 * Convert an old-format argument block into a current nfs_args:
+	 * stamp the current version number and copy the fields listed
+	 * below one by one.  Fields of *args that are not assigned here
+	 * are left exactly as the caller passed them.
+	 */
+	args->version = NFS_ARGSVERSION;
+	args->addr = oargs->addr;
+	args->addrlen = oargs->addrlen;
+	args->sotype = oargs->sotype;
+	args->proto = oargs->proto;
+	args->fh = oargs->fh;
+	args->fhsize = oargs->fhsize;
+	args->flags = oargs->flags;
+	args->wsize = oargs->wsize;
+	args->rsize = oargs->rsize;
+	args->readdirsize = oargs->readdirsize;
+	args->timeo = oargs->timeo;
+	args->retrans = oargs->retrans;
+	args->maxgrouplist = oargs->maxgrouplist;
+	args->readahead = oargs->readahead;
+	args->deadthresh = oargs->deadthresh;
+	args->hostname = oargs->hostname;
+}
+/*
+ * Populate nfsv3_diskless from the old-style nfs_diskless structure:
+ * interface, gateway, root mount arguments, root file handle (with
+ * size NFSX_V2FH), root server address/name/time and our own host
+ * name.  Only root_* members are converted; no swap_* member is
+ * touched.  Finishes by setting nfs_diskless_valid to 3.
+ */
+static void
+nfs_convert_diskless(void)
+{
+
+	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
+		sizeof(struct ifaliasreq));
+	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
+		sizeof(struct sockaddr_in));
+	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
+	nfsv3_diskless.root_fhsize = NFSX_V2FH;
+	bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
+	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
+		sizeof(struct sockaddr_in));
+	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
+	nfsv3_diskless.root_time = nfs_diskless.root_time;
+	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
+		MAXHOSTNAMELEN);
+	nfs_diskless_valid = 3;
+}
+#endif
+/*
+ * vfs_init hook: bring up the RPC client layer, the nfs4 device, the
+ * id mapper and the NFSv4 mbuf helpers, in that order.  vfsp is not
+ * used and the function always returns 0.
+ */
+int
+nfs4_init(struct vfsconf *vfsp)
+{
+
+	rpcclnt_init();
+	nfs4dev_init();
+	idmap_init();
+	nfsm_v4init();
+
+	return (0);
+}
+/*
+ * vfs_uninit hook: tear down the RPC client layer, the nfs4 device and
+ * the id mapper.  There is no counterpart call for nfsm_v4init() here.
+ * vfsp is not used and the function always returns 0.
+ */
+int
+nfs4_uninit(struct vfsconf *vfsp)
+{
+
+	rpcclnt_uninit();
+	nfs4dev_uninit();
+	idmap_uninit();
+
+	return (0);
+}
+
+/*
+ * nfs statfs call
+ *
+ * Looks up the vnode for the mount's root file handle, sends one
+ * COMPOUND consisting of PUTFH + GETATTR (attribute bitmap
+ * nfsv4_fsattrbm) and hands the decoded attributes to
+ * nfs4_vfsop_statfs() to fill in *sbp.
+ *
+ * Both the success and the failure path run through the nfsmout label,
+ * where nfs_v4postop() produces the final error, the root vnode is
+ * released with vput() and any reply mbuf chain is freed.
+ *
+ * NOTE(review): the nfsm_* macros appear to rely on the local names
+ * mreq/mrep/md/mb/bpos/dpos/error and on the nfsmout label; confirm
+ * against nfs4m_subs.h before renaming or reordering anything here.
+ */
+static int
+nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
+{
+	struct vnode *vp;
+	struct nfs_statfs *sfp;	/* only ever assigned NULL below */
+	caddr_t bpos, dpos;
+	struct nfsmount *nmp = VFSTONFS(mp);
+	int error = 0;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	struct nfsnode *np;
+	struct nfs4_compound cp;
+	struct nfs4_oparg_getattr ga;
+	struct nfsv4_fattr *fap = &ga.fa;
+
+#ifndef nolint
+	sfp = NULL;
+#endif
+	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+	if (error)
+		return (error);
+	vp = NFSTOV(np);
+	nfsstats.rpccnt[NFSPROC_FSSTAT]++;
+	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, NFSX_FH(1));
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	ga.bm = &nfsv4_fsattrbm;
+	nfs_v4initcompound(&cp);
+
+	nfsm_v4build_compound(&cp, "statfs()");
+	nfsm_v4build_putfh(&cp, vp);
+	nfsm_v4build_getattr(&cp, &ga);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request(vp, NFSV4PROC_COMPOUND, td, td->td_ucred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_putfh(&cp);
+	nfsm_v4dissect_getattr(&cp, &ga);
+
+	nfs4_vfsop_statfs(fap, sbp, mp);
+
+nfsmout:
+	error = nfs_v4postop(&cp, error);
+
+	vput(vp);
+	if (mrep != NULL)
+		m_freem(mrep);
+
+	return (error);
+}
+/*
+ * Fold the mount options in *argp into the mount point *nmp.
+ *
+ * Flags are OR-ed into nm_flag, so apart from NFSMNT_NOCONN (cleared
+ * for stream sockets) and NFSMNT_RDIRPLUS (always cleared) this
+ * function never turns a flag off.  Timeout, retry count, transfer
+ * sizes, read-ahead, group-list and dead-threshold values are taken
+ * only when the matching NFSMNT_* flag is set in argp->flags, and are
+ * clamped to the limits used below.  The four attribute-cache timeouts
+ * are different: when their flag is absent they are reset to the
+ * compiled-in defaults.
+ *
+ * If the mount already has a socket (nm_rpcclnt.rc_so) and the
+ * socket-related settings changed, the connection is dropped; for
+ * datagram sockets it is re-established here, retrying until
+ * nfs4_connect() succeeds.
+ */
+static void
+nfs_decode_args(struct nfsmount *nmp, struct nfs_args *argp)
+{
+	int s;
+	int adjsock;	/* non-zero when the socket must be re-created */
+	int maxio;	/* upper bound applied to wsize/rsize/readdirsize */
+
+	s = splnet();
+	/*
+	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
+	 * no sense in that context.
+	 */
+	if (argp->sotype == SOCK_STREAM)
+		nmp->nm_flag &= ~NFSMNT_NOCONN;
+
+	nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
+
+	/* Re-bind if rsrvd port requested and wasn't on one */
+	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
+		  && (argp->flags & NFSMNT_RESVPORT);
+	/* Also re-bind if we're switching to/from a connected UDP socket */
+	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
+		    (argp->flags & NFSMNT_NOCONN));
+
+	/* Update flags atomically. Don't change the lock bits. */
+	nmp->nm_flag = argp->flags | nmp->nm_flag;
+	splx(s);
+
+	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
+		/* scale by NFS_HZ / 10, rounding to nearest */
+		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
+		if (nmp->nm_timeo < NFS_MINTIMEO)
+			nmp->nm_timeo = NFS_MINTIMEO;
+		else if (nmp->nm_timeo > NFS_MAXTIMEO)
+			nmp->nm_timeo = NFS_MAXTIMEO;
+	}
+
+	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
+		nmp->nm_retry = argp->retrans;
+		if (nmp->nm_retry > NFS_MAXREXMIT)
+			nmp->nm_retry = NFS_MAXREXMIT;
+	}
+
+	/* pick the I/O ceiling from the v3 flag and the socket type */
+	if (argp->flags & NFSMNT_NFSV3) {
+		if (argp->sotype == SOCK_DGRAM)
+			maxio = NFS_MAXDGRAMDATA;
+		else
+			maxio = NFS_MAXDATA;
+	} else
+		maxio = NFS_V2MAXDATA;
+
+	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
+		nmp->nm_wsize = argp->wsize;
+		/* Round down to multiple of blocksize */
+		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
+		if (nmp->nm_wsize <= 0)
+			nmp->nm_wsize = NFS_FABLKSIZE;
+	}
+	if (nmp->nm_wsize > maxio)
+		nmp->nm_wsize = maxio;
+	if (nmp->nm_wsize > MAXBSIZE)
+		nmp->nm_wsize = MAXBSIZE;
+
+	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
+		nmp->nm_rsize = argp->rsize;
+		/* Round down to multiple of blocksize */
+		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
+		if (nmp->nm_rsize <= 0)
+			nmp->nm_rsize = NFS_FABLKSIZE;
+	}
+	if (nmp->nm_rsize > maxio)
+		nmp->nm_rsize = maxio;
+	if (nmp->nm_rsize > MAXBSIZE)
+		nmp->nm_rsize = MAXBSIZE;
+
+	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
+		nmp->nm_readdirsize = argp->readdirsize;
+	}
+	if (nmp->nm_readdirsize > maxio)
+		nmp->nm_readdirsize = maxio;
+	if (nmp->nm_readdirsize > nmp->nm_rsize)
+		nmp->nm_readdirsize = nmp->nm_rsize;
+
+	/* attribute-cache timeouts: explicit value or compiled-in default */
+	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
+		nmp->nm_acregmin = argp->acregmin;
+	else
+		nmp->nm_acregmin = NFS_MINATTRTIMO;
+	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
+		nmp->nm_acregmax = argp->acregmax;
+	else
+		nmp->nm_acregmax = NFS_MAXATTRTIMO;
+	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
+		nmp->nm_acdirmin = argp->acdirmin;
+	else
+		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
+	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
+		nmp->nm_acdirmax = argp->acdirmax;
+	else
+		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
+	/* keep each minimum no larger than its maximum */
+	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
+		nmp->nm_acdirmin = nmp->nm_acdirmax;
+	if (nmp->nm_acregmin > nmp->nm_acregmax)
+		nmp->nm_acregmin = nmp->nm_acregmax;
+
+	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
+		if (argp->maxgrouplist <= NFS_MAXGRPS)
+			nmp->nm_numgrps = argp->maxgrouplist;
+		else
+			nmp->nm_numgrps = NFS_MAXGRPS;
+	}
+	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
+		if (argp->readahead <= NFS_MAXRAHEAD)
+			nmp->nm_readahead = argp->readahead;
+		else
+			nmp->nm_readahead = NFS_MAXRAHEAD;
+	}
+	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
+		if (argp->deadthresh <= NFS_MAXDEADTHRESH)
+			nmp->nm_deadthresh = argp->deadthresh;
+		else
+			nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
+	}
+
+	/* a change of socket type or protocol also forces a new socket */
+	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
+		    (nmp->nm_soproto != argp->proto));
+	nmp->nm_sotype = argp->sotype;
+	nmp->nm_soproto = argp->proto;
+
+	if (nmp->nm_rpcclnt.rc_so && adjsock) {
+		nfs_safedisconnect(nmp);
+		/* only datagram sockets are reconnected here */
+		if (nmp->nm_sotype == SOCK_DGRAM) {
+			while (nfs4_connect(nmp)) {
+				printf("nfs_args: retrying connect\n");
+				(void) tsleep((caddr_t)&lbolt,
+					      PSOCK, "nfscon", 0);
+			}
+		}
+	}
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ * It seems a bit dumb to copyinstr() the host and path here and then
+ * bcopy() them in mountnfs(), but I wanted to detect errors before
+ * doing the sockargs() call because sockargs() allocates an mbuf and
+ * an error after that means that I have to release the mbuf.
+ */
+/* ARGSUSED */
+static int
+nfs_mount(struct mount *mp, char *path, caddr_t data, struct nameidata *ndp,
+    struct thread *td)
+{
+	int error;
+	struct nfs_args args;
+	struct sockaddr *nam;
+	struct vnode *vp;
+	char hst[MNAMELEN];
+	size_t len;
+
+	/* A NULL path is how a root mount is requested; not supported. */
+	if (path == NULL) {
+		printf("NFSv4: nfs_mountroot not supported\n");
+		return EINVAL;
+	}
+	error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args));
+	if (error)
+		return (error);
+	if (args.version != NFS_ARGSVERSION)
+		return (EPROGMISMATCH);
+	if (mp->mnt_flag & MNT_UPDATE) {
+		struct nfsmount *nmp = VFSTONFS(mp);
+
+		if (nmp == NULL)
+			return (EIO);
+		/*
+		 * When doing an update, we can't change from or to
+		 * v3, switch lockd strategies or change cookie translation
+		 */
+		args.flags = (args.flags &
+		    ~(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_NOLOCKD)) |
+		    (nmp->nm_flag &
+			(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_NOLOCKD));
+		nfs_decode_args(nmp, &args);
+		return (0);
+	}
+
+	/*
+	 * (A second "if (error)" test used to sit here; error is always
+	 * zero at this point, so it has been dropped.)
+	 */
+	error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
+	if (error)
+		return (error);
+	/* zero-fill the remainder so mountnfs() can bcopy MNAMELEN bytes */
+	bzero(&hst[len], MNAMELEN - len);
+	/* sockargs() call must be after above copyin() calls */
+	error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
+	if (error)
+		return (error);
+	/* mountnfs() takes over nam: it stores it or frees it on failure */
+	error = mountnfs(&args, mp, nam, path, hst, &vp, td->td_ucred);
+	return (error);
+}
+
+/*
+ * renew should be done async
+ * should re-scan mount queue each time
+ */
+struct proc *nfs4_daemonproc;
+
+/*
+ * Send a single RENEW for nmp's client id and, on success, record the
+ * time in nm_last_renewal.  Returns 0 or the error produced by
+ * nfs_v4postop().
+ *
+ * The success path now falls through to nfsmout like nfs_statfs()
+ * does; it used to return early and never freed the reply chain.
+ */
+static int
+nfs4_do_renew(struct nfsmount *nmp, struct ucred *cred)
+{
+	struct nfs4_compound cp;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	caddr_t bpos, dpos;
+	int error;
+
+	mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, sizeof(uint64_t));
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	nfs_v4initcompound(&cp);
+
+	nfsm_v4build_compound(&cp, "nfs4_do_renew()");
+	nfsm_v4build_renew(&cp, nmp->nm_clientid);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request_mnt(nmp, NFSV4PROC_COMPOUND, curthread, cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_renew(&cp);
+	nmp->nm_last_renewal = time_second;
+
+ nfsmout:
+	error = nfs_v4postop(&cp, error);
+
+	if (mrep != NULL)
+		m_freem(mrep);
+	return (error);
+}
+
+/*
+ * Lease renewal kernel thread.  Every two seconds (or when woken on
+ * &nfs4_daemonproc) it walks the mount list and renews each "nfs4"
+ * mount whose lease is within four seconds of expiring.  arg is the
+ * credential duplicated by mountnfs(); it is used for every RENEW and
+ * released when the thread exits because no nfs4 mounts remain.
+ *
+ * NOTE(review): mountlist_mtx is dropped around nfs4_do_renew(), so mp
+ * and nmp can be unmounted (nfs_unmount() frees nmp) while the walk is
+ * in progress.  Closing that race needs the mount to be held busy
+ * across the RPC; it is not addressed here.
+ */
+static void
+nfs4_daemon(void *arg)
+{
+	struct mount *mp;
+	struct nfsmount *nmp;
+	int nmounts;
+
+	while (1) {
+		nmounts = 0;
+		mtx_lock(&mountlist_mtx);
+		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
+			if (strcmp(mp->mnt_vfc->vfc_name, "nfs4") != 0)
+				continue;
+			nmounts++;
+			nmp = VFSTONFS(mp);
+			if (time_second < nmp->nm_last_renewal + nmp->nm_lease_time - 4)
+				continue;
+			mtx_unlock(&mountlist_mtx);
+			mtx_lock(&Giant);
+			nfs4_do_renew(nmp, (struct ucred *) arg);
+			mtx_unlock(&Giant);
+			mtx_lock(&mountlist_mtx);
+		}
+		mtx_unlock(&mountlist_mtx);
+
+		/* Must kill the daemon here, or module unload will cause a panic */
+		if (nmounts == 0) {
+			mtx_lock(&Giant);
+			nfs4_daemonproc = NULL;
+			/* drop the reference taken by crdup() in mountnfs() */
+			crfree((struct ucred *) arg);
+			printf("nfsv4 renewd exiting\n");
+			kthread_exit(0);
+		}
+		tsleep(&nfs4_daemonproc, PVFS, "nfs4", 2 * hz);
+	}
+}
+
+/*
+ * Common code for mount and mountroot
+ *
+ * Allocates and initialises the nfsmount, connects to the server and
+ * sends one COMPOUND (PUTROOTFH, a LOOKUP per component of the remote
+ * path taken from the text after the first ':' in hst, GETFH, GETATTR)
+ * to obtain the root file handle and filesystem information.  It then
+ * establishes a client id and starts the renewal thread if it is not
+ * already running.
+ *
+ * nam is consumed: it is stored in the nfsmount on success and freed
+ * on failure (and on the MNT_UPDATE path).  *vpp is never written.
+ * hst is modified in place by strsep().
+ *
+ * Changes from the previous version: the reply chain is freed on the
+ * success path as well, and mp->mnt_data is cleared on failure so it
+ * does not keep pointing at the freed nfsmount.
+ */
+static int
+mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
+    char *pth, char *hst, struct vnode **vpp, struct ucred *cred)
+{
+	struct nfsmount *nmp;
+	char *rpth, *cp1, *cp2;
+	int nlkup = 0, error;
+	struct nfs4_compound cp;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	caddr_t bpos, dpos;
+	struct nfs4_oparg_lookup lkup;
+	struct nfs4_oparg_getfh gfh;
+	struct nfs4_oparg_getattr ga;
+	struct thread *td = curthread; /* XXX */
+
+	if (mp->mnt_flag & MNT_UPDATE) {
+		nmp = VFSTONFS(mp);
+		/* update paths, file handles, etc, here XXX */
+		FREE(nam, M_SONAME);
+		return (0);
+	} else {
+		nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
+		bzero((caddr_t)nmp, sizeof (struct nfsmount));
+		TAILQ_INIT(&nmp->nm_bufq);
+		mp->mnt_data = (qaddr_t)nmp;
+	}
+
+	vfs_getnewfsid(mp);
+	nmp->nm_mountp = mp;
+	nmp->nm_maxfilesize = 0xffffffffLL;
+	nmp->nm_timeo = NFS_TIMEO;
+	nmp->nm_retry = NFS_RETRANS;
+	nmp->nm_wsize = NFS_WSIZE;
+	nmp->nm_rsize = NFS_RSIZE;
+	nmp->nm_readdirsize = NFS_READDIRSIZE;
+	nmp->nm_numgrps = NFS_MAXGRPS;
+	nmp->nm_readahead = NFS_DEFRAHEAD;
+	nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
+	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);
+	nmp->nm_nam = nam;
+	/* Set up the sockets and per-host congestion */
+	nmp->nm_sotype = argp->sotype;
+	nmp->nm_soproto = argp->proto;
+	/* XXX */
+	mp->mnt_stat.f_iosize = PAGE_SIZE;
+
+	/* XXX - this is to make some other code work. Sort of ugly. */
+	argp->flags |= (NFSMNT_NFSV3 | NFSMNT_NFSV4);
+
+	nfs_decode_args(nmp, argp);
+
+	if ((error = nfs4_connect(nmp)))
+		goto bad;
+
+	mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, NFSX_FH(1));
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	ga.bm = &nfsv4_fsinfobm;
+	nfs_v4initcompound(&cp);
+
+	/* Get remote path */
+	rpth = hst;
+	strsep(&rpth, ":");	/* rpth is NULL when hst has no ':' */
+
+	nfsm_v4build_compound(&cp, "mountnfs()");
+	nfsm_v4build_putrootfh(&cp);
+	/* one LOOKUP per non-empty '/'-separated path component */
+	for (cp1 = rpth; cp1 && *cp1; cp1 = cp2)  {
+		while (*cp1 == '/')
+			cp1++;
+		if (!*cp1)
+			break;
+		for (cp2 = cp1; *cp2 && *cp2 != '/'; cp2++)
+			;
+		lkup.name = cp1;
+		lkup.namelen = cp2 - cp1;
+		nfsm_v4build_lookup(&cp, &lkup);
+		nlkup++;
+	}
+	nfsm_v4build_getfh(&cp, &gfh);
+	nfsm_v4build_getattr(&cp, &ga);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request_mnt(nmp, NFSV4PROC_COMPOUND, td, cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_putrootfh(&cp);
+	while (nlkup--)
+		nfsm_v4dissect_lookup(&cp);
+	nfsm_v4dissect_getfh(&cp, &gfh);
+	nfsm_v4dissect_getattr(&cp, &ga);
+
+	nfs4_vfsop_fsinfo(&ga.fa, nmp);
+	nmp->nm_state |= NFSSTA_GOTFSINFO;
+
+	/* Copy root fh into nfsmount. */
+	nmp->nm_fhsize = gfh.fh_len;
+	bcopy(&gfh.fh_val, nmp->nm_fh, nmp->nm_fhsize);
+	nmp->nm_last_renewal = time_second;
+
+	if ((error = nfs4_do_setclientid(nmp, cred)) != 0)
+		goto nfsmout;
+
+	/* Start renewd if it isn't already running */
+	if (nfs4_daemonproc == NULL)
+		kthread_create(nfs4_daemon, crdup(cred), &nfs4_daemonproc,
+			       (RFPROC|RFMEM), 0, "nfs4rd");
+
+	/* everything needed from the reply has been copied out above */
+	if (mrep != NULL)
+		m_freem(mrep);
+	return (0);
+ nfsmout:
+	error = nfs_v4postop(&cp, error);
+
+	/* XXX */
+	if (mrep != NULL)
+		m_freem(mrep);
+bad:
+	nfs_disconnect(nmp);
+	/* do not leave the mount pointing at memory we are about to free */
+	mp->mnt_data = NULL;
+	uma_zfree(nfsmount_zone, nmp);
+	FREE(nam, M_SONAME);
+
+	return (error);
+}
+
+/*
+ * unmount system call
+ */
+static int
+nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
+{
+	struct nfsmount *nmp;
+	int error, flags = 0;
+
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+	nmp = VFSTONFS(mp);
+	/*
+	 * Goes something like this..
+	 * - Call vflush() to clear out vnodes for this filesystem
+	 * - Close the socket
+	 * - Free up the data structures
+	 */
+	/* In the forced case, cancel any outstanding requests. */
+	if (flags & FORCECLOSE) {
+		error = nfs_nmcancelreqs(nmp);
+		if (error)
+			return (error);
+		nfs4dev_purge();
+	}
+
+	error = vflush(mp, 0, flags);
+	if (error)
+		return (error);
+
+	/*
+	 * We are now committed to the unmount.
+	 */
+	nfs_disconnect(nmp);
+	FREE(nmp->nm_nam, M_SONAME);
+
+	/* XXX there's a race condition here for SMP */
+	wakeup(&nfs4_daemonproc);
+
+	uma_zfree(nfsmount_zone, nmp);
+	return (0);
+}
+
+/*
+ * Return root of a filesystem
+ */
+static int
+nfs_root(struct mount *mp, struct vnode **vpp)
+{
+	struct vnode *vp;
+	struct nfsmount *nmp;
+	struct nfsnode *np;
+	int error;
+
+	nmp = VFSTONFS(mp);
+	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+	if (error)
+		return (error);
+	vp = NFSTOV(np);
+	/* a vnode with no type yet is taken to be the root directory */
+	if (vp->v_type == VNON)
+		vp->v_type = VDIR;
+	vp->v_vflag |= VV_ROOT;
+	*vpp = vp;
+
+	return (0);
+}
+
+/*
+ * Flush out the buffer cache
+ *
+ * Walks the mount's vnode list and fsyncs every vnode that is not
+ * locked and has dirty buffers; MNT_LAZY requests skip every vnode.
+ * Returns the last VOP_FSYNC() error seen, or 0.
+ *
+ * The mount interlock is held at the top of every iteration.  The
+ * failed-vget() path previously jumped back to "loop" without
+ * re-taking it, so the walk continued unlocked and the final
+ * MNT_IUNLOCK() released a mutex that was not held.
+ */
+/* ARGSUSED */
+static int
+nfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct thread *td)
+{
+	struct vnode *vp, *vnp;
+	int error, allerror = 0;
+
+	/*
+	 * Force stale buffer cache information to be flushed.
+	 */
+	MNT_ILOCK(mp);
+loop:
+	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
+	     vp != NULL;
+	     vp = vnp) {
+		/*
+		 * If the vnode that we are about to sync is no longer
+		 * associated with this mount point, start over.
+		 */
+		if (vp->v_mount != mp)
+			goto loop;
+		vnp = TAILQ_NEXT(vp, v_nmntvnodes);
+		VI_LOCK(vp);
+		MNT_IUNLOCK(mp);
+		if (VOP_ISLOCKED(vp, NULL) || TAILQ_EMPTY(&vp->v_dirtyblkhd) ||
+		    waitfor == MNT_LAZY) {
+			VI_UNLOCK(vp);
+			MNT_ILOCK(mp);
+			continue;
+		}
+		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
+			/* restart the walk, with the interlock held again */
+			MNT_ILOCK(mp);
+			goto loop;
+		}
+		error = VOP_FSYNC(vp, cred, waitfor, td);
+		if (error)
+			allerror = error;
+		VOP_UNLOCK(vp, 0, td);
+		vrele(vp);
+
+		MNT_ILOCK(mp);
+	}
+	MNT_IUNLOCK(mp);
+	return (allerror);
+}
+
+/*
+ * Obtain and confirm a client id for this mount (SETCLIENTID followed
+ * by SETCLIENTID_CONFIRM).  The client name is "<source ip>-<seq>";
+ * when the server answers NFSERR_CLID_INUSE the exchange is retried
+ * with the next sequence number, up to NFS4_SETCLIENTID_MAXTRIES
+ * attempts.  Without NFS4_USE_RPCCLNT the function is a no-op that
+ * returns 0.
+ *
+ * Changes from the previous version:
+ *  - cp is initialised before the first "goto nfsmout", so
+ *    nfs_v4postop() never sees an uninitialised compound;
+ *  - name[] is large enough for a dotted quad, '-' and any int, and
+ *    both strings are built with snprintf();
+ *  - mrep is reset to NULL whenever it is freed, so a retry or a
+ *    failed second request cannot free the same chain twice.
+ *
+ * NOTE(review): the NFS4_USE_RPCCLNT assignment to scid.cb_netid is
+ * immediately overwritten with "tcp"; left as is because it may be
+ * deliberate.  ipsrc points at inet_ntoa()'s buffer and is reused
+ * after RPCs on retry - confirm nothing else can overwrite it.
+ */
+static int
+nfs4_do_setclientid(struct nfsmount *nmp, struct ucred *cred)
+{
+	struct nfs4_oparg_setclientid scid;
+	struct nfs4_compound cp;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	caddr_t bpos, dpos;
+	struct route ro;
+	char *ipsrc = NULL, uaddr[24], name[32];
+	int try = 0;
+	static int seq;
+	int error;
+
+#ifndef NFS4_USE_RPCCLNT
+	return (0);
+#endif
+	/* nfsmout hands cp to nfs_v4postop(); make sure it is valid */
+	nfs_v4initcompound(&cp);
+
+	if (nmp->nm_clientid) {
+		printf("nfs4_do_setclientid: already have clientid!\n");
+		error = 0;
+		goto nfsmout;
+	}
+
+	/* Try not to re-use clientids */
+	if (seq == 0)
+		seq = time_second & 0xffffff;
+
+#ifdef NFS4_USE_RPCCLNT
+	scid.cb_netid = (nmp->nm_rpcclnt.rc_sotype == SOCK_STREAM) ? "tcp" : "udp";
+#endif
+	scid.cb_netid = "tcp";
+	scid.cb_netidlen = 3;
+	scid.cb_prog = 0x1234; /* XXX */
+
+	/* Do a route lookup to find our source address for talking to this server */
+	bzero(&ro, sizeof ro);
+
+#ifdef NFS4_USE_RPCCLNT
+	ro.ro_dst = *nmp->nm_rpcclnt.rc_name;
+#endif
+	rtalloc(&ro);
+	if (ro.ro_rt == NULL) {
+		error = EHOSTUNREACH;
+		goto nfsmout;
+	}
+	ipsrc = inet_ntoa(IA_SIN(ifatoia(ro.ro_rt->rt_ifa))->sin_addr);
+	snprintf(uaddr, sizeof(uaddr), "%s.12.48", ipsrc);
+	scid.cb_univaddr = uaddr;
+	scid.cb_univaddrlen = strlen(uaddr);
+	RTFREE(ro.ro_rt);
+
+ try_again:
+	snprintf(name, sizeof(name), "%s-%d", ipsrc, seq++);
+	scid.namelen = strlen(name);
+	scid.name = name;
+	nfs_v4initcompound(&cp);
+
+	mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, NFSX_FH(1));
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	nfsm_v4build_compound(&cp, "nfs4_do_setclientid()");
+	nfsm_v4build_setclientid(&cp, &scid);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request_mnt(nmp, NFSV4PROC_COMPOUND, curthread, cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_setclientid(&cp, &scid);
+	nmp->nm_clientid = scid.clientid;
+
+	error = nfs_v4postop(&cp, error);
+
+	/* Confirm */
+	m_freem(mrep);
+	mrep = NULL;
+	mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, NFSX_FH(1));
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	nfs_v4initcompound(&cp);
+
+	nfsm_v4build_compound(&cp, "nfs4_do_setclientid() (confirm)");
+	nfsm_v4build_setclientid_confirm(&cp, &scid);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request_mnt(nmp, NFSV4PROC_COMPOUND, curthread, cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_setclientid_confirm(&cp);
+
+ nfsmout:
+	error = nfs_v4postop(&cp, error);
+
+	if (mrep) {
+		m_freem(mrep);
+		mrep = NULL;
+	}
+	if (error == NFSERR_CLID_INUSE && (++try < NFS4_SETCLIENTID_MAXTRIES))
+		goto try_again;
+
+	return (error);
+}
diff --git a/sys/nfs4client/nfs4_vn.h b/sys/nfs4client/nfs4_vn.h
new file mode 100644
index 0000000..d984713
--- /dev/null
+++
b/sys/nfs4client/nfs4_vn.h @@ -0,0 +1,33 @@ +/* $FreeBSD$ */ +/* $Id: nfs4_vn.h,v 1.5 2003/11/05 14:59:00 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. 
+ */
+
+/*
+ * Include guard named after this header (nfs4_vn.h).  It previously
+ * used _NFS4CLIENT_NFS4_VFS_H, which matches the name of the sibling
+ * header nfs4_vfs.h rather than this file; if that header uses the
+ * same macro (not visible here - confirm), including both would
+ * silently drop the prototype below.
+ */
+#ifndef _NFS4CLIENT_NFS4_VN_H
+#define _NFS4CLIENT_NFS4_VN_H
+
+void	nfs4_vnop_loadattrcache(struct vnode *, struct nfsv4_fattr *, struct vattr *);
+
+#endif /* _NFS4CLIENT_NFS4_VN_H */
diff --git a/sys/nfs4client/nfs4_vn_subs.c b/sys/nfs4client/nfs4_vn_subs.c
new file mode 100644
index 0000000..28eb52d
--- /dev/null
+++ b/sys/nfs4client/nfs4_vn_subs.c
@@ -0,0 +1,223 @@
+/* $FreeBSD$ */
+/* $Id: nfs4_vn_subs.c,v 1.9 2003/11/05 14:59:00 rees Exp $ */
+
+/*
+ * copyright (c) 2003
+ * the regents of the university of michigan
+ * all rights reserved
+ *
+ * permission is granted to use, copy, create derivative works and redistribute
+ * this software and such derivative works for any purpose, so long as the name
+ * of the university of michigan is not used in any advertising or publicity
+ * pertaining to the use or distribution of this software without specific,
+ * written prior authorization. if the above copyright notice or any other
+ * identification of the university of michigan is included in any copy of any
+ * portion of this software, then the disclaimer below must also be included.
+ *
+ * this software is provided as is, without representation from the university
+ * of michigan as to its fitness for any purpose, and without warranty by the
+ * university of michigan of any kind, either express or implied, including
+ * without limitation the implied warranties of merchantability and fitness for
+ * a particular purpose. the regents of the university of michigan shall not be
+ * liable for any damages, including special, indirect, incidental, or
+ * consequential damages, with respect to any claim arising out of or in
+ * connection with the use of the software, even if it has been or is hereafter
+ * advised of the possibility of such damages.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/vnode.h>
+#include <sys/types.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/uma.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+
+#include <rpc/rpcclnt.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfsclient/nfs.h>
+#include <nfsclient/nfsmount.h>
+#include <nfs/xdr_subs.h>
+#include <nfsclient/nfsm_subs.h>
+#include <nfsclient/nfsdiskless.h>
+
+/* NFSv4 */
+#include <nfs4client/nfs4.h>
+#include <nfs4client/nfs4m_subs.h>
+#include <nfs4client/nfs4_vn.h>
+
+#include <nfsclient/nfsnode.h>
+
+/*
+ * Refresh the attribute cache of vp's nfsnode from the decoded NFSv4
+ * attributes in *fap and stamp the cache with the current time.
+ *
+ * Attributes guarded by an FA4V_* bit are taken only when that bit is
+ * set in fa4_valid: mode falls back to 0777, rdev to 0 and mtime to
+ * zero, while nlink, uid, gid, fileid, atime and ctime keep whatever
+ * the cache already held.  The size is copied unconditionally.
+ *
+ * When vaper is not NULL the refreshed cache entry is copied to it,
+ * with locally pending access/update times substituted when the node
+ * is flagged NCHG.
+ */
+void
+nfs4_vnop_loadattrcache(struct vnode *vp, struct nfsv4_fattr *fap,
+    struct vattr *vaper)
+{
+	struct nfsnode *node = VTONFS(vp);
+	struct vattr *cache = &node->n_vattr;
+	struct timespec mod_time;
+	struct timeval now;
+	enum vtype type;
+	u_short mode;
+	int32_t dev;
+
+	microtime(&now);
+
+	/* Decode the optional attributes, falling back to defaults. */
+	type = nv3tov_type[fap->fa4_type & 0x7];
+
+	if (fap->fa4_valid & FA4V_MODE)
+		mode = fap->fa4_mode;
+	else
+		mode = 0777;
+
+	if (fap->fa4_valid & FA4V_RDEV)
+		dev = makeudev(fap->fa4_rdev_major, fap->fa4_rdev_minor);
+	else
+		dev = 0;
+
+	bzero(&mod_time, sizeof mod_time);
+	if (fap->fa4_valid & FA4V_MTIME)
+		mod_time = fap->fa4_mtime;
+
+	/*
+	 * A type change, or a node whose n_mtime was never set, means the
+	 * cached attributes are discarded and the vnode type and n_mtime
+	 * are (re)initialised.
+	 */
+	if (vp->v_type != type || node->n_mtime == 0) {
+		bzero(cache, sizeof *cache);
+		vp->v_type = type;
+		node->n_mtime = mod_time.tv_sec;
+	}
+
+	/* Unconditional fields. */
+	cache->va_type = type;
+	cache->va_mode = (mode & 07777);
+	cache->va_rdev = dev;
+	cache->va_mtime = mod_time;
+	cache->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	cache->va_size = fap->fa4_size;
+	cache->va_blocksize = NFS_FABLKSIZE;
+	cache->va_bytes = fap->fa4_size;
+	cache->va_flags = 0;
+	cache->va_filerev = 0;
+
+	/* Fields copied only when the server supplied them. */
+	if (fap->fa4_valid & FA4V_NLINK)
+		cache->va_nlink = fap->fa4_nlink;
+	if (fap->fa4_valid & FA4V_UID)
+		cache->va_uid = fap->fa4_uid;
+	if (fap->fa4_valid & FA4V_GID)
+		cache->va_gid = fap->fa4_gid;
+	if (fap->fa4_valid & FA4V_FILEID)
+		cache->va_fileid = nfs_v4fileid4_to_fileid(fap->fa4_fileid);
+	if (fap->fa4_valid & FA4V_ATIME)
+		cache->va_atime = fap->fa4_atime;
+	if (fap->fa4_valid & FA4V_CTIME)
+		cache->va_ctime = fap->fa4_ctime;
+
+	/* XXX dontshrink flag? */
+	if (cache->va_size != node->n_size) {
+		/*
+		 * A locally modified regular file never shrinks: keep the
+		 * larger local size.  In every other case the size just
+		 * received wins.
+		 */
+		if (cache->va_type == VREG && (node->n_flag & NMODIFIED) &&
+		    cache->va_size < node->n_size)
+			cache->va_size = node->n_size;
+		else
+			node->n_size = cache->va_size;
+		if (cache->va_type == VREG)
+			vnode_pager_setsize(vp, node->n_size);
+	}
+	node->n_attrstamp = now.tv_sec;
+
+	if (vaper == NULL)
+		return;
+
+	bcopy((caddr_t)cache, (caddr_t)vaper, sizeof(*cache));
+	if (node->n_flag & NCHG) {
+		if (node->n_flag & NACC)
+			vaper->va_atime = node->n_atim;
+		if (node->n_flag & NUPD)
+			vaper->va_mtime = node->n_mtim;
+	}
+}
+
+static uint64_t nfs_nullcookie = 0;
+/*
+ * This function finds the directory cookie that corresponds to the
+ * logical byte offset given.
+ */
+uint64_t *
+nfs4_getcookie(struct nfsnode *np, off_t off, int add)
+{
+	struct nfsdmap *cur, *fresh;
+	int slot;
+
+	/*
+	 * Directory block 0 (and any negative offset) always maps to the
+	 * shared null cookie; block k > 0 uses slot k - 1.
+	 */
+	slot = (uoff_t)off / NFS_DIRBLKSIZ;
+	if (slot == 0 || off < 0) {
+#ifdef DIAGNOSTIC
+		if (add)
+			panic("nfs getcookie add at <= 0");
+#endif
+		return (&nfs_nullcookie);
+	}
+	slot--;
+
+	/* Find the first cookie map, creating it only when adding. */
+	cur = LIST_FIRST(&np->n_cookies);
+	if (cur == NULL) {
+		if (!add)
+			return (NULL);
+		MALLOC(cur, struct nfsdmap *, sizeof (struct nfsdmap),
+			M_NFSDIROFF, M_WAITOK);
+		cur->ndm_eocookie = 0;
+		LIST_INSERT_HEAD(&np->n_cookies, cur, ndm_list);
+	}
+
+	/* Step one map forward for every NFSNUMCOOKIES slots. */
+	for (; slot >= NFSNUMCOOKIES; ) {
+		slot -= NFSNUMCOOKIES;
+		if (LIST_NEXT(cur, ndm_list) != NULL) {
+			/* a lookup gives up if this map is not full and
+			 * the remaining slot lies beyond its end */
+			if (!add && cur->ndm_eocookie < NFSNUMCOOKIES &&
+			    slot >= cur->ndm_eocookie)
+				return (NULL);
+			cur = LIST_NEXT(cur, ndm_list);
+			continue;
+		}
+		if (!add)
+			return (NULL);
+		/* end of the chain while adding: append an empty map */
+		MALLOC(fresh, struct nfsdmap *, sizeof (struct nfsdmap),
+			M_NFSDIROFF, M_WAITOK);
+		fresh->ndm_eocookie = 0;
+		LIST_INSERT_AFTER(cur, fresh, ndm_list);
+		cur = fresh;
+	}
+
+	/* Past the last valid slot: extend when adding, else fail. */
+	if (slot >= cur->ndm_eocookie) {
+		if (!add)
+			return (NULL);
+		cur->ndm_eocookie = slot + 1;
+	}
+	return (&cur->ndm4_cookies[slot]);
+}
+
+/*
+ * Invalidate cached directory information, except for the actual directory
+ * blocks (which are invalidated separately).
+ * Done mainly to avoid the use of stale offset cookies.
+ */
+void
+nfs4_invaldir(struct vnode *vp)
+{
+	struct nfsnode *node;
+	struct nfsdmap *first;
+
+	node = VTONFS(vp);
+
+	/* forget the end-of-directory offset and the cookie verifier */
+	node->n_direofoffset = 0;
+	bzero(node->n4_cookieverf, NFSX_V4VERF);
+
+	/* mark the first cookie map, if there is one, as empty */
+	first = LIST_FIRST(&node->n_cookies);
+	if (first != NULL)
+		first->ndm_eocookie = 0;
+}
diff --git a/sys/nfs4client/nfs4_vnops.c b/sys/nfs4client/nfs4_vnops.c
new file mode 100644
index 0000000..48e4549
--- /dev/null
+++ b/sys/nfs4client/nfs4_vnops.c
@@ -0,0 +1,3200 @@
+/* $FreeBSD$ */
+/* $Id: nfs_vnops.c,v 1.45 2003/11/05 14:59:02 rees Exp $ */
+
+/*
+ * copyright (c) 2003
+ * the regents of the university of michigan
+ * all rights reserved
+ *
+ * permission is granted to use, copy, create derivative works and redistribute
+ * this software and such derivative works for any purpose, so long as the name
+ * of the university of michigan is not used in any advertising or publicity
+ * pertaining to the use or distribution of this software without specific,
+ * written prior authorization. if the above copyright notice or any other
+ * identification of the university of michigan is included in any copy of any
+ * portion of this software, then the disclaimer below must also be included.
+ *
+ * this software is provided as is, without representation from the university
+ * of michigan as to its fitness for any purpose, and without warranty by the
+ * university of michigan of any kind, either express or implied, including
+ * without limitation the implied warranties of merchantability and fitness for
+ * a particular purpose. the regents of the university of michigan shall not be
+ * liable for any damages, including special, indirect, incidental, or
+ * consequential damages, with respect to any claim arising out of or in
+ * connection with the use of the software, even if it has been or is hereafter
+ * advised of the possibility of such damages.
+ */
+
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * vnode op calls for NFS version 4 (derived from the Sun NFS version 2 and 3 client) + */ + +#include "opt_inet.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/resourcevar.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/namei.h> +#include <sys/socket.h> +#include <sys/vnode.h> +#include <sys/dirent.h> +#include <sys/fcntl.h> +#include <sys/lockf.h> +#include <sys/stat.h> +#include <sys/sysctl.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> + +#include <fs/fifofs/fifo.h> + +#include <rpc/rpcclnt.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfsclient/nfs.h> +#include <nfs4client/nfs4.h> +#include <nfsclient/nfsnode.h> +#include <nfsclient/nfsmount.h> +#include <nfsclient/nfs_lock.h> +#include <nfs/xdr_subs.h> +#include <nfsclient/nfsm_subs.h> + +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/in_var.h> + +/* NFSv4 */ +#include <nfs4client/nfs4m_subs.h> +#include <nfs4client/nfs4_vn.h> + +/* Defs */ +#define TRUE 1 +#define FALSE 0 + +/* + * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these + * calls are not in getblk() and brelse() so that they would not be necessary + * here. 
+ */ +#ifndef B_VMIO +#define vfs_busy_pages(bp, f) +#endif + +static int nfsspec_read(struct vop_read_args *); +static int nfsspec_write(struct vop_write_args *); +static int nfsfifo_read(struct vop_read_args *); +static int nfsfifo_write(struct vop_write_args *); +static int nfsspec_close(struct vop_close_args *); +static int nfsfifo_close(struct vop_close_args *); +static int nfs4_flush(struct vnode *, struct ucred *, int, struct thread *, + int); +static int nfs4_setattrrpc(struct vnode *, struct vattr *, struct ucred *, + struct thread *); +static int nfs4_closerpc(struct vnode *, struct ucred *, struct thread *, int); + +static int nfs4_lookup(struct vop_lookup_args *); +static int nfs4_create(struct vop_create_args *); +static int nfs4_mknod(struct vop_mknod_args *); +static int nfs4_open(struct vop_open_args *); +static int nfs4_close(struct vop_close_args *); +static int nfs4_access(struct vop_access_args *); +static int nfs4_getattr(struct vop_getattr_args *); +static int nfs4_setattr(struct vop_setattr_args *); +static int nfs4_read(struct vop_read_args *); +static int nfs4_fsync(struct vop_fsync_args *); +static int nfs4_remove(struct vop_remove_args *); +static int nfs4_link(struct vop_link_args *); +static int nfs4_rename(struct vop_rename_args *); +static int nfs4_mkdir(struct vop_mkdir_args *); +static int nfs4_rmdir(struct vop_rmdir_args *); +static int nfs4_symlink(struct vop_symlink_args *); +static int nfs4_readdir(struct vop_readdir_args *); +static int nfs4_strategy(struct vop_strategy_args *); +static int nfs4_lookitup(struct vnode *, const char *, int, + struct ucred *, struct thread *, struct nfsnode **); +static int nfs4_sillyrename(struct vnode *, struct vnode *, + struct componentname *); +static int nfsspec_access(struct vop_access_args *); +static int nfs4_readlink(struct vop_readlink_args *); +static int nfs4_print(struct vop_print_args *); +static int nfs4_advlock(struct vop_advlock_args *); + +/* + * Global vfs data structures for 
+ * nfs
+ */
+/*
+ * Operation table for ordinary NFSv4 vnodes.  Handlers named nfs4_* are
+ * declared in this file; the nfs_* handlers (getpages, putpages, inactive,
+ * reclaim, write) are not defined in this chunk -- presumably they are the
+ * existing NFS client's implementations.
+ */
+vop_t **nfs4_vnodeop_p;
+static struct vnodeopv_entry_desc nfs4_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_access_desc,		(vop_t *) nfs4_access },
+	{ &vop_advlock_desc,		(vop_t *) nfs4_advlock },
+	{ &vop_close_desc,		(vop_t *) nfs4_close },
+	{ &vop_create_desc,		(vop_t *) nfs4_create },
+	{ &vop_fsync_desc,		(vop_t *) nfs4_fsync },
+	{ &vop_getattr_desc,		(vop_t *) nfs4_getattr },
+	{ &vop_getpages_desc,		(vop_t *) nfs_getpages },
+	{ &vop_putpages_desc,		(vop_t *) nfs_putpages },
+	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
+	{ &vop_lease_desc,		(vop_t *) vop_null },
+	{ &vop_link_desc,		(vop_t *) nfs4_link },
+	{ &vop_lookup_desc,		(vop_t *) nfs4_lookup },
+	{ &vop_mkdir_desc,		(vop_t *) nfs4_mkdir },
+	{ &vop_mknod_desc,		(vop_t *) nfs4_mknod },
+	{ &vop_open_desc,		(vop_t *) nfs4_open },
+	{ &vop_print_desc,		(vop_t *) nfs4_print },
+	{ &vop_read_desc,		(vop_t *) nfs4_read },
+	{ &vop_readdir_desc,		(vop_t *) nfs4_readdir },
+	{ &vop_readlink_desc,		(vop_t *) nfs4_readlink },
+	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
+	{ &vop_remove_desc,		(vop_t *) nfs4_remove },
+	{ &vop_rename_desc,		(vop_t *) nfs4_rename },
+	{ &vop_rmdir_desc,		(vop_t *) nfs4_rmdir },
+	{ &vop_setattr_desc,		(vop_t *) nfs4_setattr },
+	{ &vop_strategy_desc,		(vop_t *) nfs4_strategy },
+	{ &vop_symlink_desc,		(vop_t *) nfs4_symlink },
+	{ &vop_write_desc,		(vop_t *) nfs_write },
+	{ NULL, NULL }
+};
+static struct vnodeopv_desc nfs4_vnodeop_opv_desc =
+	{ &nfs4_vnodeop_p, nfs4_vnodeop_entries };
+VNODEOP_SET(nfs4_vnodeop_opv_desc);
+
+/*
+ * Special device vnode ops
+ * (anything not listed goes to spec_vnoperate via the default entry)
+ */
+vop_t **spec_nfs4nodeop_p;
+static struct vnodeopv_entry_desc nfs4_specop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
+	{ &vop_access_desc,		(vop_t *) nfsspec_access },
+	{ &vop_close_desc,		(vop_t *) nfsspec_close },
+	{ &vop_fsync_desc,		(vop_t *) nfs4_fsync },
+	{ &vop_getattr_desc,		(vop_t *) nfs4_getattr },
+	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
+	{ &vop_print_desc,		(vop_t *) nfs4_print },
+	{ &vop_read_desc,		(vop_t *) nfsspec_read },
+	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
+	{ &vop_setattr_desc,		(vop_t *) nfs4_setattr },
+	{ &vop_write_desc,		(vop_t *) nfsspec_write },
+	{ NULL, NULL }
+};
+static struct vnodeopv_desc spec_nfs4nodeop_opv_desc =
+	{ &spec_nfs4nodeop_p, nfs4_specop_entries };
+VNODEOP_SET(spec_nfs4nodeop_opv_desc);
+
+/*
+ * Fifo vnode ops: same shape as the special-device table, with the default
+ * entry and the read/write/close handlers swapped for their fifo versions.
+ */
+vop_t **fifo_nfs4nodeop_p;
+static struct vnodeopv_entry_desc nfs4_fifoop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
+	{ &vop_access_desc,		(vop_t *) nfsspec_access },
+	{ &vop_close_desc,		(vop_t *) nfsfifo_close },
+	{ &vop_fsync_desc,		(vop_t *) nfs4_fsync },
+	{ &vop_getattr_desc,		(vop_t *) nfs4_getattr },
+	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
+	{ &vop_print_desc,		(vop_t *) nfs4_print },
+	{ &vop_read_desc,		(vop_t *) nfsfifo_read },
+	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
+	{ &vop_setattr_desc,		(vop_t *) nfs4_setattr },
+	{ &vop_write_desc,		(vop_t *) nfsfifo_write },
+	{ NULL, NULL }
+};
+static struct vnodeopv_desc fifo_nfs4nodeop_opv_desc =
+	{ &fifo_nfs4nodeop_p, nfs4_fifoop_entries };
+VNODEOP_SET(fifo_nfs4nodeop_opv_desc);
+
+/* Prototypes for the file-local RPC helpers used by the vnode ops. */
+static int	nfs4_removerpc(struct vnode *dvp, const char *name, int namelen,
+			       struct ucred *cred, struct thread *td);
+static int	nfs4_renamerpc(struct vnode *fdvp, const char *fnameptr,
+			       int fnamelen, struct vnode *tdvp,
+			       const char *tnameptr, int tnamelen,
+			       struct ucred *cred, struct thread *td);
+static int	nfs4_renameit(struct vnode *sdvp, struct componentname *scnp,
+			      struct sillyrename *sp);
+static int	nfs4_openrpc(struct vnode *, struct vnode **,
+			     struct componentname *, int, struct vattr *);
+static int	nfs4_open_confirm(struct vnode *vp, struct nfs4_compound *cpp,
+				  struct nfs4_oparg_open *openap,
+				  struct nfs4_oparg_getfh *gfh,
+				  struct ucred *cred, struct thread *td);
+static int	nfs4_createrpc(struct vnode *, struct vnode **,
+			       struct componentname *, nfstype,
+			       struct vattr *, char *);
+
+/*
+ * Global variables
+ */
+/* The single lock owner; nfs4_openrpc() attaches it to every open context. */
+struct nfs4_lowner nfs4_masterlowner;
+
+#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
+
+SYSCTL_DECL(_vfs_nfs4);
+
+/* Lifetime, in seconds, of a cached ACCESS result (compared to time_second). */
+static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
+SYSCTL_INT(_vfs_nfs4, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
+	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
+
+/* Non-zero makes nfs4_close() ask nfs4_flush() for a commit as well. */
+static int	nfsv3_commit_on_close = 0;
+SYSCTL_INT(_vfs_nfs4, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
+	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
+#if 0
+SYSCTL_INT(_vfs_nfs4, OID_AUTO, access_cache_hits, CTLFLAG_RD,
+	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
+
+SYSCTL_INT(_vfs_nfs4, OID_AUTO, access_cache_misses, CTLFLAG_RD,
+	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
+#endif
+
+#define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
+			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
+			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
+/*
+ * NFSv3-style over-the-wire ACCESS check.
+ *
+ * Currently a stub: the unconditional return below reports success before
+ * any request is built, so everything after it is unreachable.  The only
+ * caller visible here, nfs4_access(), has its v3 branch disabled with
+ * "if (0 && v3)".  The dead body would issue an ACCESS rpc for wmode and
+ * cache the reply in np->n_mode, n_modeuid and n_modestamp.
+ *
+ * NOTE(review): the nfsm_* macros appear to rely on the local names
+ * (mreq, mrep, md, mb, bpos, dpos, tl, error) and on the nfsmout label;
+ * keep them when reviving this code.
+ */
+static int
+nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
+    struct ucred *cred)
+{
+	const int v3 = 1;
+	u_int32_t *tl;
+	int error = 0, attrflag;
+
+	/* Disabled: report success without going to the wire. */
+	return (0);
+
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	caddr_t bpos, dpos;
+	u_int32_t rmode;
+	struct nfsnode *np = VTONFS(vp);
+
+	nfsstats.rpccnt[NFSPROC_ACCESS]++;
+	mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+	nfsm_fhtom(vp, v3);
+	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
+	*tl = txdr_unsigned(wmode);
+	nfsm_request(vp, NFSPROC_ACCESS, td, cred);
+	nfsm_postop_attr(vp, attrflag);
+	if (!error) {
+		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
+		rmode = fxdr_unsigned(u_int32_t, *tl);
+		np->n_mode = rmode;
+		np->n_modeuid = cred->cr_uid;
+		np->n_modestamp = time_second;
+	}
+	m_freem(mrep);
+nfsmout:
+	return error;
+}
+
+/*
+ * nfs access vnode op.
+ * For nfs version 2, just return ok. File accesses may fail later.
+ * For nfs version 3, use the access rpc to check accessibility. If file modes
+ * are changed on the server, accesses might still fail later.
+ * For nfs version 4 (the only live path below), send a PUTFH + ACCESS
+ * compound and return EACCES unless every requested access bit is granted.
+ */
+static int
+nfs4_access(struct vop_access_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	int error = 0;
+	u_int32_t mode, wmode;
+	int v3 = NFS_ISV3(vp);	/* v3 \in v4 */
+	struct nfsnode *np = VTONFS(vp);
+	caddr_t bpos, dpos;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	struct nfs4_compound cp;
+	struct nfs4_oparg_access acc;
+	struct thread *td = ap->a_td;
+	struct ucred *cred = ap->a_cred;
+
+	/*
+	 * Disallow write attempts on filesystems mounted read-only;
+	 * unless the file is a socket, fifo, or a block or character
+	 * device resident on the filesystem.
+	 */
+	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+		switch (vp->v_type) {
+		case VREG:
+		case VDIR:
+		case VLNK:
+			return (EROFS);
+		default:
+			break;
+		}
+	}
+	/*
+	 * For nfs v3, check to see if we have done this recently, and if
+	 * so return our cached result instead of making an ACCESS call.
+	 * If not, do an access rpc, otherwise you are stuck emulating
+	 * ufs_access() locally using the vattr. This may not be correct,
+	 * since the server may apply other access criteria such as
+	 * client uid-->server uid mapping that we do not know about.
+	 */
+	/* XXX Disable this for now; needs fixing of _access_otw() */
+	if (0 && v3) {
+		if (ap->a_mode & VREAD)
+			mode = NFSV3ACCESS_READ;
+		else
+			mode = 0;
+		if (vp->v_type != VDIR) {
+			if (ap->a_mode & VWRITE)
+				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
+			if (ap->a_mode & VEXEC)
+				mode |= NFSV3ACCESS_EXECUTE;
+		} else {
+			if (ap->a_mode & VWRITE)
+				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
+				    NFSV3ACCESS_DELETE);
+			if (ap->a_mode & VEXEC)
+				mode |= NFSV3ACCESS_LOOKUP;
+		}
+		/* XXX safety belt, only make blanket request if caching */
+		if (nfsaccess_cache_timeout > 0) {
+			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
+			    NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
+			    NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
+		} else {
+			wmode = mode;
+		}
+
+		/*
+		 * Does our cached result allow us to give a definite yes to
+		 * this request?
+		 */
+		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
+		    (ap->a_cred->cr_uid == np->n_modeuid) &&
+		    ((np->n_mode & mode) == mode)) {
+			nfsstats.accesscache_hits++;
+		} else {
+			/*
+			 * Either a no, or a don't know. Go to the wire.
+			 */
+			nfsstats.accesscache_misses++;
+			error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred);
+			if (!error) {
+				if ((np->n_mode & mode) != mode) {
+					error = EACCES;
+				}
+			}
+		}
+		return (error);
+	}
+
+	/* XXX use generic access code here? */
+	/* Translate the VREAD/VWRITE/VEXEC request into NFSv4 ACCESS bits. */
+	mode = ap->a_mode & VREAD ? NFSV4ACCESS_READ : 0;
+	if (vp->v_type == VDIR) {
+		if (ap->a_mode & VWRITE)
+			mode |= NFSV4ACCESS_MODIFY | NFSV4ACCESS_EXTEND | NFSV4ACCESS_DELETE;
+		if (ap->a_mode & VEXEC)
+			mode |= NFSV4ACCESS_LOOKUP;
+	} else {
+		if (ap->a_mode & VWRITE)
+			mode |= NFSV4ACCESS_MODIFY | NFSV4ACCESS_EXTEND;
+		if (ap->a_mode & VEXEC)
+			mode |= NFSV4ACCESS_EXECUTE;
+	}
+
+	nfs_v4initcompound(&cp);
+	acc.mode = mode;
+
+	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	nfsm_v4build_compound(&cp, "nfs4_access()");
+	nfsm_v4build_putfh(&cp, vp);
+	nfsm_v4build_access(&cp, &acc);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_putfh(&cp);
+	nfsm_v4dissect_access(&cp, &acc);
+
+	/* Every requested bit must have been granted by the server. */
+	if ((acc.rmode & mode) != mode)
+		error = EACCES;
+
+ nfsmout:
+	error = nfs_v4postop(&cp, error);
+
+	if (mrep != NULL)
+		m_freem(mrep);
+
+	return (error);
+}
+
+/*
+ * Issue the OPEN compound (PUTFH dvp, OPEN, GETATTR, GETFH), followed by
+ * OPEN_CONFIRM when the server asks for it.
+ *
+ * dvp   - directory containing the name in cnp
+ * vpp   - in: *vpp is the vnode to open, or NULL to create a new file;
+ *         out: the opened vnode
+ * cnp   - name, credentials and thread for the request
+ * flags - open flags; FWRITE selects the write context, O_TRUNC truncates
+ *         an existing non-empty file through nfs4_setattrrpc()
+ * vap   - attributes handed to the OPEN builder
+ *
+ * An existing file keeps one open context per direction (np->n_wfc and
+ * np->n_rfc) with a reference count; only the first reference goes to the
+ * server.  Returns 0 or an error number.
+ */
+static int
+nfs4_openrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
+    int flags, struct vattr *vap)
+{
+	struct vnode *vp = *vpp;
+	struct nfs4_oparg_getattr getattr;
+	struct nfs4_oparg_getfh getfh;
+	struct nfs4_oparg_open opena;
+	struct nfs4_compound cp;
+	caddr_t bpos, dpos;
+	int error = 0;
+	int opened = 0;		/* set once the server has granted the open */
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	struct ucred *cred = cnp->cn_cred;
+	struct thread *td = cnp->cn_thread;
+	struct nfs4_fctx xfc, *fcp;
+	struct nfsnode *np;
+
+	if (vp == NULL) {
+		/* Create a new file */
+		np = NULL;
+		fcp = &xfc;
+		bzero(fcp, sizeof(*fcp));
+	} else {
+		np = VTONFS(vp);
+		fcp = flags & FWRITE ? &np->n_wfc : &np->n_rfc;
+	}
+
+	/*
+	 * Since there is currently only one lockowner, the file is opened
+	 * on the server at most once for reading and once for writing.
+	 */
+	if (fcp->refcnt++ != 0) {
+		*vpp = vp;
+		/*printf("not opening %s\n", np->n_name != NULL ? np->n_name : "");*/
+		return (0);
+	}
+
+	fcp->lop = &nfs4_masterlowner;
+	fcp->pid = cnp->cn_thread->td_proc->p_pid;
+	fcp->np = np;
+
+	nfs_v4initcompound(&cp);
+	cp.nmp = VFSTONFS(dvp->v_mount);
+
+	opena.ctype = NCLNULL;
+	opena.flags = flags;
+	opena.vap = vap;
+	opena.fcp = fcp;		/* For lockowner */
+	opena.cnp = cnp;
+
+	getattr.bm = &nfsv4_getattrbm;
+
+	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	nfsm_v4build_compound(&cp, "nfs4_openrpc()");
+	nfsm_v4build_putfh(&cp, dvp);
+	nfsm_v4build_open(&cp, &opena);
+	nfsm_v4build_getattr(&cp, &getattr);
+	nfsm_v4build_getfh(&cp, &getfh);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request(vp != NULL ? vp : dvp, NFSV4PROC_COMPOUND, td, cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_putfh(&cp);
+	nfsm_v4dissect_open(&cp, &opena);
+	nfsm_v4dissect_getattr(&cp, &getattr);
+	nfsm_v4dissect_getfh(&cp, &getfh);
+
+	/*
+	 * Do not go on to use the GETFH/GETATTR results (or let a later
+	 * call overwrite the status) when the compound itself failed.
+	 */
+	error = nfs_v4postop(&cp, error);
+	if (error != 0)
+		goto nfsmout;
+
+	if (opena.rflags & NFSV4OPENRES_CONFIRM) {
+		error = nfs4_open_confirm(vp ? vp : dvp, &cp, &opena, &getfh, cred, td);
+		if (error != 0)
+			goto nfsmout;
+	}
+	opened = 1;
+
+	if (vp == NULL) {
+		/* New file */
+		error = nfs_nget(dvp->v_mount, &getfh.fh_val,
+		    getfh.fh_len, &np);
+		if (error != 0)
+			goto nfsmout;
+
+		vp = NFSTOV(np);
+		np->n_dvp = dvp;
+		np->n_namelen = cnp->cn_namelen; /* XXX memory leaks on these; track! */
+		if (np->n_name != NULL)
+			FREE(np->n_name, M_NFSREQ);
+		MALLOC(np->n_name, u_char *, np->n_namelen + 1, M_NFSREQ, M_WAITOK);
+		bcopy(cnp->cn_nameptr, np->n_name, np->n_namelen);
+		np->n_name[np->n_namelen] = '\0';
+		/*
+		 * NOTE(review): fcp->np was recorded as NULL above for a new
+		 * file and is copied into the node unchanged -- confirm
+		 * whether it should point at np.
+		 */
+		if (flags & FWRITE)
+			np->n_wfc = *fcp;
+		else
+			np->n_rfc = *fcp;
+
+		/*printf("opened new file %s\n", np->n_name);*/
+
+		nfs4_vnop_loadattrcache(vp, &getattr.fa, NULL);
+		*vpp = vp;
+	} else {
+		/*printf("opened \"old\" %s\n", np->n_name != NULL ? np->n_name : "");*/
+
+		if (flags & O_TRUNC && np->n_size != 0) {
+			struct vattr va;
+
+			VATTR_NULL(&va);
+			va.va_size = 0;
+			error = nfs4_setattrrpc(vp, &va,
+			    cnp->cn_cred, cnp->cn_thread);
+		}
+		np->n_attrstamp = 0;
+	}
+
+ nfsmout:
+	/*
+	 * The reference was taken before talking to the server.  If the open
+	 * was never granted, give it back; otherwise the next open of this
+	 * file would take the "already open" shortcut above and succeed
+	 * without any open state on the server.
+	 */
+	if (error != 0 && !opened)
+		fcp->refcnt--;
+
+	if (mrep != NULL)
+		m_freem(mrep);
+
+	return (error);
+}
+
+/*
+ * Confirm an OPEN: send PUTFH (of the handle in gfh) + OPEN_CONFIRM.
+ * The caller's compound cpp is re-initialised and reused for this request.
+ * Returns 0 or an error number.
+ */
+static int
+nfs4_open_confirm(struct vnode *vp, struct nfs4_compound *cpp,
+    struct nfs4_oparg_open *openap, struct nfs4_oparg_getfh *gfh,
+    struct ucred *cred, struct thread *td)
+{
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+
+	nfs_v4initcompound(cpp);
+	cpp->nmp = VFSTONFS(vp->v_mount);
+
+	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	nfsm_v4build_compound(cpp, "nfs4_open_confirm()");
+	nfsm_v4build_putfh_nv(cpp, gfh);
+	nfsm_v4build_open_confirm(cpp, openap);
+	nfsm_v4build_finalize(cpp);
+
+	nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(cpp);
+	nfsm_v4dissect_putfh(cpp);
+	nfsm_v4dissect_open_confirm(cpp, openap);
+
+ nfsmout:
+	error = nfs_v4postop(cpp, error);
+
+	if (mrep != NULL)
+		m_freem(mrep);
+
+	return (error);
+}
+
+
+/*
+ * nfs open vnode op
+ * Check to see if the type is ok
+ * and that deletion is not in progress.
+ * For paged in text files, you will need to flush the page cache
+ * if consistency is lost.
+ */
+/* ARGSUSED */
+static int
+nfs4_open(struct vop_open_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+	enum vtype vtype = vp->v_type;
+	int mode = ap->a_mode;
+	struct componentname cn;
+
+	/* Only regular files are opened on the server. */
+	if (vtype != VREG) {
+		if (vtype != VDIR && vtype != VLNK) {
+#ifdef DIAGNOSTIC
+			printf("open eacces vtyp=%d\n", vp->v_type);
+#endif
+			return (EACCES);
+		} else
+			return (0);
+	}
+
+	/* NCREATED is consumed here and no second OPEN is sent. */
+	if (np->n_flag & NCREATED) {
+		np->n_flag &= ~NCREATED;
+		return (0);
+	}
+
+	/*
+	 * Build a componentname from the name remembered in the nfsnode.
+	 * Zero it first: only four fields are filled in below, and the
+	 * structure is handed on to the OPEN request builder.
+	 *
+	 * NOTE(review): np->n_dvp and np->n_name are only set by
+	 * nfs4_lookup()/nfs4_openrpc() in the code visible here; confirm
+	 * that every node reaching this point has them.
+	 */
+	bzero(&cn, sizeof(cn));
+	cn.cn_nameptr = np->n_name;
+	cn.cn_namelen = np->n_namelen;
+	cn.cn_cred = ap->a_cred;
+	cn.cn_thread = ap->a_td;
+
+	return (nfs4_openrpc(np->n_dvp, &vp, &cn, mode, NULL));
+}
+
+/*
+ * Drop one reference on the read or write open context of vp (chosen by
+ * FWRITE in flags) and, when it was the last one, send PUTFH + CLOSE.
+ * Returns 0 without an rpc while other references remain.
+ */
+static int
+nfs4_closerpc(struct vnode *vp, struct ucred *cred, struct thread *td, int flags)
+{
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	struct nfs4_fctx *fcp;
+	struct nfs4_compound cp;
+	struct nfsnode *np = VTONFS(vp);
+
+	fcp = flags & FWRITE ? &np->n_wfc : &np->n_rfc;
+
+	nfs_v4initcompound(&cp);
+
+	if (--fcp->refcnt != 0)
+		return (0);
+
+	/*printf("closing %s\n", np->n_name != NULL ? np->n_name : "");*/
+
+	cp.fcp = fcp;
+
+	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	nfsm_v4build_compound(&cp, "nfs4_closerpc()");
+	nfsm_v4build_putfh(&cp, vp);
+	nfsm_v4build_close(&cp, fcp);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_putfh(&cp);
+	nfsm_v4dissect_close(&cp, fcp);
+
+ nfsmout:
+	error = nfs_v4postop(&cp, error);
+
+	if (mrep != NULL)
+		m_freem(mrep);
+
+	return (error);
+}
+
+/*
+ * nfs close vnode op
+ * What an NFS client should do upon close after writing is a debatable issue.
+ * Most NFS clients push delayed writes to the server upon close, basically for
+ * two reasons:
+ * 1 - So that any write errors may be reported back to the client process
+ *     doing the close system call.
+ *     By far the two most likely errors are
+ *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
+ * 2 - To put a worst case upper bound on cache inconsistency between
+ *     multiple clients for the file.
+ * There is also a consistency problem for Version 2 of the protocol w.r.t.
+ * not being able to tell if other clients are writing a file concurrently,
+ * since there is no way of knowing if the changed modify time in the reply
+ * is only due to the write for this client.
+ * (NFS Version 3 provides weak cache consistency data in the reply that
+ * should be sufficient to detect and handle this case.)
+ *
+ * The current code does the following:
+ * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
+ * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
+ *                     or commit them (this satisfies 1 and 2 except for the
+ *                     case where the server crashes after this close but
+ *                     before the commit RPC, which is felt to be "good
+ *                     enough". Changing the last argument to nfs_flush() to
+ *                     a 1 would force a commit operation, if it is felt a
+ *                     commit is necessary now.
+ *
+ * The CLOSE rpc is always attempted, but a failure while flushing takes
+ * precedence in the value returned to the caller (reason 1 above).
+ */
+/* ARGSUSED */
+static int
+nfs4_close(struct vop_close_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+	int error = 0;
+	int cerror;
+
+	if (vp->v_type != VREG)
+		return (0);
+
+	if (np->n_flag & NMODIFIED) {
+		if (NFS_ISV3(vp)) {
+			/*
+			 * Under NFSv3 we have dirty buffers to
+			 * dispose of. We must flush them to the NFS
+			 * server. We have the option of waiting all
+			 * the way through the commit rpc or just
+			 * waiting for the initial write. The default
+			 * is to only wait through the initial write
+			 * so the data is in the server's cache, which
+			 * is roughly similar to the state a standard
+			 * disk subsystem leaves the file in on
+			 * close().
+			 *
+			 * We cannot clear the NMODIFIED bit in
+			 * np->n_flag due to potential races with
+			 * other processes, and certainly cannot clear
+			 * it if we don't commit.
+			 */
+			int cm = nfsv3_commit_on_close ? 1 : 0;
+			error = nfs4_flush(vp, ap->a_cred, MNT_WAIT, ap->a_td, cm);
+			/* np->n_flag &= ~NMODIFIED; */
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_td);
+			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_td, 1);
+			VOP_UNLOCK(vp, 0, ap->a_td);
+		}
+		np->n_attrstamp = 0;
+	}
+
+	/*
+	 * Always release the open on the server, but do not let a successful
+	 * CLOSE hide an error from the flush above.
+	 */
+	cerror = nfs4_closerpc(vp, ap->a_cred, ap->a_td, ap->a_fflag);
+	if (error == 0)
+		error = cerror;
+
+	/* Report a deferred write error exactly once. */
+	if (!error && np->n_flag & NWRITEERR) {
+		np->n_flag &= ~NWRITEERR;
+		error = np->n_error;
+	}
+	return (error);
+}
+
+/*
+ * nfs getattr call from vfs.
+ * Answers from the attribute cache when nfs_getattrcache() succeeds;
+ * otherwise sends PUTFH + GETATTR and loads the reply into both the cache
+ * and ap->a_vap.
+ */
+static int
+nfs4_getattr(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	struct nfs4_oparg_getattr ga;
+	struct nfs4_compound cp;
+
+	/*
+	 * Update local times for special files.
+	 */
+	if (np->n_flag & (NACC | NUPD))
+		np->n_flag |= NCHG;
+	/*
+	 * First look in the cache.
+	 */
+	if (nfs_getattrcache(vp, ap->a_vap) == 0)
+		return (0);
+
+	nfsstats.rpccnt[NFSPROC_GETATTR]++;
+
+	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, NFSX_FH(1));
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	ga.bm = &nfsv4_getattrbm;
+	nfs_v4initcompound(&cp);
+
+	nfsm_v4build_compound(&cp, "nfs4_getattr()");
+	nfsm_v4build_putfh(&cp, vp);
+	nfsm_v4build_getattr(&cp, &ga);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request(vp, NFSV4PROC_COMPOUND, ap->a_td, ap->a_cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_putfh(&cp);
+	nfsm_v4dissect_getattr(&cp, &ga);
+
+	nfs4_vnop_loadattrcache(vp, &ga.fa, ap->a_vap);
+
+nfsmout:
+	error = nfs_v4postop(&cp, error);
+
+	if (mrep != NULL)
+		m_freem(mrep);
+	return (error);
+}
+
+/*
+ * nfs setattr call.
+ */
+static int
+nfs4_setattr(struct vop_setattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct nfsnode *np = VTONFS(vp);
+	u_quad_t tsize;
+	int error = 0;
+
+#ifndef nolint
+	tsize = (u_quad_t)0;
+#endif
+
+	/*
+	 * Setting of flags is not supported.
+	 */
+	if (vap->va_flags != VNOVAL)
+		return (EOPNOTSUPP);
+
+	/*
+	 * Disallow write attempts if the filesystem is mounted read-only.
+	 * (va_flags is known to be VNOVAL at this point, so only the
+	 * owner, time and mode fields need testing.)
+	 */
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    (vap->va_uid != (uid_t)VNOVAL ||
+	    vap->va_gid != (gid_t)VNOVAL ||
+	    vap->va_atime.tv_sec != VNOVAL ||
+	    vap->va_mtime.tv_sec != VNOVAL ||
+	    vap->va_mode != (mode_t)VNOVAL))
+		return (EROFS);
+
+	if (vap->va_size != VNOVAL) {
+		switch (vp->v_type) {
+		case VDIR:
+			return (EISDIR);
+		case VCHR:
+		case VBLK:
+		case VSOCK:
+		case VFIFO:
+			/*
+			 * A size makes no sense here: finish now if nothing
+			 * else was requested, otherwise just drop the size.
+			 */
+			if (vap->va_mtime.tv_sec == VNOVAL &&
+			    vap->va_atime.tv_sec == VNOVAL &&
+			    vap->va_mode == (mode_t)VNOVAL &&
+			    vap->va_uid == (uid_t)VNOVAL &&
+			    vap->va_gid == (gid_t)VNOVAL)
+				return (0);
+			vap->va_size = VNOVAL;
+			break;
+		default:
+			/*
+			 * Disallow write attempts if the filesystem is
+			 * mounted read-only.
+			 */
+			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+
+			/*
+			 * We run vnode_pager_setsize() early (why?),
+			 * we must set np->n_size now to avoid vinvalbuf
+			 * V_SAVE races that might setsize a lower
+			 * value.
+			 */
+			tsize = np->n_size;
+			error = nfs_meta_setsize(vp, ap->a_cred,
+			    ap->a_td, vap->va_size);
+
+			if (np->n_flag & NMODIFIED) {
+				/* Truncating to zero: nothing worth saving. */
+				error = nfs_vinvalbuf(vp,
+				    vap->va_size == 0 ? 0 : V_SAVE,
+				    ap->a_cred, ap->a_td, 1);
+				if (error) {
+					vnode_pager_setsize(vp, np->n_size);
+					return (error);
+				}
+			}
+			/*
+			 * np->n_size has already been set to vap->va_size
+			 * in nfs_meta_setsize(). We must set it again since
+			 * nfs_loadattrcache() could be called through
+			 * nfs_meta_setsize() and could modify np->n_size.
+			 */
+			np->n_size = vap->va_size;
+			np->n_vattr.va_size = np->n_size;
+			break;
+		}
+	} else if ((vap->va_mtime.tv_sec != VNOVAL ||
+	    vap->va_atime.tv_sec != VNOVAL) &&
+	    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
+		/* Push dirty data before changing the times; only EINTR is fatal. */
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_td, 1);
+		if (error == EINTR)
+			return (error);
+	}
+
+	error = nfs4_setattrrpc(vp, vap, ap->a_cred, ap->a_td);
+	if (error && vap->va_size != VNOVAL) {
+		/* The server refused: put the old size back. */
+		np->n_vattr.va_size = tsize;
+		np->n_size = np->n_vattr.va_size;
+		vnode_pager_setsize(vp, np->n_size);
+	}
+	return (error);
+}
+
+/*
+ * Do an nfs setattr rpc.
+ * Sends PUTFH + SETATTR + GETATTR, using the node's write open context,
+ * and reloads the attribute cache from the reply.
+ */
+static int
+nfs4_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
+    struct thread *td)
+{
+	struct nfsnode *np = VTONFS(vp);
+	struct nfs4_fctx *fcp = &np->n_wfc;
+	struct nfs4_oparg_getattr ga;
+	struct nfs4_compound cp;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	caddr_t bpos, dpos;
+	int error = 0;
+
+	nfsstats.rpccnt[NFSPROC_SETATTR]++;
+	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	ga.bm = &nfsv4_getattrbm;
+	nfs_v4initcompound(&cp);
+
+	nfsm_v4build_compound(&cp, "nfs4_setattrrpc");
+	nfsm_v4build_putfh(&cp, vp);
+	nfsm_v4build_setattr(&cp, vap, fcp);
+	nfsm_v4build_getattr(&cp, &ga);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_putfh(&cp);
+	nfsm_v4dissect_setattr(&cp);
+	nfsm_v4dissect_getattr(&cp, &ga);
+
+	nfs4_vnop_loadattrcache(vp, &ga.fa, NULL);
+
+	/* XXX -- need to implement this in nfs4_setattr*/
+	if (np->n_flag & NTRUNCATE) {
+		error = nfs4_closerpc(vp, cred, td, FWRITE);
+		np->n_flag &= ~NTRUNCATE;
+	}
+
+nfsmout:
+	error = nfs_v4postop(&cp, error);
+
+	if (mrep != NULL)
+		m_freem(mrep);
+
+	return (error);
+}
+
+/*
+ * nfs lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the rpc
+ *
+ * The rpc is one compound: PUTFH dvp, GETATTR, LOOKUP (or LOOKUPP for
+ * "..", nothing for "."), GETATTR, GETFH.  The reply mbuf chain is freed
+ * on every path out of the function once the request has been sent.
+ */
+static int
+nfs4_lookup(struct vop_lookup_args *ap)
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode *dvp = ap->a_dvp;
+	struct vnode **vpp = ap->a_vpp;
+	int isdot, flags = cnp->cn_flags;
+	struct vnode *newvp;
+	struct nfsmount *nmp;
+	caddr_t bpos, dpos;
+	struct mbuf *mreq, *mrep = NULL, *md, *mb;
+	long len;
+	nfsfh_t *fhp;
+	struct nfsnode *np;
+	int lockparent, wantparent, error = 0, fhsize;
+	struct thread *td = cnp->cn_thread;
+	struct nfs4_compound cp;
+	struct nfs4_oparg_getattr ga, dga;
+	struct nfs4_oparg_lookup l;
+	struct nfs4_oparg_getfh gfh;
+
+	*vpp = NULLVP;
+	cnp->cn_flags &= ~PDIRUNLOCK;
+	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+		return (EROFS);
+	if (dvp->v_type != VDIR)
+		return (ENOTDIR);
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+	nmp = VFSTONFS(dvp->v_mount);
+	np = VTONFS(dvp);
+
+	isdot = cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.';
+
+	/* Name cache hit: revalidate it against the vnode's ctime. */
+	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
+		struct vattr vattr;
+		int vpid;
+
+		if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
+			*vpp = NULLVP;
+			return (error);
+		}
+
+		vhold(*vpp);
+		newvp = *vpp;
+		vpid = newvp->v_id;
+		/*
+		 * See the comment starting `Step through' in ufs/ufs_lookup.c
+		 * for an explanation of the locking protocol
+		 */
+		if (dvp == newvp) {
+			VREF(newvp);
+			error = 0;
+		} else if (flags & ISDOTDOT) {
+			VOP_UNLOCK(dvp, 0, td);
+			cnp->cn_flags |= PDIRUNLOCK;
+			error = vget(newvp, LK_EXCLUSIVE, td);
+			if (!error && lockparent && (flags & ISLASTCN)) {
+				error = vn_lock(dvp, LK_EXCLUSIVE, td);
+				if (error == 0)
+					cnp->cn_flags &= ~PDIRUNLOCK;
+			}
+		} else {
+			error = vget(newvp, LK_EXCLUSIVE, td);
+			if (!lockparent || error || !(flags & ISLASTCN)) {
+				VOP_UNLOCK(dvp, 0, td);
+				cnp->cn_flags |= PDIRUNLOCK;
+			}
+		}
+		if (!error) {
+			if (vpid == newvp->v_id) {
+				if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, td)
+				    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
+					nfsstats.lookupcache_hits++;
+					if (cnp->cn_nameiop != LOOKUP &&
+					    (flags & ISLASTCN))
+						cnp->cn_flags |= SAVENAME;
+					vdrop(newvp);
+					return (0);
+				}
+				cache_purge(newvp);
+			}
+			vput(newvp);
+			if (lockparent && dvp != newvp && (flags & ISLASTCN))
+				VOP_UNLOCK(dvp, 0, td);
+		}
+		vdrop(newvp);
+		error = vn_lock(dvp, LK_EXCLUSIVE, td);
+		*vpp = NULLVP;
+		if (error) {
+			cnp->cn_flags |= PDIRUNLOCK;
+			return (error);
+		}
+		cnp->cn_flags &= ~PDIRUNLOCK;
+	}
+
+	/* Cache miss (or stale entry): ask the server. */
+	error = 0;
+	newvp = NULLVP;
+	nfsstats.lookupcache_misses++;
+	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+
+	len = cnp->cn_namelen;
+	mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, 0);
+	mb = mreq;
+	bpos = mtod(mb, caddr_t);
+
+	ga.bm = &nfsv4_getattrbm;
+	dga.bm = &nfsv4_getattrbm;
+	nfs_v4initcompound(&cp);
+
+	nfsm_v4build_compound(&cp, "nfs4_lookup()");
+	nfsm_v4build_putfh(&cp, dvp);
+	nfsm_v4build_getattr(&cp, &dga);
+	if (flags & ISDOTDOT)
+		nfsm_v4build_lookupp(&cp);
+	else if (!isdot) {
+		l.name = cnp->cn_nameptr;
+		l.namelen = len;
+		nfsm_v4build_lookup(&cp, &l);
+	}
+	nfsm_v4build_getattr(&cp, &ga);
+	nfsm_v4build_getfh(&cp, &gfh);
+	nfsm_v4build_finalize(&cp);
+
+	nfsm_request(dvp, NFSV4PROC_COMPOUND, cnp->cn_thread, cnp->cn_cred);
+	if (error != 0)
+		goto nfsmout;
+
+	nfsm_v4dissect_compound(&cp);
+	nfsm_v4dissect_putfh(&cp);
+	nfsm_v4dissect_getattr(&cp, &dga);
+	if (flags & ISDOTDOT)
+		nfsm_v4dissect_lookupp(&cp);
+	else if (!isdot)
+		nfsm_v4dissect_lookup(&cp);
+	nfsm_v4dissect_getattr(&cp, &ga);
+	nfsm_v4dissect_getfh(&cp, &gfh);
+
+	nfs4_vnop_loadattrcache(dvp, &dga.fa, NULL);
+	fhp = &gfh.fh_val;
+	fhsize = gfh.fh_len;
+
+	/*
+	 * From here on mrep holds the reply; every direct return below must
+	 * free it, just as the common exit at nfsmout does.
+	 * NOTE(review): these direct returns also bypass nfs_v4postop();
+	 * that is unchanged from the original -- confirm it is intended.
+	 */
+
+	/*
+	 * Handle RENAME case...
+	 */
+	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
+		if (NFS_CMPFH(np, fhp, fhsize)) {
+			m_freem(mrep);
+			return (EISDIR);
+		}
+
+		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+		if (error) {
+			m_freem(mrep);
+			return (error);
+		}
+
+		newvp = NFSTOV(np);
+
+		nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL);
+
+		*vpp = newvp;
+		cnp->cn_flags |= SAVENAME;
+		if (!lockparent) {
+			VOP_UNLOCK(dvp, 0, td);
+			cnp->cn_flags |= PDIRUNLOCK;
+		}
+		m_freem(mrep);
+		return (0);
+	}
+
+	if (flags & ISDOTDOT) {
+		VOP_UNLOCK(dvp, 0, td);
+
+		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+		if (error) {
+			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
+			m_freem(mrep);
+			return (error);
+		}
+		newvp = NFSTOV(np);
+
+		nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL);
+
+		if (lockparent && (flags & ISLASTCN)) {
+			error = vn_lock(dvp, LK_EXCLUSIVE, td);
+			if (error) {
+				cnp->cn_flags |= PDIRUNLOCK;
+				vput(newvp);
+				m_freem(mrep);
+				return (error);
+			}
+		} else
+			cnp->cn_flags |= PDIRUNLOCK;
+	} else if (NFS_CMPFH(np, fhp, fhsize)) {
+		/* The name resolved to the directory itself. */
+		VREF(dvp);
+		newvp = dvp;
+	} else {
+		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+		if (error) {
+			m_freem(mrep);
+			return (error);
+		}
+
+		if (!lockparent || !(flags & ISLASTCN)) {
+			cnp->cn_flags |= PDIRUNLOCK;
+			VOP_UNLOCK(dvp, 0, td);
+		}
+		newvp = NFSTOV(np);
+
+		/* Fill in np used by open. */
+		np->n_dvp = dvp;
+		np->n_namelen = cnp->cn_namelen;
+		if (np->n_name != NULL)
+			FREE(np->n_name, M_NFSREQ);
+		MALLOC(np->n_name, u_char *, np->n_namelen + 1, M_NFSREQ, M_WAITOK);
+		bcopy(cnp->cn_nameptr, np->n_name, np->n_namelen);
+		np->n_name[np->n_namelen] = '\0';
+
+		nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL);
+	}
+
+	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+		cnp->cn_flags |= SAVENAME;
+	if ((cnp->cn_flags & MAKEENTRY) &&
+	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
+		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
+		cache_enter(dvp, newvp, cnp);
+	}
+	*vpp = newvp;
+nfsmout:
+	error = nfs_v4postop(&cp, error);
+
+	/* Common exit: release the reply, if one was received. */
+	if (mrep != NULL)
+		m_freem(mrep);
+
+	if (error) {
+		if (newvp != NULLVP) {
+			vrele(newvp);
+			*vpp = NULLVP;
+		}
+		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+		    (flags & ISLASTCN) && error == ENOENT) {
+			if (!lockparent) {
+				VOP_UNLOCK(dvp, 0, td);
+				cnp->cn_flags |= PDIRUNLOCK;
+			}
+			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
+				error = EROFS;
+			else
+				error = EJUSTRETURN;
+		}
+		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+			cnp->cn_flags |= SAVENAME;
+	}
+
+	return (error);
+}
+
+/*
+ * nfs read call.
+ * Just call nfs_bioread() to do the work.
+ */
+static int
+nfs4_read(struct vop_read_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+
+	switch (vp->v_type) {
+	case VREG:
+		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
+	case VDIR:
+		return (EISDIR);
+	default:
+		return (EOPNOTSUPP);
+	}
+}
+
+/*
+ * nfs readlink call
+ */
+static int
+nfs4_readlink(struct vop_readlink_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+
+	if (vp->v_type != VLNK)
+		return (EINVAL);
+	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
+}
+
+/*
+ * Do a readlink rpc.
+ * Called by nfs_doio() from below the buffer cache.
+ */ +int +nfs4_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) +{ + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + struct nfs4_compound cp; + + nfsstats.rpccnt[NFSPROC_READLINK]++; + + mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + nfs_v4initcompound(&cp); + + nfsm_v4build_compound(&cp, "nfs4_readlinkrpc()"); + nfsm_v4build_putfh(&cp, vp); + nfsm_v4build_readlink(&cp); + nfsm_v4build_finalize(&cp); + + nfsm_request(vp, NFSV4PROC_COMPOUND, uiop->uio_td, cred); + if (error != 0) + goto nfsmout; + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_readlink(&cp, uiop); + +nfsmout: + error = nfs_v4postop(&cp, error); + + if (m_freem != NULL) + m_freem(mrep); + return (error); +} + +/* + * nfs read rpc call + * Ditto above + */ +int +nfs4_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) +{ + caddr_t bpos, dpos; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + struct nfsmount *nmp; + int error = 0, len, tsiz; + struct nfs4_compound cp; + struct nfs4_oparg_read read; + struct nfsnode *np = VTONFS(vp); + + nmp = VFSTONFS(vp->v_mount); + tsiz = uiop->uio_resid; + if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) + return (EFBIG); + + if (tsiz == 0) + return (0); + + read.uiop = uiop; + read.fcp = np->n_rfc.refcnt > 0 ? &np->n_rfc : &np->n_wfc; + + while (tsiz > 0) { + nfsstats.rpccnt[NFSPROC_READ]++; + len = (tsiz > nmp->nm_rsize) ? 
nmp->nm_rsize : tsiz; + + read.off = uiop->uio_offset; + read.maxcnt = len; + nfs_v4initcompound(&cp); + + mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + nfsm_v4build_compound(&cp, "nfs4_readrpc()"); + nfsm_v4build_putfh(&cp, vp); + nfsm_v4build_read(&cp, &read); + nfsm_v4build_finalize(&cp); + + nfsm_request(vp, NFSV4PROC_COMPOUND, uiop->uio_td, cred); + if (error != 0) { + error = nfs_v4postop(&cp, error); + goto nfsmout; + } + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_read(&cp, &read); + + if (read.eof || read.retlen == 0) + tsiz = 0; + else + tsiz -= read.retlen; + + error = nfs_v4postop(&cp, error); + + m_freem(mrep); + mrep = NULL; + } +nfsmout: + if (mrep != NULL) + m_freem(mrep); + + return (error); +} + +/* + * nfs write call + */ +int +nfs4_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, + int *iomode, int *must_commit) +{ + int32_t backup; + caddr_t bpos, dpos; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + int error = 0, len, tsiz, wccflag = 1, rlen; + struct nfs4_compound cp; + struct nfs4_oparg_write write; + nfsv4stablehow commit, committed = NSHFILESYNC; + caddr_t verf; + struct nfsnode *np = VTONFS(vp); + +#ifndef DIAGNOSTIC + if (uiop->uio_iovcnt != 1) + panic("nfs: writerpc iovcnt > 1"); +#endif + *must_commit = 0; + tsiz = uiop->uio_resid; + if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) + return (EFBIG); + + if (tsiz == 0) + return (0); + + write.stable = (nfsv4stablehow)*iomode; + write.uiop = uiop; + write.fcp = &np->n_wfc; + + while (tsiz > 0) { + nfsstats.rpccnt[NFSPROC_WRITE]++; + len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; + + write.off = uiop->uio_offset; + write.cnt = len; + nfs_v4initcompound(&cp); + + mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + nfsm_v4build_compound(&cp, "nfs4_writerpc()"); + nfsm_v4build_putfh(&cp, vp); + nfsm_v4build_write(&cp, &write); + nfsm_v4build_finalize(&cp); + + nfsm_request(vp, NFSV4PROC_COMPOUND, uiop->uio_td, cred); + if (error != 0) { + error = nfs_v4postop(&cp, error); + goto nfsmout; + } + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_write(&cp, &write); + + rlen = write.retlen; + if (rlen == 0) { + error = NFSERR_IO; + break; + } else if (rlen < len) { + backup = len - rlen; + (char *)uiop->uio_iov->iov_base -= backup; + uiop->uio_iov->iov_len += backup; + uiop->uio_offset -= backup; + uiop->uio_resid += backup; + len = rlen; + } + + commit = write.committed; + + if (committed == NSHFILESYNC || + (committed = NSHDATASYNC && commit == NSHUNSTABLE)) + committed = commit; + + verf = (caddr_t)write.wverf; + + if ((nmp->nm_flag & NFSSTA_HASWRITEVERF) == 0) { + bcopy(verf, nmp->nm_verf, NFSX_V4VERF); + nmp->nm_flag |= NFSMNT_HASWRITEVERF; + } else if (bcmp(verf, nmp->nm_verf, NFSX_V4VERF)) { + *must_commit = 1; + bcopy(verf, nmp->nm_verf, NFSX_V4VERF); + } + + /* XXX wccflag */ + if (wccflag) + VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; + + error = nfs_v4postop(&cp, error); + + m_freem(mrep); + mrep = NULL; + if (error) + break; + tsiz -= len; + } +nfsmout: + if (mrep != NULL) + m_freem(mrep); + *iomode = committed; + if (error) + uiop->uio_resid = tsiz; + return (error); +} + +/* ARGSUSED */ +static int +nfs4_mknod(struct vop_mknod_args *ap) +{ + struct vattr *vap = ap->a_vap; + struct vnode *newvp = NULL; + int error; + + error = nfs4_createrpc(ap->a_dvp, &newvp, + ap->a_cnp, (nfstype)vap->va_type, vap, NULL); + + /* XXX - is this actually referenced here? 
*/ + if (error == 0) { + *ap->a_vpp = newvp; + vrele(newvp); + } + + return (error); +} + +static int +nfs4_createrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, + nfstype ftype, struct vattr *vap, char *linktarget) +{ + struct nfsnode *dnp = VTONFS(dvp); + struct nfsnode *np = NULL; + struct vnode *newvp = NULL; + struct nfs4_compound cp; + struct nfs4_oparg_create c; + struct nfs4_oparg_getattr ga; + struct nfs4_oparg_getfh gfh; + caddr_t bpos, dpos; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + int error = 0; + + nfsstats.rpccnt[NFSPROC_CREATE]++; + + mreq = nfsm_reqhead(dvp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + bzero(&c, sizeof(c)); + bzero(&ga, sizeof(ga)); + + c.type = ftype; + c.vap = vap; + c.linktext = linktarget; + c.name = cnp->cn_nameptr; + c.namelen = cnp->cn_namelen; + + ga.bm = &nfsv4_getattrbm; + nfs_v4initcompound(&cp); + + nfsm_v4build_compound(&cp, "nfs4_createrpc()"); + nfsm_v4build_putfh(&cp, dvp); + nfsm_v4build_create(&cp, &c); + nfsm_v4build_getattr(&cp, &ga); + nfsm_v4build_getfh(&cp, &gfh); + nfsm_v4build_finalize(&cp); + + nfsm_request(dvp, NFSV4PROC_COMPOUND, cnp->cn_thread, cnp->cn_cred); + if (error != 0) + goto nfsmout; + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_create(&cp, &c); + nfsm_v4dissect_getattr(&cp, &ga); + nfsm_v4dissect_getfh(&cp, &gfh); + + error = nfs_nget(dvp->v_mount, &gfh.fh_val, gfh.fh_len, &np); + if (error != 0) + goto nfsmout; + + newvp = NFSTOV(np); + nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL); + + if (cnp->cn_flags & MAKEENTRY) + cache_enter(dvp, newvp, cnp); + + dnp->n_flag |= NMODIFIED; + dnp->n_attrstamp = 0; + + nfsmout: + error = nfs_v4postop(&cp, error); + + if (mrep != NULL) + m_freem(mrep); + + /* XXX */ + /*FREE(cnp->cn_pnbuf, M_NAMEI);*/ + if (error != 0 && newvp != NULL) + vrele(newvp); + else if (error == 0) + *vpp = newvp; + + return (error); +} + +static int +nfs4_renamerpc(struct vnode *fdvp, const char 
*fnameptr, int fnamelen, + struct vnode *tdvp, const char *tnameptr, int tnamelen, + struct ucred *cred, struct thread *td) +{ + + struct nfsnode *fnp = VTONFS(fdvp), *tnp = VTONFS(tdvp); + caddr_t bpos, dpos; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + struct nfs4_compound cp; + struct nfs4_oparg_rename r; + int error = 0; + + nfsstats.rpccnt[NFSPROC_RENAME]++; + + r.fname = fnameptr; + r.fnamelen = fnamelen; + r.tname = tnameptr; + r.tnamelen = tnamelen; + nfs_v4initcompound(&cp); + + mreq = nfsm_reqhead(fdvp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + nfsm_v4build_compound(&cp, "nfs4_renamerpc()"); + nfsm_v4build_putfh(&cp, fdvp); + nfsm_v4build_savefh(&cp); + nfsm_v4build_putfh(&cp, tdvp); + nfsm_v4build_rename(&cp, &r); + nfsm_v4build_finalize(&cp); + + nfsm_request(fdvp, NFSV4PROC_COMPOUND, td, cred); + if (error != 0) + goto nfsmout; + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_savefh(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_rename(&cp); + + /* XXX should this always be performed? 
*/ + fnp->n_flag |= NMODIFIED; + tnp->n_flag |= NMODIFIED; + fnp->n_attrstamp = tnp->n_attrstamp = 0; + + nfsmout: + error = nfs_v4postop(&cp, error); + + if (mrep != NULL) + m_freem(mrep); + + return (error); +} + +/* + * nfs file create call + */ +static int +nfs4_create(struct vop_create_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vattr *vap = ap->a_vap; + struct nfsnode *dnp = VTONFS(dvp); + struct componentname *cnp = ap->a_cnp; + struct vnode *newvp = NULL; + int error = 0, fmode = (O_CREAT | FREAD | FWRITE); + struct vattr vattr; + + if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) + return (error); + + if (vap->va_vaflags & VA_EXCLUSIVE) + fmode |= O_EXCL; + + error = nfs4_openrpc(dvp, &newvp, cnp, fmode, vap); + if (error != 0) + goto out; + + VTONFS(newvp)->n_flag |= NCREATED; + + if (cnp->cn_flags & MAKEENTRY) + cache_enter(dvp, newvp, cnp); + + *ap->a_vpp = newvp; + + dnp->n_flag |= NMODIFIED; + dnp->n_attrstamp = 0; /* XXX; wccflag */ + + out: + return (error); +} + +/* + * nfs file remove call + * To try and make nfs semantics closer to ufs semantics, a file that has + * other processes using the vnode is renamed instead of removed and then + * removed later on the last close. 
+ * - If v_usecount > 1 + * If a rename is not already in the works + * call nfs4_sillyrename() to set it up + * else + * do the remove rpc + */ +static int +nfs4_remove(struct vop_remove_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + struct nfsnode *np = VTONFS(vp); + int error = 0; + struct vattr vattr; + +#ifndef DIAGNOSTIC + if ((cnp->cn_flags & HASBUF) == 0) + panic("nfs4_remove: no name"); + if (vrefcnt(vp) < 1) + panic("nfs4_remove: bad v_usecount"); +#endif + if (vp->v_type == VDIR) + error = EPERM; + else if (vrefcnt(vp) == 1 || (np->n_sillyrename && + VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_thread) == 0 && + vattr.va_nlink > 1)) { + /* + * Purge the name cache so that the chance of a lookup for + * the name succeeding while the remove is in progress is + * minimized. Without node locking it can still happen, such + * that an I/O op returns ESTALE, but since you get this if + * another host removes the file.. + */ + cache_purge(vp); + /* + * throw away biocache buffers, mainly to avoid + * unnecessary delayed writes later. + */ + error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_thread, 1); + /* Do the rpc */ + if (error != EINTR) + error = nfs4_removerpc(dvp, cnp->cn_nameptr, + cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); + /* + * Kludge City: If the first reply to the remove rpc is lost.. + * the reply to the retransmitted request will be ENOENT + * since the file was in fact removed + * Therefore, we cheat and return success. + */ + if (error == ENOENT) + error = 0; + } else if (!np->n_sillyrename) + error = nfs4_sillyrename(dvp, vp, cnp); + np->n_attrstamp = 0; + return (error); +} + +/* + * nfs file remove rpc called from nfs_inactive + */ +int +nfs4_removeit(struct sillyrename *sp) +{ + /* + * Make sure that the directory vnode is still valid. + * XXX we should lock sp->s_dvp here. 
+ */ + if (sp->s_dvp->v_type == VBAD) + return (0); + return (nfs4_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, + NULL)); +} + +/* + * Nfs remove rpc, called from nfs4_remove() and nfs4_removeit(). + */ +static int +nfs4_removerpc(struct vnode *dvp, const char *name, int namelen, + struct ucred *cred, struct thread *td) +{ + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + struct nfs4_compound cp; + + nfsstats.rpccnt[NFSPROC_REMOVE]++; + + mreq = nfsm_reqhead(dvp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + nfs_v4initcompound(&cp); + + nfsm_v4build_compound(&cp, "nfs4_removerpc()"); + nfsm_v4build_putfh(&cp, dvp); + nfsm_v4build_remove(&cp, name, namelen); + nfsm_v4build_finalize(&cp); + + nfsm_request(dvp, NFSV4PROC_COMPOUND, td, cred); + if (error != 0) + goto nfsmout; + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_remove(&cp); + + nfsmout: + error = nfs_v4postop(&cp, error); + + if (mrep != NULL) + m_freem(mrep); + + VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_attrstamp = 0; /* XXX wccflag */ + + return (error); +} + +/* + * nfs file rename call + */ +static int +nfs4_rename(struct vop_rename_args *ap) +{ + struct vnode *fvp = ap->a_fvp; + struct vnode *tvp = ap->a_tvp; + struct vnode *fdvp = ap->a_fdvp; + struct vnode *tdvp = ap->a_tdvp; + struct componentname *tcnp = ap->a_tcnp; + struct componentname *fcnp = ap->a_fcnp; + int error; + + #ifndef DIAGNOSTIC + if ((tcnp->cn_flags & HASBUF) == 0 || + (fcnp->cn_flags & HASBUF) == 0) + panic("nfs4_rename: no name"); +#endif + /* Check for cross-device rename */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + goto out; + } + + if (fvp == tvp) { + printf("nfs4_rename: fvp == tvp (can't happen)\n"); + error = 0; + goto out; + } + if ((error = vn_lock(fvp, LK_EXCLUSIVE, fcnp->cn_thread)) != 0) + goto out; + + /* + * We have to flush B_DELWRI data 
prior to renaming + * the file. If we don't, the delayed-write buffers + * can be flushed out later after the file has gone stale + * under NFSV3. NFSV2 does not have this problem because + * ( as far as I can tell ) it flushes dirty buffers more + * often. + */ + VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_thread); + VOP_UNLOCK(fvp, 0, fcnp->cn_thread); + if (tvp) + VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_thread); + + /* + * If the tvp exists and is in use, sillyrename it before doing the + * rename of the new file over it. + * XXX Can't sillyrename a directory. + */ + if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && + tvp->v_type != VDIR && !nfs4_sillyrename(tdvp, tvp, tcnp)) { + vput(tvp); + tvp = NULL; + } + + error = nfs4_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, + tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, + tcnp->cn_thread); + + if (fvp->v_type == VDIR) { + if (tvp != NULL && tvp->v_type == VDIR) + cache_purge(tdvp); + cache_purge(fdvp); + } + +out: + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fdvp); + vrele(fvp); + /* + * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 
+ */ + if (error == ENOENT) + error = 0; + return (error); +} + +/* + * nfs file rename rpc called from nfs4_remove() above + */ +static int +nfs4_renameit(struct vnode *sdvp, struct componentname *scnp, + struct sillyrename *sp) +{ + return (nfs4_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, + sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); +} + +/* + * nfs hard link create call + */ +static int +nfs4_link(struct vop_link_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vnode *tdvp = ap->a_tdvp; + struct componentname *cnp = ap->a_cnp; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + struct nfs4_compound cp; + struct nfs4_oparg_link l; + + if (vp->v_mount != tdvp->v_mount) { + return (EXDEV); + } + + /* + * Push all writes to the server, so that the attribute cache + * doesn't get "out of sync" with the server. + * XXX There should be a better way! + */ + VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_thread); + + nfsstats.rpccnt[NFSPROC_LINK]++; + + l.name = cnp->cn_nameptr; + l.namelen = cnp->cn_namelen; + nfs_v4initcompound(&cp); + + mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + nfsm_v4build_compound(&cp, "nfs4_link()"); + nfsm_v4build_putfh(&cp, vp); + nfsm_v4build_savefh(&cp); + nfsm_v4build_putfh(&cp, tdvp); + nfsm_v4build_link(&cp, &l); + nfsm_v4build_finalize(&cp); + + nfsm_request(vp, NFSV4PROC_COMPOUND, cnp->cn_thread, cnp->cn_cred); + if (error != 0) + goto nfsmout; + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_savefh(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_link(&cp); + + VTONFS(tdvp)->n_flag |= NMODIFIED; + VTONFS(vp)->n_attrstamp = 0; + VTONFS(tdvp)->n_attrstamp = 0; + +nfsmout: + error = nfs_v4postop(&cp, error); + + if (mrep != NULL) + m_freem(mrep); + + return (error); +} + +/* + * nfs symbolic link create call + */ +static int +nfs4_symlink(struct vop_symlink_args *ap) +{ + struct vnode *dvp = 
ap->a_dvp; + int error = 0; + struct vnode *newvp = NULL; + + nfsstats.rpccnt[NFSPROC_SYMLINK]++; + + error = nfs4_createrpc(ap->a_dvp, &newvp, ap->a_cnp, NFLNK, + ap->a_vap, ap->a_target); + + if (error != 0 && newvp != NULL) + vput(newvp); + else if (error == 0) + *ap->a_vpp = newvp; + + VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_attrstamp = 0; /* XXX wccflags */ + + return (error); +} + +/* + * nfs make dir call + */ +static int +nfs4_mkdir(struct vop_mkdir_args *ap) +{ + return (nfs4_createrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, NFDIR, + ap->a_vap, NULL)); +} + +/* + * nfs remove directory call + */ +static int +nfs4_rmdir(struct vop_rmdir_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + struct nfsnode *dnp = VTONFS(dvp); + struct componentname *cnp = ap->a_cnp; + int error = 0; + + if (dvp == vp) + return (EINVAL); + + error = (nfs4_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, + NULL)); + if (error) + return (error); + + dnp->n_flag |= NMODIFIED; + dnp->n_attrstamp = 0; + cache_purge(dvp); + cache_purge(vp); + + return (error); +} + +/* + * nfs readdir call + */ +static int +nfs4_readdir(struct vop_readdir_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct uio *uio = ap->a_uio; + int tresid, error; + struct vattr vattr; + + if (vp->v_type != VDIR) + return (EPERM); + /* + * First, check for hit on the EOF offset cache + */ + if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && + (np->n_flag & NMODIFIED) == 0) { + if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0 && + np->n_mtime == vattr.va_mtime.tv_sec) { + nfsstats.direofcache_hits++; + return (0); + } + } + + /* + * Call nfs_bioread() to do the real work. 
+ */ + tresid = uio->uio_resid; + error = nfs_bioread(vp, uio, 0, ap->a_cred); + + if (!error && uio->uio_resid == tresid) + nfsstats.direofcache_misses++; + return (error); +} + +static u_char fty_to_dty[] = { + DT_UNKNOWN, /* NFNON */ + DT_REG, /* NFREG */ + DT_DIR, /* NFDIR */ + DT_BLK, /* NFBLK */ + DT_CHR, /* NFCHR */ + DT_LNK, /* NFLNK */ + DT_SOCK, /* NFSOCK */ + DT_FIFO, /* NFFIFO */ + DT_UNKNOWN, /* NFATTRDIT */ + DT_UNKNOWN, /* NFNAMEDATTR */ + DT_UNKNOWN, /* NFBAD */ +}; + +/* + * Readdir rpc call. + * Called from below the buffer cache by nfs_doio(). + */ +int +nfs4_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) +{ + int len, left; + struct dirent *dp = NULL; + u_int32_t *tl; + caddr_t p; + uint64_t *cookiep; + caddr_t bpos, dpos; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + uint64_t cookie; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + struct nfsnode *dnp = VTONFS(vp); + int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; + struct nfs4_compound cp; + struct nfs4_oparg_readdir readdir; + struct nfsv4_fattr fattr; + u_int fty; + +#ifndef DIAGNOSTIC + if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || + (uiop->uio_resid & (DIRBLKSIZ - 1))) + panic("nfs readdirrpc bad uio"); +#endif + + /* + * If there is no cookie, assume directory was stale. + */ + cookiep = nfs4_getcookie(dnp, uiop->uio_offset, 0); + if (cookiep) + cookie = *cookiep; + else + return (NFSERR_BAD_COOKIE); + + /* Generate fake entries for "." and ".." */ + while (cookie < 2 && bigenough) { + cookie++; + len = 4 + DIRHDSIZ; + + if (len > uiop->uio_resid) { + bigenough = 0; + break; + } + dp = (struct dirent *)uiop->uio_iov->iov_base; + + dp->d_namlen = cookie; + dp->d_reclen = len; + dp->d_type = DT_DIR; + if (cookie == 1) + dp->d_fileno = dnp->n_vattr.va_fileid; /* XXX has problems with pynfs virtualhandles */ + else + dp->d_fileno = dnp->n_dvp != NULL ? 
+ VTONFS(dnp->n_dvp)->n_vattr.va_fileid : cookie; + + p = dp->d_name; + *p++ = '.'; + if (cookie == 2) + *p++ = '.'; + *p = '\0'; + + blksiz += len; + if (blksiz == DIRBLKSIZ) + blksiz = 0; + uiop->uio_offset += len; + uiop->uio_resid -= len; + (char *)uiop->uio_iov->iov_base += len; + uiop->uio_iov->iov_len -= len; + } + + if (cookie == 2) + cookie = 0; + + /* This is sort of ugly, to prevent v4postop() from acting weird */ + bzero(&cp, sizeof(cp)); + + /* + * Loop around doing readdir rpc's of size nm_readdirsize + * truncated to a multiple of DIRBLKSIZ. + * The stopping criteria is EOF or buffer full. + */ + /* + * XXX this is sort of ugly for nfsv4; we don't maintain the + * strict abstraction, but do the decoding inline. that's ok. + */ + while (more_dirs && bigenough) { + nfsstats.rpccnt[NFSPROC_READDIR]++; + + mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + readdir.cnt = nmp->nm_readdirsize; + readdir.cookie = cookie; + readdir.bm = &nfsv4_readdirbm; + if (cookie == 0) + bzero(&readdir.verf, sizeof(readdir.verf)); + else + bcopy(&dnp->n_cookieverf, &readdir.verf, + sizeof(readdir.verf)); + + nfs_v4initcompound(&cp); + + nfsm_v4build_compound(&cp, "nfs4_readdirrpc()"); + nfsm_v4build_putfh(&cp, vp); + nfsm_v4build_readdir(&cp, &readdir); + nfsm_v4build_finalize(&cp); + + nfsm_request(vp, NFSV4PROC_COMPOUND, uiop->uio_td, cred); + if (error != 0) + goto nfsmout; + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + + /* + * XXX - Readdir gets handled inline like in + * NFSv{2,3}. This is a nasty inconsistency and + * should be fixed. 
+ */ + + tl = nfsm_dissect(uint32_t *, 5 * NFSX_UNSIGNED); + if (fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_READDIR) { + error = EBADRPC; + goto nfsmout; + } + if (fxdr_unsigned(uint32_t, *tl++) != 0) { + error = EBADRPC; + goto nfsmout; + } + + bcopy(tl, &dnp->n_cookieverf, NFSX_V4VERF); + tl += 2; + more_dirs = fxdr_unsigned(int, *tl++); + + /* loop thru the dir entries, doctoring them to 4bsd form */ + while (more_dirs && bigenough) { + tl = nfsm_dissect(uint32_t *, 3 * NFSX_UNSIGNED); + cookie = fxdr_hyper(tl); + tl += 2; + /* XXX cookie sanity check */ + len = fxdr_unsigned(int, *tl++); + if (len <= 0 || len > NFS_MAXNAMLEN) { + error = EBADRPC; + goto nfsmout; + } + tlen = nfsm_rndup(len); + if (tlen == len) + tlen += 4; /* To ensure null termination */ + left = DIRBLKSIZ - blksiz; + if ((tlen + DIRHDSIZ) > left) { + dp->d_reclen += left; + uiop->uio_iov->iov_base = + (char *)uiop->uio_iov->iov_base + left; + uiop->uio_iov->iov_len -= left; + uiop->uio_offset += left; + uiop->uio_resid -= left; + blksiz = 0; + } + if ((tlen + DIRHDSIZ) > uiop->uio_resid) + bigenough = 0; + if (bigenough) { + dp = (struct dirent *)uiop->uio_iov->iov_base; + + dp->d_namlen = len; + dp->d_reclen = tlen + DIRHDSIZ; + + blksiz += dp->d_reclen; + if (blksiz == DIRBLKSIZ) + blksiz = 0; + uiop->uio_offset += DIRHDSIZ; + uiop->uio_resid -= DIRHDSIZ; + uiop->uio_iov->iov_base = + (char *)uiop->uio_iov->iov_base + DIRHDSIZ; + uiop->uio_iov->iov_len -= DIRHDSIZ; + + /* Copy name */ + nfsm_mtouio(uiop, len); + p = uiop->uio_iov->iov_base; + tlen -= len; + *p = '\0'; /* null terminate */ + /* printf("nfs4_readdirrpc: name: \"%s\" cookie %d\n", + p - len, (int) cookie);*/ + uiop->uio_iov->iov_base = + (char *)uiop->uio_iov->iov_base + tlen; + uiop->uio_iov->iov_len -= tlen; + uiop->uio_offset += tlen; + uiop->uio_resid -= tlen; + + /* Copy attributes */ + nfsm_v4dissect_attrs(&fattr); + + dp->d_fileno = nfs_v4fileid4_to_fileid( + fattr.fa4_valid & FA4V_FILEID && + fattr.fa4_fileid ? 
+ fattr.fa4_fileid : cookie); + + fty = (u_int)fattr.fa4_type; + dp->d_type = fattr.fa4_valid & FA4V_TYPE && + (fty < sizeof(fty_to_dty)) ? + fty_to_dty[fty] : DT_UNKNOWN; + } else + nfsm_adv(nfsm_rndup(len)); + + tl = nfsm_dissect(uint32_t *, NFSX_UNSIGNED); + more_dirs = fxdr_unsigned(int, *tl++); + } + /* + * If at end of rpc data, get the eof boolean + */ + if (!more_dirs) { + tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); + more_dirs = (fxdr_unsigned(int, *tl) == 0); + } + + error = nfs_v4postop(&cp, error); + + m_freem(mrep); + mrep = NULL; + } + /* + * Fill last record, iff any, out to a multiple of DIRBLKSIZ + * by increasing d_reclen for the last record. + */ + if (blksiz > 0) { + left = DIRBLKSIZ - blksiz; + dp->d_reclen += left; + uiop->uio_iov->iov_base = + (char *)uiop->uio_iov->iov_base + left; + uiop->uio_iov->iov_len -= left; + uiop->uio_offset += left; + uiop->uio_resid -= left; + } + + /* + * We are now either at the end of the directory or have filled the + * block. + */ + if (bigenough) + dnp->n_direofoffset = uiop->uio_offset; + else { + if (uiop->uio_resid > 0) + printf("EEK! readdirrpc resid > 0\n"); + cookiep = nfs4_getcookie(dnp, uiop->uio_offset, 1); + *cookiep = cookie; + } +nfsmout: + if (mrep != NULL) + m_freem(mrep); + return (error); +} + +/* + * Silly rename. To make the NFS filesystem that is stateless look a little + * more like the "ufs" a remove of an active vnode is translated to a rename + * to a funny looking filename that is removed by nfs_inactive on the + * nfsnode. There is the potential for another process on a different client + * to create the same funny name between the nfs_lookitup() fails and the + * nfs_rename() completes, but... 
+ */ +static int +nfs4_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) +{ + struct sillyrename *sp; + struct nfsnode *np; + int error; + short pid; + + cache_purge(dvp); + np = VTONFS(vp); +#ifndef DIAGNOSTIC + if (vp->v_type == VDIR) + panic("nfs: sillyrename dir"); +#endif + MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), + M_NFSREQ, M_WAITOK); + sp->s_cred = crhold(cnp->cn_cred); + sp->s_dvp = dvp; + sp->s_removeit = nfs4_removeit; + VREF(dvp); + + /* Fudge together a funny name */ + pid = cnp->cn_thread->td_proc->p_pid; + sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); + + /* Try lookitups until we get one that isn't there */ + while (nfs4_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, + cnp->cn_thread, NULL) == 0) { + sp->s_name[4]++; + if (sp->s_name[4] > 'z') { + error = EINVAL; + goto bad; + } + } + error = nfs4_renameit(dvp, cnp, sp); + if (error) + goto bad; + error = nfs4_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, + cnp->cn_thread, &np); + np->n_sillyrename = sp; + return (0); +bad: + vrele(sp->s_dvp); + crfree(sp->s_cred); + free((caddr_t)sp, M_NFSREQ); + return (error); +} + +/* + * Look up a file name and optionally either update the file handle or + * allocate an nfsnode, depending on the value of npp. 
+ * npp == NULL --> just do the lookup + * *npp == NULL --> allocate a new nfsnode and make sure attributes are + * handled too + * *npp != NULL --> update the file handle in the vnode + */ +static int +nfs4_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred, + struct thread *td, struct nfsnode **npp) +{ + struct vnode *newvp = NULL; + struct nfsnode *np, *dnp = VTONFS(dvp); + caddr_t bpos, dpos; + int error = 0, fhlen; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + nfsfh_t *nfhp; + struct nfs4_compound cp; + struct nfs4_oparg_lookup l; + struct nfs4_oparg_getfh gfh; + struct nfs4_oparg_getattr ga; + + nfsstats.rpccnt[NFSPROC_RENAME]++; + + mreq = nfsm_reqhead(dvp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + l.name = name; + l.namelen = len; + + nfs_v4initcompound(&cp); + + ga.bm = &nfsv4_getattrbm; + + nfsm_v4build_compound(&cp, "nfs4_renamerpc()"); + nfsm_v4build_putfh(&cp, dvp); + nfsm_v4build_lookup(&cp, &l); + nfsm_v4build_getfh(&cp, &gfh); + nfsm_v4build_getattr(&cp, &ga); + + nfsm_request(dvp, NFSV4PROC_COMPOUND, td, cred); + if (error != 0) + goto nfsmout; + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_lookup(&cp); + nfsm_v4dissect_getfh(&cp, &gfh); + nfsm_v4dissect_getattr(&cp, &ga); + + if (npp != NULL && error == 0) { + nfhp = &gfh.fh_val; + fhlen = gfh.fh_len; + + if (*npp != NULL) { + np = *npp; + if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { + free((caddr_t)np->n_fhp, M_NFSBIGFH); + np->n_fhp = &np->n_fh; + } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) + np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK); + bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); + np->n_fhsize = fhlen; + newvp = NFSTOV(np); + } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { + VREF(dvp); + newvp = dvp; + } else { + error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); + if (error) { + m_freem(mrep); + return (error); + } + newvp = NFSTOV(np); + } + + if (newvp != dvp) { + 
np->n_dvp = dvp; + np->n_namelen = len; + if (np->n_name != NULL) + FREE(np->n_name, M_NFSREQ); + MALLOC(np->n_name, u_char *, + np->n_namelen + 1, M_NFSREQ, M_WAITOK); + memcpy(np->n_name, name, len); + np->n_name[len] = '\0'; + } + nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL); + } + +nfsmout: + error = nfs_v4postop(&cp, error); + + if (mrep != NULL) + m_freem(mrep); + if (npp && *npp == NULL) { + if (error) { + if (newvp) { + if (newvp == dvp) + vrele(newvp); + else + vput(newvp); + } + } else + *npp = np; + } + + + return (error); +} + +/* + * Nfs Version 3 commit rpc + */ +int +nfs4_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, + struct thread *td) +{ + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep = NULL, *md, *mb; + struct nfs4_compound cp; + struct nfs4_oparg_commit commit; + + if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) + return (0); + nfsstats.rpccnt[NFSPROC_COMMIT]++; + + mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); + mb = mreq; + bpos = mtod(mb, caddr_t); + + commit.start = offset; + commit.len = cnt; + + nfs_v4initcompound(&cp); + + nfsm_v4build_compound(&cp, "nfs4_commit()"); + nfsm_v4build_putfh(&cp, vp); + nfsm_v4build_commit(&cp, &commit); + nfsm_v4build_finalize(&cp); + + nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred); + if (error != 0) + goto nfsmout; + + nfsm_v4dissect_compound(&cp); + nfsm_v4dissect_putfh(&cp); + nfsm_v4dissect_commit(&cp, &commit); + + /* XXX */ + /* nfsm_wcc_data(vp, wccflag);*/ + if (bcmp(nmp->nm_verf, commit.verf, NFSX_V4VERF)) { + bcopy(commit.verf, nmp->nm_verf, NFSX_V4VERF); + error = NFSERR_STALEWRITEVERF; + } + +nfsmout: + error = nfs_v4postop(&cp, error); + + if (mrep == NULL) + m_freem(mrep); + return (error); +} + +/* + * Strategy routine. + * For async requests when nfsiod(s) are running, queue the request by + * calling nfs_asyncio(), otherwise just all nfs_doio() to do the + * request. 
+ */
+static int
+nfs4_strategy(struct vop_strategy_args *ap)
+{
+	struct buf *bp = ap->a_bp;
+	struct thread *td;
+	struct ucred *cr;
+	int is_async;
+
+	KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)",
+	    __func__, ap->a_vp, ap->a_bp->b_vp));
+	KASSERT(!(bp->b_flags & B_DONE), ("nfs4_strategy: buffer %p unexpectedly marked B_DONE", bp));
+	KASSERT(BUF_REFCNT(bp) > 0, ("nfs4_strategy: buffer %p not locked", bp));
+
+	is_async = (bp->b_flags & B_ASYNC) != 0;
+
+	/* Async requests carry no thread; sync ones run as curthread. */
+	td = is_async ? NULL : curthread;	/* XXX */
+
+	/* Pick the credential that matches the direction of the I/O. */
+	cr = (bp->b_iocmd == BIO_READ) ? bp->b_rcred : bp->b_wcred;
+
+	/*
+	 * An async buffer that nfs_asyncio() accepted (returned 0) is
+	 * done as far as we are concerned; everything else is performed
+	 * right here with nfs_doio().
+	 */
+	if (is_async && nfs_asyncio(bp, NOCRED, td) == 0)
+		return (0);
+	return (nfs_doio(bp, cr, td));
+}
+
+/*
+ * fsync vnode op: flush the vnode through nfs4_flush() with commit == 1.
+ */
+/* ARGSUSED */
+static int
+nfs4_fsync(struct vop_fsync_args *ap)
+{
+	int error;
+
+	error = nfs4_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_td, 1);
+	return (error);
+}
+
+/*
+ * Flush all the blocks associated with a vnode.
+ * Walk through the buffer pool and push any dirty pages
+ * associated with the vnode.
+ *
+ * waitfor:	when MNT_WAIT, buffers locked by someone else are waited for
+ *		(after the first pass) and the routine sleeps until
+ *		v_numoutput drains.
+ * commit:	when non-zero, an extra first pass collects the
+ *		B_DELWRI | B_NEEDCOMMIT buffers and commits them with
+ *		nfs4_commit() before the write pass.
+ *
+ * Returns the value left in `error': 0, EINTR when nfs4_sigintr() reports
+ * an interrupt, the last BUF_TIMELOCK()/msleep() status, or np->n_error
+ * when NWRITEERR is set on the nfsnode.
+ */
+static int
+nfs4_flush(struct vnode *vp, struct ucred *cred, int waitfor, struct thread *td,
+    int commit)
+{
+	struct nfsnode *np = VTONFS(vp);
+	struct buf *bp;
+	int i;
+	struct buf *nbp;
+	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
+	int passone = 1;
+	u_quad_t off, endoff, toff;
+	struct ucred* wcred = NULL;
+	struct buf **bvec = NULL;
+#ifndef NFS_COMMITBVECSIZ
+#define NFS_COMMITBVECSIZ	20
+#endif
+	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
+	int bvecsize = 0, bveccount;
+
+	if (nmp->nm_flag & NFSMNT_INT)
+		slpflag = PCATCH;
+	if (!commit)
+		passone = 0;
+	/*
+	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
+	 * server, but has not been committed to stable storage on the server
+	 * yet. On the first pass, the byte range is worked out and the commit
+	 * rpc is done. On the second pass, nfs_writebp() is called to do the
+	 * job.
+	 */
+again:
+	off = (u_quad_t)-1;
+	endoff = 0;
+	bvecpos = 0;
+	/*
+	 * NOTE(review): the commit pass is also gated on NFS_ISV3(vp);
+	 * confirm that this macro is true for NFSv4 mounts, otherwise the
+	 * pass never runs.
+	 */
+	if (NFS_ISV3(vp) && commit) {
+		s = splbio();
+		if (bvec != NULL && bvec != bvec_on_stack)
+			free(bvec, M_TEMP);
+		/*
+		 * Count up how many buffers waiting for a commit.
+		 */
+		bveccount = 0;
+		VI_LOCK(vp);
+		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+			nbp = TAILQ_NEXT(bp, b_vnbufs);
+			if (BUF_REFCNT(bp) == 0 &&
+			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
+				== (B_DELWRI | B_NEEDCOMMIT))
+				bveccount++;
+		}
+		/*
+		 * Allocate space to remember the list of bufs to commit. It is
+		 * important to use M_NOWAIT here to avoid a race with nfs4_write.
+		 * If we can't get memory (for whatever reason), we will end up
+		 * committing the buffers one-by-one in the loop below.
+		 */
+		if (bveccount > NFS_COMMITBVECSIZ) {
+			/*
+			 * Release the vnode interlock to avoid a lock
+			 * order reversal.
+			 */
+			VI_UNLOCK(vp);
+			bvec = (struct buf **)
+				malloc(bveccount * sizeof(struct buf *),
+				       M_TEMP, M_NOWAIT);
+			VI_LOCK(vp);
+			if (bvec == NULL) {
+				bvec = bvec_on_stack;
+				bvecsize = NFS_COMMITBVECSIZ;
+			} else
+				bvecsize = bveccount;
+		} else {
+			bvec = bvec_on_stack;
+			bvecsize = NFS_COMMITBVECSIZ;
+		}
+		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+			if (bvecpos >= bvecsize)
+				break;
+			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
+				nbp = TAILQ_NEXT(bp, b_vnbufs);
+				continue;
+			}
+			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
+			    (B_DELWRI | B_NEEDCOMMIT)) {
+				BUF_UNLOCK(bp);
+				nbp = TAILQ_NEXT(bp, b_vnbufs);
+				continue;
+			}
+			VI_UNLOCK(vp);
+			bremfree(bp);
+			/*
+			 * Work out if all buffers are using the same cred
+			 * so we can deal with them all with one commit.
+			 *
+			 * NOTE: we are not clearing B_DONE here, so we have
+			 * to do it later on in this routine if we intend to
+			 * initiate I/O on the bp.
+			 *
+			 * Note: to avoid loopback deadlocks, we do not
+			 * assign b_runningbufspace.
+			 */
+			if (wcred == NULL)
+				wcred = bp->b_wcred;
+			else if (wcred != bp->b_wcred)
+				wcred = NOCRED;
+			bp->b_flags |= B_WRITEINPROG;
+			vfs_busy_pages(bp, 1);
+
+			VI_LOCK(vp);
+			/*
+			 * bp is protected by being locked, but nbp is not
+			 * and vfs_busy_pages() may sleep. We have to
+			 * recalculate nbp.
+			 */
+			nbp = TAILQ_NEXT(bp, b_vnbufs);
+
+			/*
+			 * A list of these buffers is kept so that the
+			 * second loop knows which buffers have actually
+			 * been committed. This is necessary, since there
+			 * may be a race between the commit rpc and new
+			 * uncommitted writes on the file.
+			 */
+			bvec[bvecpos++] = bp;
+			/* Grow [off, endoff) to cover this buffer's dirty range. */
+			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
+				bp->b_dirtyoff;
+			if (toff < off)
+				off = toff;
+			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
+			if (toff > endoff)
+				endoff = toff;
+		}
+		splx(s);
+		VI_UNLOCK(vp);
+	}
+	if (bvecpos > 0) {
+		/*
+		 * Commit data on the server, as required.
+		 * If all bufs are using the same wcred, then use that with
+		 * one call for all of them, otherwise commit each one
+		 * separately.
+		 */
+		if (wcred != NOCRED)
+			retv = nfs4_commit(vp, off, (int)(endoff - off),
+					  wcred, td);
+		else {
+			retv = 0;
+			for (i = 0; i < bvecpos; i++) {
+				off_t off, size;	/* `off' shadows the outer u_quad_t off */
+				bp = bvec[i];
+				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
+					bp->b_dirtyoff;
+				size = (u_quad_t)(bp->b_dirtyend
+						  - bp->b_dirtyoff);
+				retv = nfs4_commit(vp, off, (int)size,
+						  bp->b_wcred, td);
+				if (retv) break;
+			}
+		}
+
+		if (retv == NFSERR_STALEWRITEVERF)
+			nfs_clearcommit(vp->v_mount);
+
+		/*
+		 * Now, either mark the blocks I/O done or mark the
+		 * blocks dirty, depending on whether the commit
+		 * succeeded.
+		 */
+		for (i = 0; i < bvecpos; i++) {
+			bp = bvec[i];
+			bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG | B_CLUSTEROK);
+			if (retv) {
+				/*
+				 * Error, leave B_DELWRI intact
+				 */
+				vfs_unbusy_pages(bp);
+				brelse(bp);
+			} else {
+				/*
+				 * Success, remove B_DELWRI ( bundirty() ).
+				 *
+				 * b_dirtyoff/b_dirtyend seem to be NFS
+				 * specific. We should probably move that
+				 * into bundirty(). XXX
+				 */
+				s = splbio();
+				VI_LOCK(vp);
+				vp->v_numoutput++;
+				VI_UNLOCK(vp);
+				bp->b_flags |= B_ASYNC;
+				bundirty(bp);
+				bp->b_flags &= ~B_DONE;
+				bp->b_ioflags &= ~BIO_ERROR;
+				bp->b_dirtyoff = bp->b_dirtyend = 0;
+				splx(s);
+				bufdone(bp);
+			}
+		}
+	}
+
+	/*
+	 * Start/do any write(s) that are required.
+	 */
+loop:
+	s = splbio();
+	VI_LOCK(vp);
+	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+		nbp = TAILQ_NEXT(bp, b_vnbufs);
+		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
+			/* Busy buffer: only wait for it on a MNT_WAIT second pass. */
+			if (waitfor != MNT_WAIT || passone)
+				continue;
+
+			error = BUF_TIMELOCK(bp,
+			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
+			    VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
+			splx(s);
+			if (error == 0)
+				panic("nfs4_fsync: inconsistent lock");
+			if (error == ENOLCK)
+				goto loop;
+			if (nfs4_sigintr(nmp, NULL, td)) {
+				error = EINTR;
+				goto done;
+			}
+			/* After one PCATCH sleep, switch to a 2 second timeout. */
+			if (slpflag == PCATCH) {
+				slpflag = 0;
+				slptimeo = 2 * hz;
+			}
+			goto loop;
+		}
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("nfs4_fsync: not dirty");
+		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
+			BUF_UNLOCK(bp);
+			continue;
+		}
+		VI_UNLOCK(vp);
+		bremfree(bp);
+		if (passone || !commit)
+			bp->b_flags |= B_ASYNC;
+		else
+			bp->b_flags |= B_ASYNC | B_WRITEINPROG;
+		splx(s);
+		BUF_WRITE(bp);
+		/* The interlock was dropped; rescan the dirty list from the top. */
+		goto loop;
+	}
+	splx(s);
+	if (passone) {
+		passone = 0;
+		VI_UNLOCK(vp);
+		goto again;
+	}
+	if (waitfor == MNT_WAIT) {
+		while (vp->v_numoutput) {
+			vp->v_iflag |= VI_BWAIT;
+			error = msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
+				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
+			if (error) {
+				VI_UNLOCK(vp);
+				if (nfs4_sigintr(nmp, NULL, td)) {
+					error = EINTR;
+					goto done;
+				}
+				if (slpflag == PCATCH) {
+					slpflag = 0;
+					slptimeo = 2 * hz;
+				}
+				VI_LOCK(vp);
+			}
+		}
+		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) {
+			VI_UNLOCK(vp);
+			goto loop;
+		}
+	}
+	VI_UNLOCK(vp);
+	/* Report (and clear) a write error saved on the nfsnode. */
+	if (np->n_flag & NWRITEERR) {
+		error = np->n_error;
+		np->n_flag &= ~NWRITEERR;
+	}
+done:
+	if (bvec != NULL && bvec != bvec_on_stack)
+		free(bvec, M_TEMP);
+	return (error);
+}
+
+/*
+ * NFS advisory byte-level locks.
+ *
+ * Currently a stub: the first statement returns EPERM unconditionally,
+ * so the lf_advlock()/nfs_dolock() code below it is never reached.
+ */
+static int
+nfs4_advlock(struct vop_advlock_args *ap)
+{
+	return (EPERM);
+
+	if ((VFSTONFS(ap->a_vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
+		struct nfsnode *np = VTONFS(ap->a_vp);
+
+		return (lf_advlock(ap, &(np->n_lockf), np->n_size));
+	}
+	return (nfs_dolock(ap));
+}
+
+/*
+ * Print out the contents of an nfsnode.
+ *
+ * Prints the cached fileid and fsid, adds fifo_printinfo() output for
+ * VFIFO vnodes, and always returns 0.
+ */
+static int
+nfs4_print(struct vop_print_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+
+	printf("\tfileid %ld fsid 0x%x",
+	   np->n_vattr.va_fileid, np->n_vattr.va_fsid);
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+	printf("\n");
+	return (0);
+}
+
+/*
+ * This is the "real" nfs::bwrite(struct buf*).
+ * B_WRITEINPROG isn't set unless the force flag is one and it
+ * handles the B_NEEDCOMMIT flag.
+ * We set B_CACHE if this is a VMIO buffer.
+ *
+ * The buffer must be locked (panics otherwise).  A B_INVAL buffer is
+ * released and 0 is returned.  If B_ASYNC was clear on entry the routine
+ * waits with bufwait() and returns its result; otherwise it returns 0
+ * once the buffer has been handed to VOP_STRATEGY().  `td' is not used.
+ * NOTE(review): the code below sets B_CACHE unconditionally, not only for
+ * VMIO buffers as the sentence above says.
+ */
+int
+nfs4_writebp(struct buf *bp, int force, struct thread *td)
+{
+	int s;
+	int oldflags = bp->b_flags;	/* flags as passed in, before we modify them */
+#if 0
+	int retv = 1;
+	off_t off;
+#endif
+
+	if (BUF_REFCNT(bp) == 0)
+		panic("bwrite: buffer is not locked???");
+
+	if (bp->b_flags & B_INVAL) {
+		brelse(bp);
+		return(0);
+	}
+
+	bp->b_flags |= B_CACHE;
+
+	/*
+	 * Undirty the bp. We will redirty it later if the I/O fails.
+	 */
+
+	s = splbio();
+	bundirty(bp);
+	bp->b_flags &= ~B_DONE;
+	bp->b_ioflags &= ~BIO_ERROR;
+	bp->b_iocmd = BIO_WRITE;
+
+	/* Account for the write now in flight on this vnode. */
+	VI_LOCK(bp->b_vp);
+	bp->b_vp->v_numoutput++;
+	VI_UNLOCK(bp->b_vp);
+	curthread->td_proc->p_stats->p_ru.ru_oublock++;
+	splx(s);
+
+	/*
+	 * Note: to avoid loopback deadlocks, we do not
+	 * assign b_runningbufspace.
+	 */
+	vfs_busy_pages(bp, 1);
+
+	if (force)
+		bp->b_flags |= B_WRITEINPROG;
+	BUF_KERNPROC(bp);
+	bp->b_iooffset = dbtob(bp->b_blkno);
+	VOP_STRATEGY(bp->b_vp, bp);
+
+	/* Synchronous write: wait for completion and release the buffer. */
+	if( (oldflags & B_ASYNC) == 0) {
+		int rtval = bufwait(bp);
+
+		if (oldflags & B_DELWRI) {
+			s = splbio();
+			reassignbuf(bp, bp->b_vp);
+			splx(s);
+		}
+
+		brelse(bp);
+		return (rtval);
+	}
+
+	return (0);
+}
+
+/*
+ * nfs special file access vnode op.
+ * Essentially just get vattr and then imitate iaccess() since the device is + * local to the client. + */ +static int +nfsspec_access(struct vop_access_args *ap) +{ + struct vattr *vap; + struct ucred *cred = ap->a_cred; + struct vnode *vp = ap->a_vp; + mode_t mode = ap->a_mode; + struct vattr vattr; + int error; + + /* + * Disallow write attempts on filesystems mounted read-only; + * unless the file is a socket, fifo, or a block or character + * device resident on the filesystem. + */ + if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { + switch (vp->v_type) { + case VREG: + case VDIR: + case VLNK: + return (EROFS); + default: + break; + } + } + vap = &vattr; + error = VOP_GETATTR(vp, vap, cred, ap->a_td); + if (error) + return (error); + return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, + mode, cred, NULL)); +} + +/* + * Read wrapper for special devices. + */ +static int +nfsspec_read(struct vop_read_args *ap) +{ + struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set access flag. + */ + np->n_flag |= NACC; + getnanotime(&np->n_atim); + return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); +} + +/* + * Write wrapper for special devices. + */ +static int +nfsspec_write(struct vop_write_args *ap) +{ + struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set update flag. + */ + np->n_flag |= NUPD; + getnanotime(&np->n_mtim); + return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); +} + +/* + * Close wrapper for special devices. + * + * Update the times on the nfsnode then do device close. 
+ */
+static int
+nfsspec_close(struct vop_close_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+	struct vattr va;
+
+	/* Nothing was read or written through this node: plain close. */
+	if ((np->n_flag & (NACC | NUPD)) == 0)
+		return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
+
+	np->n_flag |= NCHG;
+	/* Last reference on a writable mount: push the saved times. */
+	if (vrefcnt(vp) == 1 && !(vp->v_mount->mnt_flag & MNT_RDONLY)) {
+		VATTR_NULL(&va);
+		if (np->n_flag & NACC)
+			va.va_atime = np->n_atim;
+		if (np->n_flag & NUPD)
+			va.va_mtime = np->n_mtim;
+		(void)VOP_SETATTR(vp, &va, ap->a_cred, ap->a_td);
+	}
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
+}
+
+/*
+ * Fifo read: flag the nfsnode as accessed, stamp n_atim, then pass the
+ * call on to the fifo vnode operations.
+ */
+static int
+nfsfifo_read(struct vop_read_args *ap)
+{
+	struct nfsnode *node = VTONFS(ap->a_vp);
+
+	node->n_flag |= NACC;
+	getnanotime(&node->n_atim);
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Fifo write: flag the nfsnode as updated, stamp n_mtim, then pass the
+ * call on to the fifo vnode operations.
+ */
+static int
+nfsfifo_write(struct vop_write_args *ap)
+{
+	struct nfsnode *node = VTONFS(ap->a_vp);
+
+	node->n_flag |= NUPD;
+	getnanotime(&node->n_mtim);
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for fifos.
+ *
+ * Update the times on the nfsnode then do fifo close.
+ */ +static int +nfsfifo_close(struct vop_close_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct vattr vattr; + struct timespec ts; + + if (np->n_flag & (NACC | NUPD)) { + getnanotime(&ts); + if (np->n_flag & NACC) + np->n_atim = ts; + if (np->n_flag & NUPD) + np->n_mtim = ts; + np->n_flag |= NCHG; + if (vrefcnt(vp) == 1 && + (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + VATTR_NULL(&vattr); + if (np->n_flag & NACC) + vattr.va_atime = np->n_atim; + if (np->n_flag & NUPD) + vattr.va_mtime = np->n_mtim; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_td); + (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td); + VOP_UNLOCK(vp, 0, ap->a_td); + } + } + return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); +} + diff --git a/sys/nfs4client/nfs4m_subs.h b/sys/nfs4client/nfs4m_subs.h new file mode 100644 index 0000000..534c836 --- /dev/null +++ b/sys/nfs4client/nfs4m_subs.h @@ -0,0 +1,498 @@ +/* $FreeBSD$ */ +/* $Id: nfs4m_subs.h,v 1.36 2003/11/05 14:59:01 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. 
+ * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +#ifndef _NFS4CLIENT_NFSM4_SUBS_H +#define _NFS4CLIENT_NFSM4_SUBS_H + +void nfsm_v4init(void); + +void nfsm_buildf_xx(struct mbuf **mb, caddr_t *bpos, char *fmt, ...); +int nfsm_dissectf_xx(struct mbuf **md, caddr_t *dpos, char *fmt, ...); + +int nfsm_v4build_compound_xx(struct nfs4_compound *, char *, + struct mbuf **, caddr_t *); +int nfsm_v4build_putfh_xx(struct nfs4_compound *, struct vnode *, + struct mbuf **, caddr_t *); +int nfsm_v4build_putfh_nv_xx(struct nfs4_compound *, struct nfs4_oparg_getfh *, + struct mbuf **, caddr_t *); +int nfsm_v4build_getattr_xx(struct nfs4_compound *, struct nfs4_oparg_getattr *, + struct mbuf **, caddr_t *); +int nfsm_v4build_finalize_xx(struct nfs4_compound *, struct mbuf **, caddr_t *); +int nfsm_v4build_getfh_xx(struct nfs4_compound *, struct nfs4_oparg_getfh *, + struct mbuf **, caddr_t *); +int nfsm_v4build_lookup_xx(struct nfs4_compound *, struct nfs4_oparg_lookup *, + struct mbuf **, caddr_t *); +int nfsm_v4build_setclientid_xx(struct nfs4_compound *, + struct nfs4_oparg_setclientid *, struct mbuf **, caddr_t *); +int nfsm_v4build_setclientid_confirm_xx(struct nfs4_compound *, + struct nfs4_oparg_setclientid *, struct mbuf **, caddr_t *); +int nfsm_v4build_close_xx(struct nfs4_compound *, struct nfs4_fctx *, + struct mbuf **, caddr_t *); +int 
nfsm_v4build_access_xx(struct nfs4_compound *, struct nfs4_oparg_access *, + struct mbuf **, caddr_t *); +int nfsm_v4build_open_xx(struct nfs4_compound *, struct nfs4_oparg_open *, + struct mbuf **, caddr_t *); +int nfsm_v4build_open_confirm_xx(struct nfs4_compound *, struct nfs4_oparg_open *, + struct mbuf **, caddr_t *); +int nfsm_v4build_read_xx(struct nfs4_compound *, struct nfs4_oparg_read *, + struct mbuf **, caddr_t *); +int nfsm_v4build_write_xx(struct nfs4_compound *, struct nfs4_oparg_write *, + struct mbuf **, caddr_t *); +int nfsm_v4build_commit_xx(struct nfs4_compound *, struct nfs4_oparg_commit *, + struct mbuf **, caddr_t *); +int nfsm_v4build_readdir_xx(struct nfs4_compound *, struct nfs4_oparg_readdir *, + struct mbuf **, caddr_t *); +int nfsm_v4build_renew_xx(struct nfs4_compound *, uint64_t, + struct mbuf **, caddr_t *); +int nfsm_v4build_setattr_xx(struct nfs4_compound *, struct vattr *, + struct nfs4_fctx *, struct mbuf **, caddr_t *); +int nfsm_v4build_create_xx(struct nfs4_compound *, struct nfs4_oparg_create *, + struct mbuf **, caddr_t *); +int nfsm_v4build_rename_xx(struct nfs4_compound *, struct nfs4_oparg_rename *, + struct mbuf **, caddr_t *); +int nfsm_v4build_link_xx(struct nfs4_compound *, struct nfs4_oparg_link *, + struct mbuf **, caddr_t *); +int nfsm_v4build_remove_xx(struct nfs4_compound *, const char *, u_int, + struct mbuf **, caddr_t *); + +int nfsm_v4dissect_compound_xx(struct nfs4_compound *, struct mbuf **, caddr_t *); +int nfsm_v4dissect_getattr_xx(struct nfs4_compound *, struct nfs4_oparg_getattr *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_getfh_xx(struct nfs4_compound *, struct nfs4_oparg_getfh *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_setclientid_xx(struct nfs4_compound *, + struct nfs4_oparg_setclientid *, struct mbuf **, caddr_t *); +int nfsm_v4dissect_close_xx(struct nfs4_compound *, struct nfs4_fctx *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_access_xx(struct nfs4_compound *, struct 
nfs4_oparg_access *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_open_xx(struct nfs4_compound *, struct nfs4_oparg_open *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_open_confirm_xx(struct nfs4_compound *, struct nfs4_oparg_open *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_read_xx(struct nfs4_compound *, struct nfs4_oparg_read *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_write_xx(struct nfs4_compound *, struct nfs4_oparg_write *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_commit_xx(struct nfs4_compound *, struct nfs4_oparg_commit *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_setattr_xx(struct nfs4_compound *, struct mbuf **, caddr_t *); +int nfsm_v4dissect_create_xx(struct nfs4_compound *, struct nfs4_oparg_create *, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_readlink_xx(struct nfs4_compound *, struct uio *, + struct mbuf **, caddr_t *); + +int nfsm_v4dissect_attrs_xx(struct nfsv4_fattr *, struct mbuf **, caddr_t *); + +int nfsm_v4build_simple_xx(struct nfs4_compound *, uint32_t, + struct mbuf **, caddr_t *); +int nfsm_v4dissect_simple_xx(struct nfs4_compound *, uint32_t, + uint32_t, struct mbuf **, caddr_t *); + +#define nfsm_v4build_putrootfh_xx(cp, mb, bpos) \ + nfsm_v4build_simple_xx((cp), NFSV4OP_PUTROOTFH, (mb), (bpos)) + +#define nfsm_v4build_lookupp_xx(cp, mb, bpos) \ + nfsm_v4build_simple_xx((cp), NFSV4OP_LOOKUPP, (mb), (bpos)) + +#define nfsm_v4build_savefh_xx(cp, mb, bpos) \ + nfsm_v4build_simple_xx((cp), NFSV4OP_SAVEFH, (mb), (bpos)) + +#define nfsm_v4build_readlink_xx(cp, mb, bpos) \ + nfsm_v4build_simple_xx((cp), NFSV4OP_READLINK, (mb), (bpos)) + + +#define nfsm_v4dissect_putrootfh_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_PUTROOTFH, 0, (mb), (bpos)) + +#define nfsm_v4dissect_lookup_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_LOOKUP, 0, (mb), (bpos)) + +#define nfsm_v4dissect_lookupp_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_LOOKUPP, 0, (mb), (bpos)) 
+ +#define nfsm_v4dissect_putfh_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_PUTFH, 0, (mb), (bpos)) + +#define nfsm_v4dissect_renew_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_RENEW, 0, (mb), (bpos)) + +#define nfsm_v4dissect_setclientid_confirm_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_SETCLIENTID_CONFIRM, 0, (mb), (bpos)) + +#define nfsm_v4dissect_rename_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_RENAME, 0, (mb), (bpos)) + +#define nfsm_v4dissect_link_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_LINK, 0, (mb), (bpos)) + +#define nfsm_v4dissect_savefh_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_SAVEFH, 0, (mb), (bpos)) + +#define nfsm_v4dissect_remove_xx(cp, mb, bpos) \ + nfsm_v4dissect_simple_xx((cp), NFSV4OP_REMOVE, 0, (mb), (bpos)) + +#define nfsm_v4build_compound(cp, tag) do { \ + int32_t t1; \ + t1 = nfsm_v4build_compound_xx((cp), (tag), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_finalize(cp) do { \ + int32_t t1; \ + t1 = nfsm_v4build_finalize_xx((cp), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_putfh(cp, vp) do { \ + int32_t t1; \ + t1 = nfsm_v4build_putfh_xx((cp), (vp), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_putfh_nv(cp, gfh) do { \ + int32_t t1; \ + t1 = nfsm_v4build_putfh_nv_xx((cp), (gfh), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_putrootfh(cp) do { \ + int32_t t1; \ + t1 = nfsm_v4build_putrootfh_xx((cp), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_getattr(cp, ga) do { \ + int32_t t1; \ + t1 = nfsm_v4build_getattr_xx((cp), (ga), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_setattr(cp, vap, fcp) do { \ + int32_t t1; \ + t1 = nfsm_v4build_setattr_xx((cp), (vap), (fcp), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define 
nfsm_v4build_lookup(cp, l) do { \ + int32_t t1; \ + t1 = nfsm_v4build_lookup_xx((cp), (l), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_lookupp(cp) do { \ + int32_t t1; \ + t1 = nfsm_v4build_lookupp_xx((cp), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_getfh(cp, gfh) do { \ + int32_t t1; \ + t1 = nfsm_v4build_getfh_xx((cp), (gfh), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_close(cp, fcp) do { \ + int32_t t1; \ + t1 = nfsm_v4build_close_xx((cp), (fcp), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_access(cp, acc) do { \ + int32_t t1; \ + t1 = nfsm_v4build_access_xx((cp), (acc), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_open(cp, o) do { \ + int32_t t1; \ + t1 = nfsm_v4build_open_xx((cp), (o), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_open_confirm(cp, o) do { \ + int32_t t1; \ + t1 = nfsm_v4build_open_confirm_xx((cp), (o), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_read(cp, r) do { \ + int32_t t1; \ + t1 = nfsm_v4build_read_xx((cp), (r), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_write(cp, w) do { \ + int32_t t1; \ + t1 = nfsm_v4build_write_xx((cp), (w), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_commit(cp, c) do { \ + int32_t t1; \ + t1 = nfsm_v4build_commit_xx((cp), (c), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_readdir(cp, r) do { \ + int32_t t1; \ + t1 = nfsm_v4build_readdir_xx((cp), (r), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_renew(cp, cid) do { \ + int32_t t1; \ + t1 = nfsm_v4build_renew_xx((cp), (cid), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_setclientid(cp, cid) do { \ + int32_t t1; \ + t1 = nfsm_v4build_setclientid_xx((cp), (cid), 
&mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_setclientid_confirm(cp, cid) do { \ + int32_t t1; \ + t1 = nfsm_v4build_setclientid_confirm_xx((cp), (cid), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_create(cp, c) do { \ + int32_t t1; \ + t1 = nfsm_v4build_create_xx((cp), (c), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_rename(cp, r) do { \ + int32_t t1; \ + t1 = nfsm_v4build_rename_xx((cp), (r), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_link(cp, r) do { \ + int32_t t1; \ + t1 = nfsm_v4build_link_xx((cp), (r), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_savefh(cp) do { \ + int32_t t1; \ + t1 = nfsm_v4build_savefh_xx((cp), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_readlink(cp) do { \ + int32_t t1; \ + t1 = nfsm_v4build_readlink_xx((cp), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +#define nfsm_v4build_remove(cp, name, namelen) do { \ + int32_t t1; \ + t1 = nfsm_v4build_remove_xx((cp), (name), (namelen), &mb, &bpos); \ + nfsm_bcheck(t1, mreq); \ +} while (0) + +/* --- */ + +#define nfsm_v4dissect_compound(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_compound_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_putfh(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_putfh_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_putrootfh(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_putrootfh_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_getattr(cp, ga) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_getattr_xx((cp), (ga), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_setattr(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_setattr_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} 
while (0) + +#define nfsm_v4dissect_lookup(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_lookup_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_lookupp(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_lookupp_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_getfh(cp, gfh) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_getfh_xx((cp), (gfh), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_setclientid(cp, sci) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_setclientid_xx((cp), (sci), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_setclientid_confirm(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_setclientid_confirm_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_close(cp, fcp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_close_xx((cp), (fcp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_access(cp, acc) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_access_xx((cp), (acc), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_open(cp, openp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_open_xx((cp), (openp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_open_confirm(cp, openp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_open_confirm_xx((cp), (openp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_read(cp, r) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_read_xx((cp), (r), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_write(cp, w) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_write_xx((cp), (w), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_commit(cp, c) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_commit_xx((cp), (c), &md, &dpos); \ + nfsm_dcheck(t1, 
mrep); \ +} while (0) + +#define nfsm_v4dissect_attrs(fattr) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_attrs_xx((fattr), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_renew(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_renew_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_create(cp, c) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_create_xx((cp), (c), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_rename(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_rename_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_link(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_link_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_savefh(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_savefh_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_readlink(cp, uiop) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_readlink_xx((cp), (uiop), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#define nfsm_v4dissect_remove(cp) \ +do { \ + int32_t t1; \ + t1 = nfsm_v4dissect_remove_xx((cp), &md, &dpos); \ + nfsm_dcheck(t1, mrep); \ +} while (0) + +#endif /* _NFS4CLIENT_NFSM4_SUBS_H */ diff --git a/sys/nfsclient/bootp_subr.c b/sys/nfsclient/bootp_subr.c index 51a618e..c4d9eea 100644 --- a/sys/nfsclient/bootp_subr.c +++ b/sys/nfsclient/bootp_subr.c @@ -65,6 +65,8 @@ __FBSDID("$FreeBSD$"); #include <net/if_types.h> #include <net/if_dl.h> +#include <rpc/rpcclnt.h> + #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> #include <nfsclient/nfs.h> diff --git a/sys/nfsclient/krpc_subr.c b/sys/nfsclient/krpc_subr.c index b12bbc6..9ecee2c 100644 --- a/sys/nfsclient/krpc_subr.c +++ b/sys/nfsclient/krpc_subr.c @@ -57,6 +57,8 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <netinet/in.h> +#include <rpc/rpcclnt.h> + #include <nfs/rpcv2.h> #include 
<nfsclient/krpc.h> #include <nfs/xdr_subs.h> diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h index f231fb9..f09fd65 100644 --- a/sys/nfsclient/nfs.h +++ b/sys/nfsclient/nfs.h @@ -242,13 +242,31 @@ extern int nfs_debug; #endif +/* + * File context information for nfsv4. Currently, there is only one + * lockowner for the whole machine "0." + */ +struct nfs4_fctx { + TAILQ_ENTRY(nfs4_fstate) next; + + pid_t pid; + uint32_t refcnt; + struct nfs4_lowner *lop; + struct nfsnode *np; + char stateid[NFSX_V4STATEID]; +}; + vfs_init_t nfs_init; vfs_uninit_t nfs_uninit; int nfs_mountroot(struct mount *mp, struct thread *td); + +#ifndef NFS4_USE_RPCCLNT int nfs_send(struct socket *, struct sockaddr *, struct mbuf *, struct nfsreq *); int nfs_sndlock(struct nfsreq *); void nfs_sndunlock(struct nfsreq *); +#endif /* ! NFS4_USE_RPCCLNT */ + int nfs_vinvalbuf(struct vnode *, int, struct ucred *, struct thread *, int); int nfs_readrpc(struct vnode *, struct uio *, struct ucred *); @@ -271,6 +289,7 @@ int nfsm_mbuftouio(struct mbuf **, struct uio *, int, caddr_t *); void nfs_nhinit(void); int nfs_nmcancelreqs(struct nfsmount *); void nfs_timer(void*); + int nfs_connect(struct nfsmount *, struct nfsreq *); void nfs_disconnect(struct nfsmount *); void nfs_safedisconnect(struct nfsmount *); diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c index 442acbc..58ef852 100644 --- a/sys/nfsclient/nfs_bio.c +++ b/sys/nfsclient/nfs_bio.c @@ -58,24 +58,40 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_pager.h> #include <vm/vnode_pager.h> +#include <rpc/rpcclnt.h> + #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> #include <nfsclient/nfs.h> #include <nfsclient/nfsmount.h> #include <nfsclient/nfsnode.h> +#include <nfs4client/nfs4.h> + /* * Just call nfs_writebp() with the force argument set to 1. * * NOTE: B_DONE may or may not be set in a_bp on call. 
*/ static int +nfs4_bwrite(struct buf *bp) +{ + + return (nfs4_writebp(bp, 1, curthread)); +} + +static int nfs_bwrite(struct buf *bp) { return (nfs_writebp(bp, 1, curthread)); } +struct buf_ops buf_ops_nfs4 = { + "buf_ops_nfs4", + nfs4_bwrite +}; + struct buf_ops buf_ops_nfs = { "buf_ops_nfs", nfs_bwrite @@ -118,6 +134,7 @@ nfs_getpages(struct vop_getpages_args *ap) if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + /* We'll never get here for v4, because we always have fsinfo */ (void)nfs_fsinfo(nmp, vp, cred, td); } @@ -170,7 +187,10 @@ nfs_getpages(struct vop_getpages_args *ap) uio.uio_rw = UIO_READ; uio.uio_td = td; - error = nfs_readrpc(vp, &uio, cred); + if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) + error = nfs4_readrpc(vp, &uio, cred); + else + error = nfs_readrpc(vp, &uio, cred); pmap_qremove(kva, npages); relpbuf(bp, &nfs_pbuf_freecnt); @@ -332,7 +352,10 @@ nfs_putpages(struct vop_putpages_args *ap) else iomode = NFSV3WRITE_FILESYNC; - error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit); + if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) + error = nfs4_writerpc(vp, &uio, cred, &iomode, &must_commit); + else + error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit); pmap_qremove(kva, npages); relpbuf(bp, &nfs_pbuf_freecnt); @@ -837,7 +860,10 @@ again: allocbuf(bp, bcount); bp->b_flags |= save; bp->b_magic = B_MAGIC_NFS; - bp->b_op = &buf_ops_nfs; + if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) + bp->b_op = &buf_ops_nfs4; + else + bp->b_op = &buf_ops_nfs; } } else { /* @@ -996,7 +1022,10 @@ again: break; } else if ((n + on) == biosize) { bp->b_flags |= B_ASYNC; - (void)nfs_writebp(bp, 0, 0); + if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) + (void)nfs4_writebp(bp, 0, 0); + else + (void)nfs_writebp(bp, 0, 0); } else { bdwrite(bp); } @@ -1339,13 +1368,17 @@ nfs_doio(struct buf *bp, struct ucred *cr, struct thread *td) case VDIR: nfsstats.readdir_bios++; uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; - if (nmp->nm_flag & 
NFSMNT_RDIRPLUS) { - error = nfs_readdirplusrpc(vp, uiop, cr); - if (error == NFSERR_NOTSUPP) - nmp->nm_flag &= ~NFSMNT_RDIRPLUS; + if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) + error = nfs4_readdirrpc(vp, uiop, cr); + else { + if ((nmp->nm_flag & NFSMNT_RDIRPLUS) != 0) { + error = nfs_readdirplusrpc(vp, uiop, cr); + if (error == NFSERR_NOTSUPP) + nmp->nm_flag &= ~NFSMNT_RDIRPLUS; + } + if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) + error = nfs_readdirrpc(vp, uiop, cr); } - if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) - error = nfs_readdirrpc(vp, uiop, cr); /* * end-of-directory sets B_INVAL but does not generate an * error. diff --git a/sys/nfsclient/nfs_diskless.c b/sys/nfsclient/nfs_diskless.c index 4e09bde..e2c199a 100644 --- a/sys/nfsclient/nfs_diskless.c +++ b/sys/nfsclient/nfs_diskless.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <net/if_var.h> #include <net/ethernet.h> #include <netinet/in.h> +#include <rpc/rpcclnt.h> #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> #include <nfsclient/nfs.h> diff --git a/sys/nfsclient/nfs_lock.c b/sys/nfsclient/nfs_lock.c index 374bbf9..88b9609 100644 --- a/sys/nfsclient/nfs_lock.c +++ b/sys/nfsclient/nfs_lock.c @@ -51,6 +51,8 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> +#include <rpc/rpcclnt.h> + #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> #include <nfsclient/nfs.h> diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c index c33b950..6ebd053 100644 --- a/sys/nfsclient/nfs_nfsiod.c +++ b/sys/nfsclient/nfs_nfsiod.c @@ -66,6 +66,9 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/tcp.h> + +#include <rpc/rpcclnt.h> + #include <nfs/xdr_subs.h> #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c index fc3520f..3dd3d03 100644 --- a/sys/nfsclient/nfs_node.c +++ b/sys/nfsclient/nfs_node.c @@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$"); #include <vm/uma.h> +#include <rpc/rpcclnt.h> + #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> 
#include <nfsclient/nfs.h> @@ -182,7 +184,7 @@ nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp) /* * Calculate nfs mount point and figure out whether the rslock should - * be interruptable or not. + * be interruptible or not. */ nmp = VFSTONFS(mntp); if (nmp->nm_flag & NFSMNT_INT) @@ -226,7 +228,10 @@ loop: */ np = uma_zalloc(nfsnode_zone, M_WAITOK); - error = getnewvnode("nfs", mntp, nfs_vnodeop_p, &nvp); + if (nmp->nm_flag & NFSMNT_NFSV4) + error = getnewvnode("nfs4", mntp, nfs4_vnodeop_p, &nvp); + else + error = getnewvnode("nfs", mntp, nfs_vnodeop_p, &nvp); if (error) { if (nfs_node_hash_lock < 0) wakeup(&nfs_node_hash_lock); @@ -295,7 +300,7 @@ nfs_inactive(struct vop_inactive_args *ap) /* * Remove the silly file that was rename'd earlier */ - nfs_removeit(sp); + (sp->s_removeit)(sp); crfree(sp->s_cred); vrele(sp->s_dvp); FREE((caddr_t)sp, M_NFSREQ); diff --git a/sys/nfsclient/nfs_socket.c b/sys/nfsclient/nfs_socket.c index 9c82759..d74ae8e 100644 --- a/sys/nfsclient/nfs_socket.c +++ b/sys/nfsclient/nfs_socket.c @@ -65,6 +65,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/tcp.h> +#include <rpc/rpcclnt.h> + #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> #include <nfsclient/nfs.h> @@ -73,6 +75,8 @@ __FBSDID("$FreeBSD$"); #include <nfsclient/nfsmount.h> #include <nfsclient/nfsnode.h> +#include <nfs4client/nfs4.h> + #define TRUE 1 #define FALSE 0 @@ -874,6 +878,8 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum, return (ESTALE); } nmp = VFSTONFS(vp->v_mount); + if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) + return nfs4_request(vp, mrest, procnum, td, cred, mrp, mdp, dposp); MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); rep->r_nmp = nmp; rep->r_vp = vp; @@ -1235,6 +1241,8 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td) struct proc *p; sigset_t tmpset; + if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) + return nfs4_sigintr(nmp, rep, td); if (rep && 
(rep->r_flags & R_SOFTTERM)) return (EINTR); /* Terminate all requests while attempting a forced unmount. */ diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c index 7b8866e..8d4a3df 100644 --- a/sys/nfsclient/nfs_subs.c +++ b/sys/nfsclient/nfs_subs.c @@ -67,6 +67,8 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #include <vm/uma.h> +#include <rpc/rpcclnt.h> + #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> #include <nfsclient/nfs.h> @@ -761,6 +763,10 @@ nfs_invaldir(struct vnode *vp) if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif + if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NFSV4) != 0) { + nfs4_invaldir(vp); + return; + } np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c index b53a200..7ee48ab 100644 --- a/sys/nfsclient/nfs_vfsops.c +++ b/sys/nfsclient/nfs_vfsops.c @@ -66,6 +66,8 @@ __FBSDID("$FreeBSD$"); #include <net/route.h> #include <netinet/in.h> +#include <rpc/rpcclnt.h> + #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> #include <nfsclient/nfs.h> diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c index 692ab89..60acab0 100644 --- a/sys/nfsclient/nfs_vnops.c +++ b/sys/nfsclient/nfs_vnops.c @@ -69,6 +69,8 @@ __FBSDID("$FreeBSD$"); #include <fs/fifofs/fifo.h> +#include <rpc/rpcclnt.h> + #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> #include <nfsclient/nfs.h> @@ -2397,6 +2399,7 @@ nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) M_NFSREQ, M_WAITOK); sp->s_cred = crhold(cnp->cn_cred); sp->s_dvp = dvp; + sp->s_removeit = nfs_removeit; VREF(dvp); /* Fudge together a funny name */ diff --git a/sys/nfsclient/nfsargs.h b/sys/nfsclient/nfsargs.h index 7f4c636..5cd0e9e 100644 --- a/sys/nfsclient/nfsargs.h +++ b/sys/nfsclient/nfsargs.h @@ -95,5 +95,7 @@ struct nfs_args { #define NFSMNT_ACDIRMIN 0x00100000 #define NFSMNT_ACDIRMAX 0x00200000 #define NFSMNT_NOLOCKD 0x00400000 
/* Locks are local */ +#define NFSMNT_NFSV4 0x00800000 /* Use NFS Version 4 protocol */ +#define NFSMNT_HASWRITEVERF 0x01000000 /* NFSv4 Write verifier */ #endif diff --git a/sys/nfsclient/nfsm_subs.h b/sys/nfsclient/nfsm_subs.h index f626dd4..ee9d215 100644 --- a/sys/nfsclient/nfsm_subs.h +++ b/sys/nfsclient/nfsm_subs.h @@ -147,6 +147,17 @@ do { \ } \ } while (0) +#define nfsm_request_mnt(n, t, p, c) \ +do { \ + error = nfs4_request_mnt((n), mreq, (t), (p), (c), &mrep, &md, &dpos); \ + if (error != 0) { \ + if (error & NFSERR_RETERR) \ + error &= ~NFSERR_RETERR; \ + else \ + goto nfsmout; \ + } \ +} while (0) + /* *********************************** */ /* Reply interpretation phase macros */ diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h index 8c531cb..485885a 100644 --- a/sys/nfsclient/nfsmount.h +++ b/sys/nfsclient/nfsmount.h @@ -50,8 +50,9 @@ struct nfsmount { int nm_state; /* Internal state flags */ struct mount *nm_mountp; /* Vfs structure for this filesystem */ int nm_numgrps; /* Max. 
size of groupslist */ - u_char nm_fh[NFSX_V3FHMAX]; /* File handle of root dir */ + u_char nm_fh[NFSX_V4FH]; /* File handle of root dir */ int nm_fhsize; /* Size of root file handle */ + struct rpcclnt nm_rpcclnt; /* rpc state */ struct socket *nm_so; /* Rpc socket */ int nm_sotype; /* Type of socket */ int nm_soproto; /* and protocol */ @@ -79,6 +80,12 @@ struct nfsmount { short nm_bufqwant; /* process wants to add to the queue */ int nm_bufqiods; /* number of iods processing queue */ u_int64_t nm_maxfilesize; /* maximum file size */ + + /* NFSv4 */ + uint64_t nm_clientid; + fsid_t nm_fsid; + u_int nm_lease_time; + time_t nm_last_renewal; }; #if defined(_KERNEL) diff --git a/sys/nfsclient/nfsnode.h b/sys/nfsclient/nfsnode.h index f74d99f..76019cd 100644 --- a/sys/nfsclient/nfsnode.h +++ b/sys/nfsclient/nfsnode.h @@ -51,6 +51,7 @@ struct sillyrename { struct ucred *s_cred; struct vnode *s_dvp; + int (*s_removeit)(struct sillyrename *sp); long s_namlen; char s_name[20]; }; @@ -68,9 +69,15 @@ struct sillyrename { struct nfsdmap { LIST_ENTRY(nfsdmap) ndm_list; int ndm_eocookie; - nfsuint64 ndm_cookies[NFSNUMCOOKIES]; + union { + nfsuint64 ndmu3_cookies[NFSNUMCOOKIES]; + uint64_t ndmu4_cookies[NFSNUMCOOKIES]; + } ndm_un1; }; +#define ndm_cookies ndm_un1.ndmu3_cookies +#define ndm4_cookies ndm_un1.ndmu4_cookies + /* * The nfsnode is the nfs equivalent to ufs's inode. Any similarity * is purely coincidental. @@ -99,11 +106,13 @@ struct nfsnode { time_t n_expiry; /* Lease expiry time */ nfsfh_t *n_fhp; /* NFS File Handle */ struct vnode *n_vnode; /* associated vnode */ + struct vnode *n_dvp; /* parent vnode */ struct lockf *n_lockf; /* Locking record of file */ int n_error; /* Save write error value */ union { struct timespec nf_atim; /* Special file times */ nfsuint64 nd_cookieverf; /* Cookie verifier (dir only) */ + u_char nd4_cookieverf[NFSX_V4VERF]; } n_un1; union { struct timespec nf_mtim; @@ -117,12 +126,21 @@ struct nfsnode { short n_flag; /* Flag for locking.. 
*/ nfsfh_t n_fh; /* Small File Handle */ struct lock n_rslock; + struct nfs4_fctx n_rfc; + struct nfs4_fctx n_wfc; + /* + * The last component name is needed for the NFSv4 OPEN + * operation. + */ + u_char *n_name; + uint32_t n_namelen; }; #define n_atim n_un1.nf_atim #define n_mtim n_un2.nf_mtim #define n_sillyrename n_un3.nf_silly #define n_cookieverf n_un1.nd_cookieverf +#define n4_cookieverf n_un1.nd4_cookieverf #define n_direofoffset n_un2.nd_direof #define n_cookies n_un3.nd_cook @@ -137,6 +155,8 @@ struct nfsnode { #define NACC 0x0100 /* Special file accessed */ #define NUPD 0x0200 /* Special file updated */ #define NCHG 0x0400 /* Special file times changed */ +#define NCREATED 0x0800 /* Opened by nfs_create() */ +#define NTRUNCATE 0x1000 /* Opened by nfs_setattr() */ /* * Convert between nfsnode pointers and vnode pointers @@ -182,6 +202,10 @@ extern vop_t **fifo_nfsnodeop_p; extern vop_t **nfs_vnodeop_p; extern vop_t **spec_nfsnodeop_p; +extern vop_t **fifo_nfs4nodeop_p; +extern vop_t **nfs4_vnodeop_p; +extern vop_t **spec_nfs4nodeop_p; + /* * Prototypes for NFS vnode operations */ @@ -193,9 +217,12 @@ int nfs_reclaim(struct vop_reclaim_args *); /* other stuff */ int nfs_removeit(struct sillyrename *); +int nfs4_removeit(struct sillyrename *); int nfs_nget(struct mount *, nfsfh_t *, int, struct nfsnode **); nfsuint64 *nfs_getcookie(struct nfsnode *, off_t, int); +uint64_t *nfs4_getcookie(struct nfsnode *, off_t, int); void nfs_invaldir(struct vnode *); +void nfs4_invaldir(struct vnode *); #endif /* _KERNEL */ diff --git a/sys/rpc/rpcclnt.c b/sys/rpc/rpcclnt.c new file mode 100644 index 0000000..4f61f29 --- /dev/null +++ b/sys/rpc/rpcclnt.c @@ -0,0 +1,2083 @@ +/* $FreeBSD$ */ +/* $Id: rpcclnt.c,v 1.9 2003/11/05 14:59:03 rees Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works 
for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +/* + * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of + * California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by Rick Macklem at + * The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 2. + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 3. 
All advertising + * materials mentioning features or use of this software must display the + * following acknowledgement: This product includes software developed by the + * University of California, Berkeley and its contributors. 4. Neither the + * name of the University nor the names of its contributors may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 + */ + +/* XXX: kill ugly debug strings */ +/* XXX: get rid of proct, as it is not even being used... (or keep it so v{2,3} + * can run, but clean it up! 
*/ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/syslog.h> +#include <sys/malloc.h> +#include <sys/uio.h> +#include <sys/lock.h> +#include <sys/signalvar.h> +#include <sys/sysent.h> +#include <sys/syscall.h> +#include <sys/sysctl.h> + +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/mutex.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> + +#include <nfs/rpcv2.h> + +#include <rpc/rpcm_subs.h> +#include <rpc/rpcclnt.h> + +/* memory management */ +#ifdef __OpenBSD__ +struct pool rpctask_pool; +struct pool rpcclnt_pool; +#define RPCTASKPOOL_LWM 10 +#define RPCTASKPOOL_HWM 40 +#else +static MALLOC_DEFINE(M_RPC, "rpcclnt", "rpc state"); +#endif + +#define RPC_RETURN(X) do { RPCDEBUG("returning %d", X); return X; }while(0) + +/* + * Estimate rto for an nfs rpc sent via. an unreliable datagram. Use the mean + * and mean deviation of rtt for the appropriate type of rpc for the frequent + * rpcs and a default for the others. The justification for doing "other" + * this way is that these rpcs happen so infrequently that timer est. would + * probably be stale. Also, since many of these rpcs are non-idempotent, a + * conservative timeout is desired. getattr, lookup - A+2D read, write - + * A+4D other - nm_timeo + */ +#define RPC_RTO(n, t) \ + ((t) == 0 ? (n)->rc_timeo : \ + ((t) < 3 ? \ + (((((n)->rc_srtt[t-1] + 3) >> 2) + (n)->rc_sdrtt[t-1] + 1) >> 1) : \ + ((((n)->rc_srtt[t-1] + 7) >> 3) + (n)->rc_sdrtt[t-1] + 1))) + +#define RPC_SRTT(s,r) (r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s),\ + (r)->r_procnum) - 1] + +#define RPC_SDRTT(s,r) (r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s),\ + (r)->r_procnum) - 1] + + +/* + * There is a congestion window for outstanding rpcs maintained per mount + * point. 
The cwnd size is adjusted in roughly the way that: Van Jacobson, + * Congestion avoidance and Control, In "Proceedings of SIGCOMM '88". ACM, + * August 1988. describes for TCP. The cwnd size is chopped in half on a + * retransmit timeout and incremented by 1/cwnd when each rpc reply is + * received and a full cwnd of rpcs is in progress. (The sent count and cwnd + * are scaled for integer arith.) Variants of "slow start" were tried and + * were found to be too much of a performance hit (ave. rtt 3 times larger), + * I suspect due to the large rtt that nfs rpcs have. + */ +#define RPC_CWNDSCALE 256 +#define RPC_MAXCWND (RPC_CWNDSCALE * 32) +static int rpcclnt_backoff[8] = {2, 4, 8, 16, 32, 64, 128, 256,}; + +/* XXX ugly debug strings */ +#define RPC_ERRSTR_ACCEPTED_SIZE 6 +char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] = { + "", /* no good message... */ + "remote server hasn't exported program.", + "remote server can't support version number.", + "program can't support procedure.", + "procedure can't decode params.", + "remote error. remote side memory allocation failure?" +}; + +char *rpc_errstr_denied[2] = { + "remote server doesnt support rpc version 2!", + "remote server authentication error." 
+}; + +#define RPC_ERRSTR_AUTH_SIZE 6 +char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] = { + "", + "auth error: bad credential (seal broken).", + "auth error: client must begin new session.", + "auth error: bad verifier (seal broken).", + "auth error: verifier expired or replayed.", + "auth error: rejected for security reasons.", +}; + +/* + * Static data, mostly RPC constants in XDR form + */ +static u_int32_t rpc_reply, rpc_call, rpc_vers; + +/* + * rpc_msgdenied, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, + * rpc_autherr, rpc_auth_kerb; + */ + +static u_int32_t rpcclnt_xid = 0; +static u_int32_t rpcclnt_xid_touched = 0; +struct rpcstats rpcstats; +int rpcclnt_ticks; + +SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RD, 0, "RPC Subsystem"); + +SYSCTL_UINT(_kern_rpc, OID_AUTO, retries, CTLFLAG_RD, &rpcstats.rpcretries, 0, "retries"); +SYSCTL_UINT(_kern_rpc, OID_AUTO, request, CTLFLAG_RD, &rpcstats.rpcrequests, 0, "request"); +SYSCTL_UINT(_kern_rpc, OID_AUTO, timeouts, CTLFLAG_RD, &rpcstats.rpctimeouts, 0, "timeouts"); +SYSCTL_UINT(_kern_rpc, OID_AUTO, unexpected, CTLFLAG_RD, &rpcstats.rpcunexpected, 0, "unexpected"); +SYSCTL_UINT(_kern_rpc, OID_AUTO, invalid, CTLFLAG_RD, &rpcstats.rpcinvalid, 0, "invalid"); + + +#ifdef RPCCLNT_DEBUG +int rpcdebugon = 0; +SYSCTL_UINT(_kern_rpc, OID_AUTO, debug_on, CTLFLAG_RW, &rpcdebugon, 0, "RPC Debug messages"); +#endif + +/* + * Queue head for rpctask's + */ +static +TAILQ_HEAD(, rpctask) rpctask_q; +struct callout_handle rpcclnt_timer_handle; + +#ifdef __OpenBSD__ +static int rpcclnt_send(struct socket *, struct mbuf *, struct mbuf *, struct rpctask *); +static int rpcclnt_receive(struct rpctask *, struct mbuf **, struct mbuf **, RPC_EXEC_CTX); +#else +static int rpcclnt_send(struct socket *, struct sockaddr *, struct mbuf *, struct rpctask *); +static int rpcclnt_receive(struct rpctask *, struct sockaddr **, struct mbuf **, RPC_EXEC_CTX); +#endif + +static int rpcclnt_msg(RPC_EXEC_CTX, const char *, char *); + +static int 
rpcclnt_reply(struct rpctask *, RPC_EXEC_CTX); +static void rpcclnt_timer(void *); +static int rpcclnt_sndlock(int *, struct rpctask *); +static void rpcclnt_sndunlock(int *); +static int rpcclnt_rcvlock(struct rpctask *); +static void rpcclnt_rcvunlock(int *); +#if 0 +void rpcclnt_realign(struct mbuf *, int); +#else +static void rpcclnt_realign(struct mbuf **, int); +#endif + +static struct mbuf *rpcclnt_buildheader(struct rpcclnt *, int, struct mbuf *, u_int32_t, int *, struct mbuf **, struct ucred *); +static int rpcm_disct(struct mbuf **, caddr_t *, int, int, caddr_t *); +static u_int32_t rpcclnt_proct(struct rpcclnt *, u_int32_t); +static int rpc_adv(struct mbuf **, caddr_t *, int, int); +static void rpcclnt_softterm(struct rpctask * task); + +static int rpcauth_buildheader(struct rpc_auth * auth, struct ucred *, struct mbuf **, caddr_t *); + +void +rpcclnt_init(void) +{ +#ifdef __OpenBSD__ + static struct timeout rpcclnt_timer_to; +#endif + + rpcclnt_ticks = (hz * RPC_TICKINTVL + 500) / 1000; + if (rpcclnt_ticks < 1) + rpcclnt_ticks = 1; + rpcstats.rpcretries = 0; + rpcstats.rpcrequests = 0; + rpcstats.rpctimeouts = 0; + rpcstats.rpcunexpected = 0; + rpcstats.rpcinvalid = 0; + + /* + * rpc constants how about actually using more than one of these! 
+ */ + + rpc_reply = txdr_unsigned(RPC_REPLY); + rpc_vers = txdr_unsigned(RPC_VER2); + rpc_call = txdr_unsigned(RPC_CALL); +#if 0 + rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); + rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); + rpc_mismatch = txdr_unsigned(RPC_MISMATCH); + rpc_autherr = txdr_unsigned(RPC_AUTHERR); + rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); + rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); +#endif + + /* initialize rpctask queue */ + TAILQ_INIT(&rpctask_q); + +#ifdef __OpenBSD__ + /* initialize pools */ + pool_init(&rpctask_pool, sizeof(struct rpctask), 0, 0, RPCTASKPOOL_LWM, + "rpctask_p", NULL); + pool_setlowat(&rpctask_pool, RPCTASKPOOL_LWM); + pool_sethiwat(&rpctask_pool, RPCTASKPOOL_HWM); + + pool_init(&rpcclnt_pool, sizeof(struct rpcclnt), 0, 0, 1, "rpcclnt_p", NULL); + + /* initialize timers */ + timeout_set(&rpcclnt_timer_to, rpcclnt_timer, &rpcclnt_timer_to); + rpcclnt_timer(&rpcclnt_timer_to); +#else /* !__OpenBSD__ */ + rpcclnt_timer(NULL); + +#endif /* !__OpenBSD__ */ + + RPCDEBUG("rpc initialed"); + + return; +} + +void +rpcclnt_uninit(void) +{ + RPCDEBUG("uninit"); + untimeout(rpcclnt_timer, (void *)NULL, rpcclnt_timer_handle); + + /* XXX delete sysctl variables? 
*/ +} + +int +rpcclnt_setup(clnt, program, addr, sotype, soproto, auth, max_read_size, max_write_size, flags) + struct rpcclnt * clnt; + struct rpc_program * program; + struct sockaddr * addr; + int sotype; + int soproto; + struct rpc_auth * auth; + int max_read_size; + int max_write_size; + int flags; +{ + if (clnt == NULL || program == NULL || addr == NULL || auth == NULL) + RPC_RETURN (EFAULT); + + if (program->prog_name == NULL) + RPC_RETURN (EFAULT); + clnt->rc_prog = program; + + clnt->rc_name = addr; + clnt->rc_sotype = sotype; + clnt->rc_soproto = soproto; + clnt->rc_auth = auth; + clnt->rc_rsize = max_read_size; + clnt->rc_wsize = max_write_size; + clnt->rc_flag = flags; + + clnt->rc_proctlen = 0; + clnt->rc_proct = NULL; + + RPC_RETURN (0); +} + +/* + * Initialize sockets and congestion for a new RPC connection. We do not free + * the sockaddr if error. + */ +int +rpcclnt_connect(rpc, td) + struct rpcclnt *rpc; + RPC_EXEC_CTX td; +{ + struct socket *so; + int s, error, rcvreserve, sndreserve; + struct sockaddr *saddr; + +#ifdef __OpenBSD__ + struct sockaddr_in *sin; + struct mbuf *m; +#else + struct sockaddr_in sin; + + int soarg; + struct sockopt opt; +#endif + + if (rpc == NULL) { + RPCDEBUG("no rpcclnt struct!\n"); + RPC_RETURN(EFAULT); + } + + GIANT_REQUIRED; /* XXX until socket locking done */ + + /* create the socket */ + rpc->rc_so = NULL; + + saddr = rpc->rc_name; + + error = socreate(saddr->sa_family, &rpc->rc_so, rpc->rc_sotype, + rpc->rc_soproto, td->td_ucred, td); + + if (error) { + RPCDEBUG("error %d in socreate()", error); + RPC_RETURN(error); + } + so = rpc->rc_so; + rpc->rc_soflags = so->so_proto->pr_flags; + + /* + * Some servers require that the client port be a reserved port + * number. We always allocate a reserved port, as this prevents + * filehandle disclosure through UDP port capture. 
+ */ + if (saddr->sa_family == AF_INET) { +#ifdef __OpenBSD__ + struct mbuf *mopt; + int *ip; +#endif + +#ifdef __OpenBSD__ + MGET(mopt, M_TRYWAIT, MT_SOOPTS); + mopt->m_len = sizeof(int); + ip = mtod(mopt, int *); + *ip = IP_PORTRANGE_LOW; + + error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt); +#else + soarg = IP_PORTRANGE_LOW; + bzero(&opt, sizeof(struct sockopt)); + opt.sopt_dir = SOPT_SET; + opt.sopt_level = IPPROTO_IP; + opt.sopt_name = IP_PORTRANGE; + opt.sopt_val = &soarg; + opt.sopt_valsize = sizeof(soarg); + + error = sosetopt(so, &opt); +#endif + if (error) + goto bad; + +#if __OpenBSD__ + MGET(m, M_TRYWAIT, MT_SONAME); + sin = mtod(m, struct sockaddr_in *); + sin->sin_len = m->m_len = sizeof(struct sockaddr_in); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + sin->sin_port = htons(0); + error = sobind(so, m); + m_freem(m); +#else + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = INADDR_ANY; + sin.sin_port = htons(0); + error = sobind(so, (struct sockaddr *) & sin, td); +#endif + if (error) + goto bad; + +#if __OpenBSD__ + MGET(mopt, M_TRYWAIT, MT_SOOPTS); + mopt->m_len = sizeof(int); + ip = mtod(mopt, int *); + *ip = IP_PORTRANGE_DEFAULT; + error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt); +#else + soarg = IP_PORTRANGE_DEFAULT; + bzero(&opt, sizeof(struct sockopt)); + opt.sopt_dir = SOPT_SET; + opt.sopt_level = IPPROTO_IP; + opt.sopt_name = IP_PORTRANGE; + opt.sopt_val = &soarg; + opt.sopt_valsize = sizeof(soarg); + error = sosetopt(so, &opt); +#endif + if (error) + goto bad; + } + /* + * Protocols that do not require connections may be optionally left + * unconnected for servers that reply from a port other than + * NFS_PORT. + */ + if (rpc->rc_flag & RPCCLNT_NOCONN) { + if (rpc->rc_soflags & PR_CONNREQUIRED) { + error = ENOTCONN; + goto bad; + } + } else { + error = soconnect(so, saddr, td); + if (error) + goto bad; + + /* + * Wait for the connection to complete. 
Cribbed from the + * connect system call but with the wait timing out so that + * interruptible mounts don't hang here for a long time. + */ +#ifdef __OpenBSD__ + s = splsoftnet(); +#else + s = splnet(); +#endif + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + (void)tsleep((caddr_t) & so->so_timeo, PSOCK, + "rpc", 2 * hz); + + /* + * XXX needs to catch interrupt signals. something + * like this: if ((so->so_state & SS_ISCONNECTING) && + * so->so_error == 0 && rep && (error = + * nfs_sigintr(nmp, rep, rep->r_td)) != 0) { + * so->so_state &= ~SS_ISCONNECTING; splx(s); goto + * bad; } + */ + } + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + splx(s); + goto bad; + } + splx(s); + } + if (rpc->rc_flag & (RPCCLNT_SOFT | RPCCLNT_INT)) { + so->so_rcv.sb_timeo = (5 * hz); + so->so_snd.sb_timeo = (5 * hz); + } else { + so->so_rcv.sb_timeo = 0; + so->so_snd.sb_timeo = 0; + } + + + if (rpc->rc_sotype == SOCK_DGRAM) { + sndreserve = rpc->rc_wsize + RPC_MAXPKTHDR; + rcvreserve = rpc->rc_rsize + RPC_MAXPKTHDR; + } else if (rpc->rc_sotype == SOCK_SEQPACKET) { + sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR) * 2; + rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR) * 2; + } else { + if (rpc->rc_sotype != SOCK_STREAM) + panic("rpcclnt_connect() bad sotype"); + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { +#ifdef __OpenBSD__ + MGET(m, M_TRYWAIT, MT_SOOPTS); + *mtod(m, int32_t *) = 1; + m->m_len = sizeof(int32_t); + sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); +#else + soarg = 1; + + bzero(&opt, sizeof(struct sockopt)); + opt.sopt_dir = SOPT_SET; + opt.sopt_level = SOL_SOCKET; + opt.sopt_name = SO_KEEPALIVE; + opt.sopt_val = &soarg; + opt.sopt_valsize = sizeof(soarg); + sosetopt(so, &opt); +#endif + } + if (so->so_proto->pr_protocol == IPPROTO_TCP) { +#ifdef __OpenBSD__ + MGET(m, M_TRYWAIT, MT_SOOPTS); + *mtod(m, int32_t *) = 1; + m->m_len = sizeof(int32_t); + sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); +#else + soarg = 1; + + bzero(&opt, sizeof(struct 
sockopt)); + opt.sopt_dir = SOPT_SET; + opt.sopt_level = IPPROTO_TCP; + opt.sopt_name = TCP_NODELAY; + opt.sopt_val = &soarg; + opt.sopt_valsize = sizeof(soarg); + sosetopt(so, &opt); +#endif + } + sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR + + sizeof(u_int32_t)) * 2; + rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR + + sizeof(u_int32_t)) * 2; + } + error = soreserve(so, sndreserve, rcvreserve); + if (error) + goto bad; + so->so_rcv.sb_flags |= SB_NOINTR; + so->so_snd.sb_flags |= SB_NOINTR; + + /* Initialize other non-zero congestion variables */ + rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] = rpc->rc_srtt[3] = + rpc->rc_srtt[4] = (RPC_TIMEO << 3); + rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] = + rpc->rc_sdrtt[3] = rpc->rc_sdrtt[4] = 0; + rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */ + rpc->rc_sent = 0; + rpc->rc_timeouts = 0; + RPC_RETURN(0); + +bad: + rpcclnt_disconnect(rpc); + RPC_RETURN(error); +} + + +/* + * Reconnect routine: Called when a connection is broken on a reliable + * protocol. - clean up the old socket - nfs_connect() again - set + * R_MUSTRESEND for all outstanding requests on mount point If this fails the + * mount point is DEAD! nb: Must be called with the nfs_sndlock() set on the + * mount point. + */ +int +rpcclnt_reconnect(rep, td) + struct rpctask *rep; + RPC_EXEC_CTX td; +{ + struct rpctask *rp; + struct rpcclnt *rpc = rep->r_rpcclnt; + int error; + + rpcclnt_disconnect(rpc); + while ((error = rpcclnt_connect(rpc, td)) != 0) { + if (error == EINTR || error == ERESTART) + RPC_RETURN(EINTR); + tsleep(&lbolt, PSOCK, "rpccon", 0); + } + + /* + * Loop through outstanding request list and fix up all requests on + * old socket. + */ + for (rp = TAILQ_FIRST(&rpctask_q); rp != NULL; + rp = TAILQ_NEXT(rp, r_chain)) { + if (rp->r_rpcclnt == rpc) + rp->r_flags |= R_MUSTRESEND; + } + RPC_RETURN(0); +} + +/* + * NFS disconnect. Clean up and unlink. 
+ */
+void
+rpcclnt_disconnect(struct rpcclnt *rpc)
+{
+	struct socket *oldso;
+
+	GIANT_REQUIRED;		/* XXX until socket locking done */
+
+	/* Nothing to tear down when no socket is attached to the client. */
+	oldso = rpc->rc_so;
+	if (oldso == NULL)
+		return;
+
+	/*
+	 * Clear rc_so first so the client no longer points at the socket
+	 * while it is being shut down and closed.
+	 */
+	rpc->rc_so = NULL;
+	soshutdown(oldso, 2);	/* how == 2: presumably SHUT_RDWR -- confirm */
+	soclose(oldso);
+}
+
+/*
+ * Disconnect the client's socket, bracketed by rpcclnt_rcvlock() and
+ * rpcclnt_rcvunlock().  rpcclnt_rcvlock() takes a task, so a zeroed
+ * placeholder task that only names the client is built on the stack.
+ *
+ * NOTE(review): the return value of rpcclnt_rcvlock() is ignored, so the
+ * disconnect and the unlock are performed even if the lock call reports
+ * an error -- confirm that this is intended.
+ */
+void
+rpcclnt_safedisconnect(struct rpcclnt * rpc)
+{
+	struct rpctask placeholder;
+
+	bzero(&placeholder, sizeof(placeholder));
+	placeholder.r_rpcclnt = rpc;
+	rpcclnt_rcvlock(&placeholder);
+	rpcclnt_disconnect(rpc);
+	rpcclnt_rcvunlock(&rpc->rc_flag);
+}
+
+/*
+ * This is the send routine.  For connection based socket types, it must
+ * be called with an nfs_sndlock() on the socket (presumably
+ * rpcclnt_sndlock() in this file; the wording is inherited from the NFS
+ * code).  "rep == NULL" indicates that it has been called from a server.
+ *
+ * For the client side:
+ *	- return EINTR if the RPC is terminated, 0 otherwise
+ *	- set R_MUSTRESEND if the send fails for any reason
+ *	- do any cleanup required by recoverable socket errors (???)
+ *
+ * For the server side:
+ *	- return EINTR or ERESTART if interrupted by a signal
+ *	- return EPIPE if a connection is lost for connection based sockets
+ *	  (TCP...)
+ *	- do any cleanup required by recoverable socket errors (???)
+ */ +static int +rpcclnt_send(so, nam, top, rep) + struct socket *so; +#ifdef __OpenBSD__ + struct mbuf *nam; +#else + struct sockaddr *nam; +#endif + struct mbuf *top; + struct rpctask *rep; +{ +#ifdef __OpenBSD__ + struct mbuf *sendnam; +#else + struct sockaddr *sendnam; + struct thread *td = curthread; +#endif + int error, soflags, flags; + + GIANT_REQUIRED; /* XXX until socket locking done */ + + if (rep) { + if (rep->r_flags & R_SOFTTERM) { + m_freem(top); + RPC_RETURN(EINTR); + } + if ((so = rep->r_rpcclnt->rc_so) == NULL) { + rep->r_flags |= R_MUSTRESEND; + m_freem(top); + RPC_RETURN(0); + } + rep->r_flags &= ~R_MUSTRESEND; + soflags = rep->r_rpcclnt->rc_soflags; + } else + soflags = so->so_proto->pr_flags; + + if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) + sendnam = NULL; + else + sendnam = nam; + + if (so->so_type == SOCK_SEQPACKET) + flags = MSG_EOR; + else + flags = 0; + + error = sosend(so, sendnam, NULL, top, NULL, flags, td); + + if (error) { + if (rep) { + log(LOG_INFO, "rpc send error %d for service %s\n", error, + rep->r_rpcclnt->rc_prog->prog_name); + /* + * Deal with errors for the client side. + */ + if (rep->r_flags & R_SOFTTERM) + error = EINTR; + else + rep->r_flags |= R_MUSTRESEND; + } else + log(LOG_INFO, "rpc service send error %d\n", error); + + /* + * Handle any recoverable (soft) socket errors here. + */ + if (error != EINTR && error != ERESTART && + error != EWOULDBLOCK && error != EPIPE) + error = 0; + } + RPC_RETURN(error); +} + +/* + * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all done by + * soreceive(), but for SOCK_STREAM we must deal with the Record Mark and + * consolidate the data into a new mbuf list. nb: Sometimes TCP passes the + * data up to soreceive() in long lists of small mbufs. For SOCK_STREAM we + * must be very careful to read an entire record once we have read any of it, + * even if the system call has been interrupted. 
+ */ +static int +rpcclnt_receive(rep, aname, mp, td) + struct rpctask *rep; +#ifdef __OpenBSD__ + struct mbuf **aname; +#else + struct sockaddr **aname; +#endif + struct mbuf **mp; + RPC_EXEC_CTX td; +{ + struct socket *so; + struct uio auio; + struct iovec aio; + struct mbuf *m; + struct mbuf *control; + u_int32_t len; +#ifdef __OpenBSD__ + struct mbuf **getnam; +#else + struct sockaddr **getnam; +#endif + int error, sotype, rcvflg; + + GIANT_REQUIRED; /* XXX until socket locking done */ + + /* + * Set up arguments for soreceive() + */ + *mp = NULL; + *aname = NULL; + sotype = rep->r_rpcclnt->rc_sotype; + + /* + * For reliable protocols, lock against other senders/receivers in + * case a reconnect is necessary. For SOCK_STREAM, first get the + * Record Mark to find out how much more there is to get. We must + * lock the socket against other receivers until we have an entire + * rpc request/reply. + */ + if (sotype != SOCK_DGRAM) { + error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep); + if (error) + RPC_RETURN(error); +tryagain: + /* + * Check for fatal errors and resending request. + */ + /* + * Ugh: If a reconnect attempt just happened, rc_so would + * have changed. NULL indicates a failed attempt that has + * essentially shut down this mount point. 
+ */ + if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { + rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag); + RPC_RETURN(EINTR); + } + so = rep->r_rpcclnt->rc_so; + if (!so) { + error = rpcclnt_reconnect(rep, td); + if (error) { + rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag); + RPC_RETURN(error); + } + goto tryagain; + } + while (rep->r_flags & R_MUSTRESEND) { + m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT); + rpcstats.rpcretries++; + error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, m, rep); + if (error) { + if (error == EINTR || error == ERESTART || + (error = rpcclnt_reconnect(rep, td)) != 0) { + rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag); + RPC_RETURN(error); + } + goto tryagain; + } + } + rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag); + if (sotype == SOCK_STREAM) { + aio.iov_base = (caddr_t) & len; + aio.iov_len = sizeof(u_int32_t); + auio.uio_iov = &aio; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_offset = 0; + auio.uio_resid = sizeof(u_int32_t); +#ifdef __OpenBSD__ + auio.uio_procp = td; +#else + auio.uio_td = td; +#endif + do { + rcvflg = MSG_WAITALL; + error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg); + if (error == EWOULDBLOCK && rep) { + if (rep->r_flags & R_SOFTTERM) + RPC_RETURN(EINTR); + } + } while (error == EWOULDBLOCK); + if (!error && auio.uio_resid > 0) { + log(LOG_INFO, + "short receive (%d/%d) from rpc server %s\n", + sizeof(u_int32_t) - auio.uio_resid, + sizeof(u_int32_t), + rep->r_rpcclnt->rc_prog->prog_name); + error = EPIPE; + } + if (error) + goto errout; + len = ntohl(len) & ~0x80000000; + /* + * This is SERIOUS! We are out of sync with the + * sender and forcing a disconnect/reconnect is all I + * can do. 
+ */ + if (len > RPC_MAXPACKET) { + log(LOG_ERR, "%s (%d) from rpc server %s\n", + "impossible packet length", + len, + rep->r_rpcclnt->rc_prog->prog_name); + error = EFBIG; + goto errout; + } + auio.uio_resid = len; + do { + rcvflg = MSG_WAITALL; + error = soreceive(so, NULL, &auio, mp, NULL, &rcvflg); + } while (error == EWOULDBLOCK || error == EINTR || + error == ERESTART); + if (!error && auio.uio_resid > 0) { + log(LOG_INFO, + "short receive (%d/%d) from rpc server %s\n", + len - auio.uio_resid, len, + rep->r_rpcclnt->rc_prog->prog_name); + error = EPIPE; + } + } else { + /* + * NB: Since uio_resid is big, MSG_WAITALL is ignored + * and soreceive() will return when it has either a + * control msg or a data msg. We have no use for + * control msg., but must grab them and then throw + * them away so we know what is going on. + */ + auio.uio_resid = len = 100000000; /* Anything Big */ +#ifdef __OpenBSD__ + auio.uio_procp = td; +#else + auio.uio_td = td; +#endif + do { + rcvflg = 0; + error = soreceive(so, NULL, &auio, mp, &control, &rcvflg); + if (control) + m_freem(control); + if (error == EWOULDBLOCK && rep) { + if (rep->r_flags & R_SOFTTERM) + RPC_RETURN(EINTR); + } + } while (error == EWOULDBLOCK || + (!error && *mp == NULL && control)); + if ((rcvflg & MSG_EOR) == 0) + printf("Egad!!\n"); + if (!error && *mp == NULL) + error = EPIPE; + len -= auio.uio_resid; + } +errout: + if (error && error != EINTR && error != ERESTART) { + m_freem(*mp); + *mp = (struct mbuf *) 0; + if (error != EPIPE) + log(LOG_INFO, + "receive error %d from rpc server %s\n", + error, + rep->r_rpcclnt->rc_prog->prog_name); + error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep); + if (!error) + error = rpcclnt_reconnect(rep, td); + if (!error) + goto tryagain; + } + } else { + if ((so = rep->r_rpcclnt->rc_so) == NULL) + RPC_RETURN(EACCES); + if (so->so_state & SS_ISCONNECTED) + getnam = NULL; + else + getnam = aname; + auio.uio_resid = len = 1000000; +#ifdef __OpenBSD__ + auio.uio_procp 
= td; +#else + auio.uio_td = td; +#endif + + do { + rcvflg = 0; + error = soreceive(so, getnam, &auio, mp, NULL, &rcvflg); + RPCDEBUG("soreceivce returns %d", error); + if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) { + RPCDEBUG("wouldblock && softerm -> EINTR"); + RPC_RETURN(EINTR); + } + } while (error == EWOULDBLOCK); + len -= auio.uio_resid; + } + if (error) { + m_freem(*mp); + *mp = NULL; + } + + /* + * Search for any mbufs that are not a multiple of 4 bytes + * long or with m_data not longword aligned. These could + * cause pointer alignment problems, so copy them to well + * aligned mbufs. + */ + rpcclnt_realign(mp, 5 * RPCX_UNSIGNED); + RPC_RETURN(error); +} + + +/* + * Implement receipt of reply on a socket. We must search through the list of + * received datagrams matching them with outstanding requests using the xid, + * until ours is found. + */ +/* ARGSUSED */ +static int +rpcclnt_reply(myrep, td) + struct rpctask *myrep; + RPC_EXEC_CTX td; +{ + struct rpctask *rep; + struct rpcclnt *rpc = myrep->r_rpcclnt; + int32_t t1; + struct mbuf *mrep, *md; +#ifdef __OpenBSD__ + struct mbuf *nam; +#else + struct sockaddr *nam; +#endif + u_int32_t rxid, *tl; + caddr_t dpos, cp2; + int error; + + /* + * Loop around until we get our own reply + */ + for (;;) { + /* + * Lock against other receivers so that I don't get stuck in + * sbwait() after someone else has received my reply for me. + * Also necessary for connection based protocols to avoid + * race conditions during a reconnect. + */ + error = rpcclnt_rcvlock(myrep); + if (error) + RPC_RETURN(error); + /* Already received, bye bye */ + if (myrep->r_mrep != NULL) { + rpcclnt_rcvunlock(&rpc->rc_flag); + RPC_RETURN(0); + } + /* + * Get the next Rpc reply off the socket + */ + error = rpcclnt_receive(myrep, &nam, &mrep, td); + + rpcclnt_rcvunlock(&rpc->rc_flag); + + if (error) { + /* + * Ignore routing errors on connectionless + * protocols?? 
+ */ + if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) { + rpc->rc_so->so_error = 0; + if (myrep->r_flags & R_GETONEREP) + RPC_RETURN(0); + RPCDEBUG("ingoring routing error on connectionless protocol."); + continue; + } + RPC_RETURN(error); + } +#ifdef __OpenBSD__ + if (nam) + m_freem(nam); +#else + if (nam) + FREE(nam, M_SONAME); +#endif + + /* + * Get the xid and check that it is an rpc reply + */ + md = mrep; + dpos = mtod(md, caddr_t); + rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED); + rxid = *tl++; + if (*tl != rpc_reply) { + rpcstats.rpcinvalid++; + m_freem(mrep); + rpcmout: + if (myrep->r_flags & R_GETONEREP) + RPC_RETURN(0); + continue; + } + /* + * Loop through the request list to match up the reply Iff no + * match, just drop the datagram + */ + TAILQ_FOREACH(rep, &rpctask_q, r_chain) { + if (rep->r_mrep == NULL && rxid == rep->r_xid) { + /* Found it.. */ + rep->r_mrep = mrep; + rep->r_md = md; + rep->r_dpos = dpos; + + /* + * Update congestion window. Do the additive + * increase of one rpc/rtt. + */ + if (rpc->rc_cwnd <= rpc->rc_sent) { + rpc->rc_cwnd += + (RPC_CWNDSCALE * RPC_CWNDSCALE + + (rpc->rc_cwnd >> 1)) / rpc->rc_cwnd; + if (rpc->rc_cwnd > RPC_MAXCWND) + rpc->rc_cwnd = RPC_MAXCWND; + } + rep->r_flags &= ~R_SENT; + rpc->rc_sent -= RPC_CWNDSCALE; + /* + * Update rtt using a gain of 0.125 on the + * mean and a gain of 0.25 on the deviation. + */ + if (rep->r_flags & R_TIMING) { + /* + * Since the timer resolution of + * NFS_HZ is so course, it can often + * result in r_rtt == 0. Since r_rtt + * == N means that the actual rtt is + * between N+dt and N+2-dt ticks, add + * 1. + */ + t1 = rep->r_rtt + 1; + t1 -= (RPC_SRTT(rpc, rep) >> 3); + RPC_SRTT(rpc, rep) += t1; + if (t1 < 0) + t1 = -t1; + t1 -= (RPC_SDRTT(rpc, rep) >> 2); + RPC_SDRTT(rpc, rep) += t1; + } + rpc->rc_timeouts = 0; + break; + } + } + /* + * If not matched to a request, drop it. If it's mine, get + * out. 
+ */ + if (rep == 0) { + rpcstats.rpcunexpected++; + RPCDEBUG("rpc reply not matched\n"); + m_freem(mrep); + } else if (rep == myrep) { + if (rep->r_mrep == NULL) + panic("rpcreply nil"); + RPC_RETURN(0); + } + if (myrep->r_flags & R_GETONEREP) + RPC_RETURN(0); + } +} + +/* XXX: ignores tryagain! */ +/* + * code from nfs_request - goes something like this - fill in task struct - + * links task into list - calls nfs_send() for first transmit - calls + * nfs_receive() to get reply - fills in reply (which should be initialized + * prior to calling), which is valid when 0 is returned and is NEVER freed in + * this function + * + * always frees the request header, but NEVER frees 'mrest' + * + */ +/* + * ruthtype + * pcclnt_setauth() should be used before calling this. EAUTH is returned if + * authentication fails. + */ +/* + * note that reply->result_* are invalid unless reply->type == + * RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_* + * are invalid unless reply->type == RPC_MSGACCEPTED + */ +int +rpcclnt_request(rpc, mrest, procnum, td, cred, reply) + struct rpcclnt *rpc; + struct mbuf *mrest; + int procnum; + RPC_EXEC_CTX td; + struct ucred *cred; + struct rpc_reply *reply; +{ + struct mbuf *m, *mrep; + struct rpctask *task; + u_int32_t *tl; + struct mbuf *md, *mheadend; + caddr_t dpos, cp2; + int t1, s, error = 0, mrest_len; + u_int32_t xid; + +#ifdef __OpenBSD__ + task = pool_get(&rpctask_pool, PR_WAITOK); +#else + MALLOC(task, struct rpctask *, sizeof(struct rpctask), M_RPC, (M_WAITOK | M_ZERO)); +#endif + + task->r_rpcclnt = rpc; + task->r_procnum = procnum; + task->r_td = td; + + mrest_len = m_length(mrest, NULL); + + m = rpcclnt_buildheader(rpc, procnum, mrest, mrest_len, &xid, &mheadend, + cred); + + /* + * For stream protocols, insert a Sun RPC Record Mark. 
+ */ + if (rpc->rc_sotype == SOCK_STREAM) { + M_PREPEND(m, RPCX_UNSIGNED, M_TRYWAIT); + *mtod(m, u_int32_t *) = htonl(0x80000000 | + (m->m_pkthdr.len - RPCX_UNSIGNED)); + } + task->r_mreq = m; + task->r_xid = xid; + + if (rpc->rc_flag & RPCCLNT_SOFT) + task->r_retry = rpc->rc_retry; + else + task->r_retry = RPC_MAXREXMIT + 1; /* past clip limit */ + task->r_rtt = task->r_rexmit = 0; + + if (rpcclnt_proct(rpc, procnum) > 0) + task->r_flags = R_TIMING; + else + task->r_flags = 0; + task->r_mrep = NULL; + + /* + * Do the client side RPC. + */ + rpcstats.rpcrequests++; + + /* + * Chain request into list of outstanding requests. Be sure to put it + * LAST so timer finds oldest requests first. + */ + s = splsoftclock(); + TAILQ_INSERT_TAIL(&rpctask_q, task, r_chain); + + /* + * If backing off another request or avoiding congestion, don't send + * this one now but let timer do it. If not timing a request, do it + * now. + */ + if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM || + (rpc->rc_flag & RPCCLNT_DUMBTIMR) || + rpc->rc_sent < rpc->rc_cwnd)) { + splx(s); + + if (rpc->rc_soflags & PR_CONNREQUIRED) + error = rpcclnt_sndlock(&rpc->rc_flag, task); + if (!error) { + error = rpcclnt_send(rpc->rc_so, rpc->rc_name, m_copym(m, 0, M_COPYALL, M_TRYWAIT), + task); + if (rpc->rc_soflags & PR_CONNREQUIRED) + rpcclnt_sndunlock(&rpc->rc_flag); + } + if (!error && (task->r_flags & R_MUSTRESEND) == 0) { + rpc->rc_sent += RPC_CWNDSCALE; + task->r_flags |= R_SENT; + } + } else { + splx(s); + task->r_rtt = -1; + } + + /* + * Wait for the reply from our send or the timer's. + */ + if (!error || error == EPIPE) + error = rpcclnt_reply(task, td); + + /* + * RPC done, unlink the request. + */ + s = splsoftclock(); + TAILQ_REMOVE(&rpctask_q, task, r_chain); + splx(s); + + /* + * Decrement the outstanding request count. + */ + if (task->r_flags & R_SENT) { + task->r_flags &= ~R_SENT; /* paranoia */ + rpc->rc_sent -= RPC_CWNDSCALE; + } + /* + * If there was a successful reply and a tprintf msg. 
tprintf a + * response. + */ + if (!error && (task->r_flags & R_TPRINTFMSG)) + rpcclnt_msg(task->r_td, rpc->rc_prog->prog_name, + "is alive again"); + + /* free request header (leaving mrest) */ + mheadend->m_next = NULL; + m_freem(task->r_mreq); + + /* initialize reply */ + reply->mrep = task->r_mrep; + reply->verf_md = NULL; + reply->result_md = NULL; + + mrep = task->r_mrep; + md = task->r_md; + dpos = task->r_dpos; + + /* task structure is no longer needed */ +#ifdef __OpenBSD__ + pool_put(&rpctask_pool, task); +#else + FREE(task, M_RPC); +#endif + + if (error) + goto rpcmout; + + /* + * break down the rpc header and check if ok + */ + + rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED); + reply->stat.type = fxdr_unsigned(u_int32_t, *tl); + + if (reply->stat.type == RPC_MSGDENIED) { + rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED); + reply->stat.status = fxdr_unsigned(u_int32_t, *tl); + + switch (reply->stat.status) { + case RPC_MISMATCH: + rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED); + reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++); + reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl); + break; + case RPC_AUTHERR: + rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED); + reply->stat.autherr = fxdr_unsigned(u_int32_t, *tl); + break; + default: + error = EBADRPC; + goto rpcmout; + break; + } + RPC_RETURN(0); + } else if (reply->stat.type == RPC_MSGACCEPTED) { + rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED); + + reply->verf_md = md; + reply->verf_dpos = dpos; + + reply->verf_type = fxdr_unsigned(u_int32_t, *tl++); + reply->verf_size = fxdr_unsigned(u_int32_t, *tl); + + if (reply->verf_size != 0) + rpcm_adv(rpcm_rndup(reply->verf_size)); + + rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED); + reply->stat.status = fxdr_unsigned(u_int32_t, *tl); + + if (reply->stat.status == RPC_SUCCESS) { + if ((uint32_t)(dpos - mtod(md, caddr_t)) >= md->m_len) { + RPCDEBUG("where is the next mbuf?"); + RPCDEBUG("%d -> %d", (int)(dpos - mtod(md, caddr_t)), 
md->m_len); + if (md->m_next == NULL) { + error = EBADRPC; + goto rpcmout; + } else { + reply->result_md = md->m_next; + reply->result_dpos = mtod(reply->result_md, caddr_t); + } + } else { + reply->result_md = md; + reply->result_dpos = dpos; + } + } else if (reply->stat.status == RPC_PROGMISMATCH) { + rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED); + reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++); + reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl); + } else if (reply->stat.status > 5) { + error = EBADRPC; + goto rpcmout; + } + RPC_RETURN(0); + } else { + error = EBADRPC; + } + +rpcmout: + RPCDEBUG("request returning error %d", error); + RPC_RETURN(error); +} + + +/* + * Nfs timer routine Scan the nfsreq list and retranmit any requests that + * have timed out To avoid retransmission attempts on STREAM sockets (in the + * future) make sure to set the r_retry field to 0 (implies nm_retry == 0). + */ +void +rpcclnt_timer(arg) + void *arg; +{ +#ifdef __OpenBSD__ + struct timeout *to = (struct timeout *) arg; +#endif + struct rpctask *rep; + struct mbuf *m; + struct socket *so; + struct rpcclnt *rpc; + int timeo; + int s, error; + +#ifndef __OpenBSD__ + struct thread *td = curthread; +#endif + +#if __OpenBSD__ + s = splsoftnet(); +#else + s = splnet(); +#endif + TAILQ_FOREACH(rep, &rpctask_q, r_chain) { + rpc = rep->r_rpcclnt; + if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) + continue; + if (rpcclnt_sigintr(rpc, rep, rep->r_td)) { + rep->r_flags |= R_SOFTTERM; + continue; + } + if (rep->r_rtt >= 0) { + rep->r_rtt++; + if (rpc->rc_flag & RPCCLNT_DUMBTIMR) + timeo = rpc->rc_timeo; + else + timeo = RPC_RTO(rpc, rpcclnt_proct(rep->r_rpcclnt, + rep->r_procnum)); + if (rpc->rc_timeouts > 0) + timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1]; + if (rep->r_rtt <= timeo) + continue; + if (rpc->rc_timeouts < 8) + rpc->rc_timeouts++; + } + /* + * Check for server not responding + */ + if ((rep->r_flags & R_TPRINTFMSG) == 0 && + rep->r_rexmit > 
rpc->rc_deadthresh) { + rpcclnt_msg(rep->r_td, rpc->rc_prog->prog_name, + "not responding"); + rep->r_flags |= R_TPRINTFMSG; + } + if (rep->r_rexmit >= rep->r_retry) { /* too many */ + rpcstats.rpctimeouts++; + rep->r_flags |= R_SOFTTERM; + continue; + } + if (rpc->rc_sotype != SOCK_DGRAM) { + if (++rep->r_rexmit > RPC_MAXREXMIT) + rep->r_rexmit = RPC_MAXREXMIT; + continue; + } + if ((so = rpc->rc_so) == NULL) + continue; + + /* + * If there is enough space and the window allows.. Resend it + * Set r_rtt to -1 in case we fail to send it now. + */ + rep->r_rtt = -1; + if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && + ((rpc->rc_flag & RPCCLNT_DUMBTIMR) || + (rep->r_flags & R_SENT) || + rpc->rc_sent < rpc->rc_cwnd) && + (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) { + if ((rpc->rc_flag & RPCCLNT_NOCONN) == 0) + error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m, + NULL, NULL, td); + else + error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, rpc->rc_name, NULL, td); + if (error) { + if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) + so->so_error = 0; + } else { + /* + * Iff first send, start timing else turn + * timing off, backoff timer and divide + * congestion window by 2. + */ + if (rep->r_flags & R_SENT) { + rep->r_flags &= ~R_TIMING; + if (++rep->r_rexmit > RPC_MAXREXMIT) + rep->r_rexmit = RPC_MAXREXMIT; + rpc->rc_cwnd >>= 1; + if (rpc->rc_cwnd < RPC_CWNDSCALE) + rpc->rc_cwnd = RPC_CWNDSCALE; + rpcstats.rpcretries++; + } else { + rep->r_flags |= R_SENT; + rpc->rc_sent += RPC_CWNDSCALE; + } + rep->r_rtt = 0; + } + } + } + splx(s); + +#ifdef __OpenBSD__ + timeout_add(rpcclnt_timer, to, rpcclnt_ticks); +#else + rpcclnt_timer_handle = timeout(rpcclnt_timer, NULL, rpcclnt_ticks); +#endif +} + +/* + * Test for a termination condition pending on the process. This is used for + * RPCCLNT_INT mounts. 
+ */ +int +rpcclnt_sigintr(rpc, task, pr) + struct rpcclnt *rpc; + struct rpctask *task; + RPC_EXEC_CTX pr; +{ + struct proc *p; + + sigset_t tmpset; + + if (rpc == NULL) + return EFAULT; + + /* XXX deal with forced unmounts */ + + if (task && (task->r_flags & R_SOFTTERM)) + RPC_RETURN(EINTR); + + if (!(rpc->rc_flag & RPCCLNT_INT)) + RPC_RETURN(0); + + if (pr == NULL) + return (0); + +#ifdef __OpenBSD__ + p = pr; + if (p && p->p_siglist && + (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) & + RPCINT_SIGMASK)) + RPC_RETURN(EINTR); +#else + p = pr->td_proc; + PROC_LOCK(p); + tmpset = p->p_siglist; + SIGSETNAND(tmpset, pr->td_sigmask); + mtx_lock(&p->p_sigacts->ps_mtx); + SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); + mtx_unlock(&p->p_sigacts->ps_mtx); + if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset)) { + PROC_UNLOCK(p); + RPC_RETURN(EINTR); + } + PROC_UNLOCK(p); +#endif + RPC_RETURN(0); +} + +/* + * Lock a socket against others. Necessary for STREAM sockets to ensure you + * get an entire rpc request/reply and also to avoid race conditions between + * the processes with nfs requests in progress when a reconnect is necessary. + */ +static int +rpcclnt_sndlock(flagp, task) + int *flagp; + struct rpctask *task; +{ + RPC_EXEC_CTX p; + int slpflag = 0, slptimeo = 0; + + if (task) { + p = task->r_td; + if (task->r_rpcclnt->rc_flag & RPCCLNT_INT) + slpflag = PCATCH; + } else + p = NULL; + while (*flagp & RPCCLNT_SNDLOCK) { + if (rpcclnt_sigintr(task->r_rpcclnt, task, p)) + RPC_RETURN(EINTR); + *flagp |= RPCCLNT_WANTSND; + (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcsndlck", + slptimeo); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + *flagp |= RPCCLNT_SNDLOCK; + RPC_RETURN(0); +} + +/* + * Unlock the stream socket for others. 
+ */ +static void +rpcclnt_sndunlock(flagp) + int *flagp; +{ + + if ((*flagp & RPCCLNT_SNDLOCK) == 0) + panic("rpc sndunlock"); + *flagp &= ~RPCCLNT_SNDLOCK; + if (*flagp & RPCCLNT_WANTSND) { + *flagp &= ~RPCCLNT_WANTSND; + wakeup((caddr_t) flagp); + } +} + +static int +rpcclnt_rcvlock(task) + struct rpctask *task; +{ + int *flagp = &task->r_rpcclnt->rc_flag; + int slpflag, slptimeo = 0; + + if (*flagp & RPCCLNT_INT) + slpflag = PCATCH; + else + slpflag = 0; + while (*flagp & RPCCLNT_RCVLOCK) { + if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td)) + RPC_RETURN(EINTR); + *flagp |= RPCCLNT_WANTRCV; + (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcrcvlk", + slptimeo); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + *flagp |= RPCCLNT_RCVLOCK; + RPC_RETURN(0); +} + +/* + * Unlock the stream socket for others. + */ +static void +rpcclnt_rcvunlock(flagp) + int *flagp; +{ + + if ((*flagp & RPCCLNT_RCVLOCK) == 0) + panic("nfs rcvunlock"); + *flagp &= ~RPCCLNT_RCVLOCK; + if (*flagp & RPCCLNT_WANTRCV) { + *flagp &= ~RPCCLNT_WANTRCV; + wakeup((caddr_t) flagp); + } +} + +#if 0 +/* + * Check for badly aligned mbuf data areas and realign data in an mbuf list + * by copying the data areas up, as required. + */ +void +rpcclnt_realign(m, hsiz) + struct mbuf *m; + int hsiz; +{ + struct mbuf *m2; + int siz, mlen, olen; + caddr_t tcp, fcp; + struct mbuf *mnew; + + while (m) { + /* + * This never happens for UDP, rarely happens for TCP but + * frequently happens for iso transport. 
+ */ + if ((m->m_len & 0x3) || (mtod(m, long)&0x3)) { + olen = m->m_len; + fcp = mtod(m, caddr_t); + if ((long)fcp & 0x3) { + if (m->m_flags & M_PKTHDR) + m_tag_delete_chain(m, NULL); + m->m_flags &= ~M_PKTHDR; + if (m->m_flags & M_EXT) + m->m_data = m->m_ext.ext_buf + + ((m->m_ext.ext_size - olen) & ~0x3); + else + m->m_data = m->m_dat; + } + m->m_len = 0; + tcp = mtod(m, caddr_t); + mnew = m; + m2 = m->m_next; + + /* + * If possible, only put the first invariant part of + * the RPC header in the first mbuf. + */ + mlen = M_TRAILINGSPACE(m); + if (olen <= hsiz && mlen > hsiz) + mlen = hsiz; + + /* Loop through the mbuf list consolidating data. */ + while (m) { + while (olen > 0) { + if (mlen == 0) { + if (m2->m_flags & M_PKTHDR) + m_tag_delete_chain(m2, NULL); + m2->m_flags &= ~M_PKTHDR; + if (m2->m_flags & M_EXT) + m2->m_data = m2->m_ext.ext_buf; + else + m2->m_data = m2->m_dat; + m2->m_len = 0; + mlen = M_TRAILINGSPACE(m2); + tcp = mtod(m2, caddr_t); + mnew = m2; + m2 = m2->m_next; + } + siz = min(mlen, olen); + if (tcp != fcp) + bcopy(fcp, tcp, siz); + mnew->m_len += siz; + mlen -= siz; + olen -= siz; + tcp += siz; + fcp += siz; + } + m = m->m_next; + if (m) { + olen = m->m_len; + fcp = mtod(m, caddr_t); + } + } + + /* + * Finally, set m_len == 0 for any trailing mbufs + * that have been copied out of. + */ + while (m2) { + m2->m_len = 0; + m2 = m2->m_next; + } + return; + } + m = m->m_next; + } +} +#else +static void +rpcclnt_realign(struct mbuf **pm, int hsiz) +{ + struct mbuf *m; + struct mbuf *n = NULL; + int off = 0; + + RPCDEBUG("in rpcclnt_realign()"); + + while ((m = *pm) != NULL) { + if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { + MGET(n, M_TRYWAIT, MT_DATA); + if (m->m_len >= MINCLSIZE) { + MCLGET(n, M_TRYWAIT); + } + n->m_len = 0; + break; + } + pm = &m->m_next; + } + + /* + * If n is non-NULL, loop on m copying data, then replace the + * portion of the chain that had to be realigned. 
+ */ + if (n != NULL) { + while (m) { + m_copyback(n, off, m->m_len, mtod(m, caddr_t)); + off += m->m_len; + m = m->m_next; + } + m_freem(*pm); + *pm = n; + } + + RPCDEBUG("leave rpcclnt_realign()"); +} +#endif + +static int +rpcclnt_msg(p, server, msg) + RPC_EXEC_CTX p; + const char *server; + char *msg; +{ +#ifdef __OpenBSD__ + tpr_t tpr; + struct proc *pr = p; + + if (p) + tpr = tprintf_open(p); + else + tpr = NULL; + tprintf(tpr, "rpc server %s: %s\n", server, msg); + tprintf_close(tpr); + RPC_RETURN(0); +#else + tprintf(p ? p->td_proc : NULL, LOG_INFO, + "nfs server %s: %s\n", server, msg); + RPC_RETURN(0); +#endif +} + +/* + * Build the RPC header and fill in the authorization info. The authorization + * string argument is only used when the credentials come from outside of the + * kernel (AUTH_KERB). (likewise, the ucred is only used when inside the + * kernel) Returns the head of the mbuf list. + */ +static struct mbuf * +rpcclnt_buildheader(rc, procid, mrest, mrest_len, xidp, mheadend, cred) + struct rpcclnt *rc; + int procid; + struct mbuf *mrest; + u_int32_t mrest_len; + int *xidp; + struct mbuf **mheadend; + struct ucred * cred; +{ + /* register */ struct mbuf *mb; + register u_int32_t *tl; + /* register */ caddr_t bpos; + struct mbuf *mreq, *mb2; + int error; + + MGETHDR(mb, M_TRYWAIT, MT_DATA); + if (6 * RPCX_UNSIGNED >= MINCLSIZE) { + MCLGET(mb, M_TRYWAIT); + } else if (6 * RPCX_UNSIGNED < MHLEN) { + MH_ALIGN(mb, 6 * RPCX_UNSIGNED); + } else { + RPCDEBUG("mbuf too small"); + panic("cheap bailout"); + } + mb->m_len = 0; + mreq = mb; + bpos = mtod(mb, caddr_t); + + /* + * First the RPC header. + */ + rpcm_build(tl, u_int32_t *, 6 * RPCX_UNSIGNED); + + /* Get a new (non-zero) xid */ + if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0)) { + rpcclnt_xid = arc4random(); + rpcclnt_xid_touched = 1; + } else { + while ((*xidp = arc4random() % 256) == 0); + rpcclnt_xid += *xidp; + } + + /* XXX: funky... 
*/ + *tl++ = *xidp = txdr_unsigned(rpcclnt_xid); + + *tl++ = rpc_call; + *tl++ = rpc_vers; + *tl++ = txdr_unsigned(rc->rc_prog->prog_id); + *tl++ = txdr_unsigned(rc->rc_prog->prog_version); + *tl++ = txdr_unsigned(procid); + + if ((error = rpcauth_buildheader(rc->rc_auth, cred, &mb, &bpos))) { + RPCDEBUG("rpcauth_buildheader failed %d", error); + return NULL; + } + + mb->m_next = mrest; + *mheadend = mb; + mreq->m_pkthdr.len = m_length(mreq, NULL); + mreq->m_pkthdr.rcvif = NULL; + return (mreq); +} + +/* + * Help break down an mbuf chain by setting the first siz bytes contiguous + * pointed to by returned val. This is used by the macros rpcm_dissect and + * rpcm_dissecton for tough cases. (The macros use the vars. dpos and dpos2) + */ +static int +rpcm_disct(mdp, dposp, siz, left, cp2) + struct mbuf **mdp; + caddr_t *dposp; + int siz; + int left; + caddr_t *cp2; +{ + struct mbuf *mp, *mp2; + int siz2, xfer; + caddr_t p; + + mp = *mdp; + while (left == 0) { + *mdp = mp = mp->m_next; + if (mp == NULL) + RPC_RETURN(EBADRPC); + left = mp->m_len; + *dposp = mtod(mp, caddr_t); + } + if (left >= siz) { + *cp2 = *dposp; + *dposp += siz; + } else if (mp->m_next == NULL) { + RPC_RETURN(EBADRPC); + } else if (siz > MHLEN) { + panic("rpc S too big"); + } else { + MGET(mp2, M_TRYWAIT, MT_DATA); + mp2->m_next = mp->m_next; + mp->m_next = mp2; + mp->m_len -= left; + mp = mp2; + *cp2 = p = mtod(mp, caddr_t); + bcopy(*dposp, p, left); /* Copy what was left */ + siz2 = siz - left; + p += left; + mp2 = mp->m_next; + /* Loop around copying up the siz2 bytes */ + while (siz2 > 0) { + if (mp2 == NULL) + RPC_RETURN(EBADRPC); + xfer = (siz2 > mp2->m_len) ? 
mp2->m_len : siz2; + if (xfer > 0) { + bcopy(mtod(mp2, caddr_t), p, xfer); + RPCMADV(mp2, xfer); + mp2->m_len -= xfer; + p += xfer; + siz2 -= xfer; + } + if (siz2 > 0) + mp2 = mp2->m_next; + } + mp->m_len = siz; + *mdp = mp2; + *dposp = mtod(mp2, caddr_t); + } + RPC_RETURN(0); +} + + + +static u_int32_t +rpcclnt_proct(rpc, procid) + struct rpcclnt *rpc; + u_int32_t procid; +{ + if (rpc->rc_proctlen != 0 && rpc->rc_proct != NULL && + procid < rpc->rc_proctlen) { + return rpc->rc_proct[procid]; + } + return (0); +} + +static int +rpc_adv(mdp, dposp, offs, left) + struct mbuf **mdp; + caddr_t *dposp; + int offs; + int left; +{ + struct mbuf *m; + int s; + + m = *mdp; + s = left; + while (s < offs) { + offs -= s; + m = m->m_next; + if (m == NULL) + RPC_RETURN(EBADRPC); + s = m->m_len; + } + *mdp = m; + *dposp = mtod(m, caddr_t) + offs; + RPC_RETURN(0); +} + +int +rpcclnt_cancelreqs(rpc) + struct rpcclnt *rpc; +{ + struct rpctask *task; + int i, s; + + s = splnet(); + TAILQ_FOREACH(task, &rpctask_q, r_chain) { + if (rpc != task->r_rpcclnt || task->r_mrep != NULL || + (task->r_flags & R_SOFTTERM)) + continue; + rpcclnt_softterm(task); + } + splx(s); + + for (i = 0; i < 30; i++) { + s = splnet(); + TAILQ_FOREACH(task, &rpctask_q, r_chain) { + if (rpc == task->r_rpcclnt) + break; + } + splx(s); + if (task == NULL) + return (0); + tsleep(&lbolt, PSOCK, "nfscancel", 0); + } + return (EBUSY); +} + +static void +rpcclnt_softterm(struct rpctask * task) +{ + task->r_flags |= R_SOFTTERM; + if (task->r_flags & R_SENT) { + task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE; + task->r_flags &= ~R_SENT; + } +} + + +#ifndef __OpenBSD__ +/* called by rpcclnt_get() */ +void +rpcclnt_create(struct rpcclnt ** rpc) +{ + MALLOC(*rpc, struct rpcclnt *, sizeof(struct rpcclnt), M_RPC, M_WAITOK | M_ZERO); +} + +/* called by rpcclnt_put() */ +void +rpcclnt_destroy(struct rpcclnt * rpc) +{ + if (rpc != NULL) { + FREE(rpc, M_RPC); + } else { + RPCDEBUG("attempting to free a NULL rpcclnt (not 
dereferenced)"); + } +} +#endif /* !__OpenBSD__ */ + + +/* XXX: add a lock around the auth structure in struct rpcclnt and make this + * call safe for calling durring a connection */ +static int +rpcauth_buildheader(struct rpc_auth * auth, struct ucred * cred, struct mbuf ** mhdr, caddr_t * bp) +{ + size_t authsiz, verfsiz; + uint32_t mlen, grpsiz; + register struct mbuf *mb, *mb2; + caddr_t bpos; + register u_int32_t *tl; + register int i; + + if (auth == NULL || mhdr == NULL) + return EFAULT; + + switch (auth->auth_type) { + case RPCAUTH_NULL: + authsiz = 0; + verfsiz = 0; + break; + case RPCAUTH_UNIX: + authsiz = (5 + cred->cr_ngroups) * RPCX_UNSIGNED; + verfsiz = 0; + break; + default: + return EPROTONOSUPPORT; + break; + }; + + mlen = rpcm_rndup(authsiz) + rpcm_rndup(verfsiz) + 4 * RPCX_UNSIGNED; + + mb = *mhdr; + bpos = *bp; + + rpcm_build(tl, u_int32_t *, mlen); + + *bp = bpos; + *mhdr = mb; + + *tl++ = txdr_unsigned(auth->auth_type); + *tl++ = txdr_unsigned(authsiz); + switch (auth->auth_type) { + case RPCAUTH_UNIX: + *tl++ = 0; + *tl++ = 0; + + *tl++ = txdr_unsigned(cred->cr_uid); + *tl++ = txdr_unsigned(cred->cr_groups[0]); + grpsiz = cred->cr_ngroups; + *tl++ = txdr_unsigned(grpsiz); + /* XXX: groups[0] is already sent... 
*/ + for (i = 0 ; i < grpsiz ; i++) { + *tl++ = txdr_unsigned(cred->cr_groups[i]); + } + + /* null verification header */ + *tl++ = txdr_unsigned(RPCAUTH_NULL); + *tl++ = 0; + break; + case RPCAUTH_NULL: + /* just a null verf header */ + *tl++ = txdr_unsigned(RPCAUTH_NULL); + *tl = 0; + break; + default: + panic("inconsistent rpc auth type"); + break; + } + + return 0; +} diff --git a/sys/rpc/rpcclnt.h b/sys/rpc/rpcclnt.h new file mode 100644 index 0000000..48681ab --- /dev/null +++ b/sys/rpc/rpcclnt.h @@ -0,0 +1,358 @@ +/* $FreeBSD$ */ +/* $OpenBSD: nfsmount.h,v 1.11 2002/03/14 01:27:13 millert Exp $ */ +/* $NetBSD: nfsmount.h,v 1.10 1996/02/18 11:54:03 fvdl Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. 
the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsmount.h 8.3 (Berkeley) 3/30/95 + */ + + +#ifndef _RPCCLNT_H_ +#define _RPCCLNT_H_ + +#include <sys/types.h> +#include <sys/malloc.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +#ifdef __OpenBSD__ + #define RPC_EXEC_CTX struct proc * +#else + #define RPC_EXEC_CTX struct thread * +#endif + + +#ifdef RPCCLNT_DEBUG +#define RPCDEBUG(args...) do{ \ + if(rpcdebugon != 0){ \ + printf("%s(): ", __FUNCTION__);\ + printf(args); \ + printf("\n"); \ + }}while(0) +#else +#define RPCDEBUG(args...) 
+#endif + +/* from nfs/nfs.h */ +#define RPC_TICKINTVL 5 + + +/* from nfs/nfsproto.h */ +#define RPC_MAXDATA 32768 +#define RPC_MAXPKTHDR 404 +#define RPC_MAXPACKET (RPC_MAXPKTHDR + RPC_MAXDATA) + +#define RPCX_UNSIGNED 4 + +/* defines for rpcclnt's rc_flag + XXX these flags came from the NFSMNT_* flags in OpenBSD's sys/mount.h */ +#define RPCCLNT_SOFT 0x001 /* soft mount (hard is default) */ +#define RPCCLNT_INT 0x002 /* allow interrupts on hard mounts */ +#define RPCCLNT_NOCONN 0x004 /* don't connect the socket (udp) */ +#define RPCCLNT_DUMBTIMR 0x010 + +#define RPCCLNT_SNDLOCK 0x100 +#define RPCCLNT_WANTSND 0x200 +#define RPCCLNT_RCVLOCK 0x400 +#define RPCCLNT_WANTRCV 0x800 + + +/* Flag values for r_flags */ +#define R_TIMING 0x01 /* timing request (in mntp) */ +#define R_SENT 0x02 /* request has been sent */ +#define R_SOFTTERM 0x04 /* soft mnt, too many retries */ +#define R_INTR 0x08 /* intr mnt, signal pending */ +#define R_SOCKERR 0x10 /* Fatal error on socket */ +#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. 
*/ +#define R_MUSTRESEND 0x40 /* Must resend request */ +#define R_GETONEREP 0x80 /* Probe for one reply only */ + + +#define RPC_HZ (hz / rpcclnt_ticks) /* Ticks/sec */ +#define RPC_TIMEO (1 * RPC_HZ) /* Default timeout = 1 second */ + +#define RPC_MAXREXMIT 100 /* Stop counting after this many */ + + +#define RPCIGNORE_SOERROR(s, e) \ + ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ + ((s) & PR_CONNREQUIRED) == 0) + +#define RPCINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ + sigmask(SIGHUP)|sigmask(SIGQUIT)) + +#define RPCMADV(m, s) (m)->m_data += (s) + +#define RPCAUTH_ROOTCREDS NULL + +#define RPCCLNTINT_SIGMASK(set) \ + (SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) || \ + SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) || \ + SIGISMEMBER(set, SIGQUIT)) + + +#define fxdr_unsigned(t, v) ((t)ntohl((int32_t)(v))) +#define txdr_unsigned(v) (htonl((int32_t)(v))) + + +/* global rpcstats + * XXX should be per rpcclnt */ +struct rpcstats { + int rpcretries; + int rpcrequests; + int rpctimeouts; + int rpcunexpected; + int rpcinvalid; +}; + +struct rpc_program { + u_int32_t prog_id; + u_int32_t prog_version; + char * prog_name; +}; + +struct rpc_auth { + unsigned int auth_type; +}; + +struct rpctask { + TAILQ_ENTRY(rpctask) r_chain; + struct mbuf *r_mreq; + struct mbuf *r_mrep; + struct mbuf *r_md; + caddr_t r_dpos; + + struct rpcclnt *r_rpcclnt; + + u_int32_t r_xid; + int r_flags; /* flags on request, see below */ + int r_retry; /* max retransmission count */ + int r_rexmit; /* current retrans count */ + int r_timer; /* tick counter on reply */ + int r_procnum; /* NFS procedure number */ + int r_rtt; /* RTT for rpc */ + RPC_EXEC_CTX r_td; +}; + +struct rpc_reply { + struct { + u_int32_t type; + u_int32_t status; + + /* used only when reply == RPC_MSGDENIED and + * status == RPC_AUTHERR */ + u_int32_t autherr; + + /* rpc mismatch info if reply == RPC_MSGDENIED and + * status == RPC_MISMATCH */ + struct { + u_int32_t low; + 
+ u_int32_t high; + } mismatch_info; + } stat; + + /* head of the mbuf chain */ + struct mbuf * mrep; + + /* mbuf and position of the verification opaque data + * note that this is only valid when stat.reply == RPC_MSGACCEPTED */ + u_int32_t verf_type; + u_int32_t verf_size; + struct mbuf * verf_md; + caddr_t verf_dpos; + + /* mbuf and position of the result of the rpc request */ + struct mbuf * result_md; + caddr_t result_dpos; +}; + + +/* + * RPC Client connection context. + * One allocated on every NFS mount. + * Holds RPC specific information for mount. + */ +/* XXX: please note that all pointer type variables are just set (not copied), + * so it is up to the user to free these values */ +struct rpcclnt { + int rc_flag; /* For RPCCLNT_* flags */ + + int rc_wsize; /* Max size of the request data */ + int rc_rsize; /* Max size of the response data */ + struct sockaddr *rc_name; + struct socket *rc_so; /* Rpc socket */ + int rc_sotype; /* Type of socket */ + int rc_soproto; /* and protocol */ + int rc_soflags; /* pr_flags for socket protocol */ + + int rc_timeo; /* Init timer for RPCCLNT_DUMBTIMR */ + int rc_retry; /* Max retries */ + int rc_srtt[4]; /* Timers for rpcs */ + int rc_sdrtt[4]; + int rc_sent; /* Request send count */ + int rc_cwnd; /* Request send window */ + int rc_timeouts; /* Request timeouts */ + +/* XXX: this is not being set!!!! 
*/ + int rc_deadthresh; /* Threshold of timeouts-->dead server*/ + + + /* authentication: */ + /* currently can be RPCAUTH_NULL, RPCAUTH_KERBV4, RPCAUTH_UNIX */ + /* should be kept in XDR form */ + int rc_authtype; /* Authenticator type */ + + +#if 0 + /* RPCAUTH_KERB4 */ + int rc_authlen; /* and length */ + char *rc_authstr; /* Authenticator string */ + int rc_verflen; + char *rc_verfstr; /* and the verifier */ +#endif + + /* RPCAUTH_UNIX*/ + struct rpc_auth * rc_auth; /* authentication */ + +#if 0 + /* stored in XDR form (network byte order) */ + unsigned int rc_progid; /* program id */ + unsigned int rc_progvers; /* program version */ + + /* name of server for log messages */ + const char *rc_servername; /* for printing error messages */ +#else + struct rpc_program * rc_prog; +#endif + + /* XXX: this should be removed */ + int rc_proctlen; /* if == 0 then rc_proct == NULL */ + int * rc_proct; +}; + +#ifdef __OpenBSD__ +extern struct pool rpcreply_pool; +extern struct pool rpcclnt_pool; +#else +/* MALLOC_DECLARE(M_RPC); */ +#endif +extern int rpcdebugon; + + +#ifdef __OpenBSD__ +#define rpcclnt_get(X) \ + do { \ + (X) = pool_get(&rpcclnt_pool, PR_WAITOK); \ + bzero((X), sizeof(struct rpcclnt)); \ + }while(0) + +#define rpcclnt_put(X) \ + do { \ + if ((X) != NULL){ \ + pool_put(&rpcclnt_pool, (X)); \ + }}while(0) + +#else /* !__OpenBSD__ */ + +/* usage count for module (un)loading */ +extern unsigned int rpcclnt_usage; +extern struct mtx rpcclnt_usage_mutex; + +void rpcclnt_create(struct rpcclnt ** rpc); +void rpcclnt_destroy(struct rpcclnt * rpc); + +#define rpcclnt_get(X) rpcclnt_create(&(X)) +#define rpcclnt_put(X) rpcclnt_destroy(X) + +#ifdef RPCCLNT_TEST +struct rpcclnt_test_args { + int nothing; +}; +int rpcclnt_test(struct thread *, struct rpcclnt_test_args *); + +#define RPC_RETURN(X) do { RPCDEBUG("returning %d", X); return X; }while(0) +#endif /* RPCCLNT_TEST */ + +#endif /* !__OpenBSD__ */ + +void rpcclnt_init(void); +void rpcclnt_uninit(void); +#if 0 
+int rpcclnt_setup(struct rpcclnt *, int, struct sockaddr *, int, int, int, int, const char *, int, int); +#endif + +int rpcclnt_setup(struct rpcclnt *, struct rpc_program *, struct sockaddr *, int, int, struct rpc_auth *, int, int, int); + + +int rpcclnt_connect(struct rpcclnt *, RPC_EXEC_CTX td); +int rpcclnt_reconnect(struct rpctask *, RPC_EXEC_CTX td); +void rpcclnt_disconnect(struct rpcclnt *); +void rpcclnt_safedisconnect(struct rpcclnt *); + +void rpcclnt_setauth(struct rpcclnt *, u_int32_t, u_int32_t, char *, u_int32_t, char *, struct ucred *); +int rpcclnt_request(struct rpcclnt *, struct mbuf *, int, RPC_EXEC_CTX, struct ucred *, struct rpc_reply *); +int rpcclnt_err(struct rpc_reply *); + +int rpcclnt_cancelreqs(struct rpcclnt *); +int rpcclnt_sigintr(struct rpcclnt *, struct rpctask *, RPC_EXEC_CTX); + + +#endif /* _RPCCLNT_H_ */ diff --git a/sys/rpc/rpcm_subs.h b/sys/rpc/rpcm_subs.h new file mode 100644 index 0000000..09e452a --- /dev/null +++ b/sys/rpc/rpcm_subs.h @@ -0,0 +1,133 @@ +/* $FreeBSD$ */ +/* $OpenBSD: nfsm_subs.h,v 1.11 2000/01/05 20:50:52 millert Exp $ */ +/* $NetBSD: nfsm_subs.h,v 1.10 1996/03/20 21:59:56 fvdl Exp $ */ + +/* + * copyright (c) 2003 + * the regents of the university of michigan + * all rights reserved + * + * permission is granted to use, copy, create derivative works and redistribute + * this software and such derivative works for any purpose, so long as the name + * of the university of michigan is not used in any advertising or publicity + * pertaining to the use or distribution of this software without specific, + * written prior authorization. if the above copyright notice or any other + * identification of the university of michigan is included in any copy of any + * portion of this software, then the disclaimer below must also be included. 
+ * + * this software is provided as is, without representation from the university + * of michigan as to its fitness for any purpose, and without warranty by the + * university of michigan of any kind, either express or implied, including + * without limitation the implied warranties of merchantability and fitness for + * a particular purpose. the regents of the university of michigan shall not be + * liable for any damages, including special, indirect, incidental, or + * consequential damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been or is hereafter + * advised of the possibility of such damages. + */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsm_subs.h 8.2 (Berkeley) 3/30/95 + */ + + +#ifndef _RPC_RPCM_SUBS_H_ +#define _RPC_RPCM_SUBS_H_ + + +/* + * Now for the macros that do the simple stuff and call the functions + * for the hard stuff. + * These macros use several vars. declared in rpcm_reqhead and these + * vars. must not be used elsewhere unless you are careful not to corrupt + * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries + * that may be used so long as the value is not expected to be retained + * after a macro. + * I know, this is kind of dorky, but it makes the actual op functions + * fairly clean and deals with the mess caused by the xdr discriminating + * unions. 
+ */ + +/* rpcm_build: reserve s bytes at the build position and point a (cast through c) at them. Relies on the caller's locals mb, mb2 and bpos; when the current mbuf has fewer than s bytes of trailing space a fresh mbuf is chained on, and the macro panics if s exceeds MLEN. */ +#define rpcm_build(a,c,s) \ + { if ((s) > M_TRAILINGSPACE(mb)) { \ + MGET(mb2, M_WAIT, MT_DATA); \ + if ((s) > MLEN) \ + panic("build > MLEN"); \ + mb->m_next = mb2; \ + mb = mb2; \ + mb->m_len = 0; \ + bpos = mtod(mb, caddr_t); \ + } \ + (a) = (c)(bpos); \ + mb->m_len += (s); \ + bpos += (s); } + +/* rpcm_dissect: point a (cast through c) at the next s bytes being parsed and step dpos past them. Relies on the caller's locals md, dpos, t1, cp2 and error; when the bytes are not all in the current mbuf it calls rpcm_disct(), and on failure stores the code in error and jumps to the caller's rpcmout label. */ +#define rpcm_dissect(a, c, s) \ + { t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + if (t1 >= (s)) { \ + (a) = (c)(dpos); \ + dpos += (s); \ + } else if ((t1 = rpcm_disct(&md, &dpos, (s), t1, &cp2)) != 0){ \ + error = t1; \ + goto rpcmout; \ + } else { \ + (a) = (c)cp2; \ + } } + +#if 0 +#define rpcm_mtouio(p,s) \ + if ((s) > 0 && \ + (t1 = rpcm_mbuftouio(&md,(p),(s),&dpos)) != 0) { \ + error = t1; \ + goto rpcmout; \ + } +#endif + +/* rpcm_rndup: round a up to the next multiple of 4 */ +#define rpcm_rndup(a) (((a)+3)&(~0x3)) + +/* rpcm_adv: skip s bytes; uses the caller's md, dpos, t1 and error, falls back to rpc_adv() when the bytes extend beyond the current mbuf, and on failure sets error and jumps to rpcmout. */ +#define rpcm_adv(s) \ + { t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + if (t1 >= (s)) { \ + dpos += (s); \ + } else if ((t1 = rpc_adv(&md, &dpos, (s), t1)) != 0) { \ + error = t1; \ + goto rpcmout; \ + } } + +#define RPCMADV(m, s) (m)->m_data += (s) + +#endif /* _RPC_RPCM_SUBS_H_ */ diff --git a/sys/rpc/rpcv2.h b/sys/rpc/rpcv2.h new file mode 100644 index 0000000..316ebd2 --- /dev/null +++ b/sys/rpc/rpcv2.h @@ -0,0 +1,101 @@ +/* $FreeBSD$ */ +/* $OpenBSD: rpcv2.h,v 1.5 2002/06/11 15:45:44 hin Exp $ */ +/* $NetBSD: rpcv2.h,v 1.8 1996/02/18 11:54:11 fvdl Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)rpcv2.h 8.2 (Berkeley) 3/30/95 + */ + + +#ifndef _NFS_RPCV2_H_ +#define _NFS_RPCV2_H_ + +/* + * Definitions for Sun RPC Version 2, from + * "RPC: Remote Procedure Call Protocol Specification" RFC1057 + */ + +/* Version # */ +#define RPC_VER2 2 + +/* Authentication */ +#define RPCAUTH_NULL 0 +#define RPCAUTH_UNIX 1 +#define RPCAUTH_SHORT 2 +#define RPCAUTH_KERB4 4 +#define RPCAUTH_MAXSIZ 400 +#define RPCVERF_MAXSIZ 12 /* For Kerb, can actually be 400 */ +#define RPCAUTH_UNIXGIDS 16 + +/* Rpc Constants */ + +/* msg type */ +#define RPC_CALL 0 +#define RPC_REPLY 1 + +/* reply stat */ +#define RPC_MSGACCEPTED 0 +#define RPC_MSGDENIED 1 + +/* accepted stat: */ +#define RPC_SUCCESS 0 +#define RPC_PROGUNAVAIL 1 +#define RPC_PROGMISMATCH 2 +#define RPC_PROCUNAVAIL 3 +#define RPC_GARBAGE 4 /* I like this one */ +#define RPC_SYSTEMERR 5 + +/* reject stat */ +#define RPC_MISMATCH 0 +#define RPC_AUTHERR 1 + +/* Authentication failures */ +#define AUTH_OK 0 +#define AUTH_BADCRED 1 +#define AUTH_REJECTCRED 2 +#define AUTH_BADVERF 3 +#define AUTH_REJECTVERF 4 +#define AUTH_TOOWEAK 5 /* Give em wheaties */ + + +/* Sizes of rpc header parts */ +#define RPC_SIZ 24 +#define RPC_REPLYSIZ 28 + +typedef u_char NFSKERBKEY_T[2]; + +#endif |