summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
author rmacklem <rmacklem@FreeBSD.org> 2013-08-14 21:11:26 +0000
committer rmacklem <rmacklem@FreeBSD.org> 2013-08-14 21:11:26 +0000
commit 802b1728d8a3d2a016628022a7631f365f00e337 (patch)
tree a20eea4fffded2fee18dcd50c288a819ea803066 /sys
parent 79360f25fc862bef26d62d9fd2ec20d0de41756e (diff)
download FreeBSD-src-802b1728d8a3d2a016628022a7631f365f00e337.zip
download FreeBSD-src-802b1728d8a3d2a016628022a7631f365f00e337.tar.gz
Fix several performance-related issues in the new NFS server's
DRC for NFS over TCP.
- Increase the size of the hash tables.
- Create a separate mutex for each hash list of the TCP hash table.
- Single-thread the code that deletes stale cache entries.
- Add a tunable called vfs.nfsd.tcphighwater, which can be increased to allow the cache to grow larger, avoiding the overhead of frequent scans to delete stale cache entries. (The default value will result in frequent scans to delete stale cache entries, analogous to what the pre-patched code does.)
- Add a tunable called vfs.nfsd.cachetcp that can be used to disable DRC caching for NFS over TCP, since the old NFS server didn't DRC cache TCP.
It also adjusts the size of nfsrc_floodlevel dynamically, so that it is always greater than vfs.nfsd.tcphighwater.
For UDP the algorithm remains the same as the pre-patched code, but the tunable vfs.nfsd.udphighwater can be used to allow the cache to grow larger and reduce the overhead caused by frequent scans for stale entries. UDP also uses a larger hash table size than the pre-patched code.
Reported by: wollman
Tested by: wollman (earlier version of patch)
Submitted by: ivoras (earlier patch)
Reviewed by: jhb (earlier version of patch)
MFC after: 1 month
Diffstat (limited to 'sys')
-rw-r--r-- sys/fs/nfs/nfsport.h 5
-rw-r--r-- sys/fs/nfs/nfsrvcache.h 10
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdcache.c 282
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdport.c 18
4 files changed, 230 insertions, 85 deletions
diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h
index f0e20f1..cec50a6 100644
--- a/sys/fs/nfs/nfsport.h
+++ b/sys/fs/nfs/nfsport.h
@@ -603,11 +603,6 @@ void nfsrvd_rcv(struct socket *, void *, int);
#define NFSREQSPINLOCK extern struct mtx nfs_req_mutex
#define NFSLOCKREQ() mtx_lock(&nfs_req_mutex)
#define NFSUNLOCKREQ() mtx_unlock(&nfs_req_mutex)
-#define NFSCACHEMUTEX extern struct mtx nfs_cache_mutex
-#define NFSCACHEMUTEXPTR (&nfs_cache_mutex)
-#define NFSLOCKCACHE() mtx_lock(&nfs_cache_mutex)
-#define NFSUNLOCKCACHE() mtx_unlock(&nfs_cache_mutex)
-#define NFSCACHELOCKREQUIRED() mtx_assert(&nfs_cache_mutex, MA_OWNED)
#define NFSSOCKMUTEX extern struct mtx nfs_slock_mutex
#define NFSSOCKMUTEXPTR (&nfs_slock_mutex)
#define NFSLOCKSOCK() mtx_lock(&nfs_slock_mutex)
diff --git a/sys/fs/nfs/nfsrvcache.h b/sys/fs/nfs/nfsrvcache.h
index 30f757a..5c9dc57 100644
--- a/sys/fs/nfs/nfsrvcache.h
+++ b/sys/fs/nfs/nfsrvcache.h
@@ -41,8 +41,9 @@
#define NFSRVCACHE_MAX_SIZE 2048
#define NFSRVCACHE_MIN_SIZE 64
-#define NFSRVCACHE_HASHSIZE 20
+#define NFSRVCACHE_HASHSIZE 500
+/* Cache table entry. */
struct nfsrvcache {
LIST_ENTRY(nfsrvcache) rc_hash; /* Hash chain */
TAILQ_ENTRY(nfsrvcache) rc_lru; /* UDP lru chain */
@@ -104,4 +105,11 @@ struct nfsrvcache {
LIST_HEAD(nfsrvhashhead, nfsrvcache);
+/* The fine-grained locked cache hash table for TCP. */
+struct nfsrchash_bucket {
+ struct mtx mtx;
+ char lock_name[16];
+ struct nfsrvhashhead tbl;
+};
+
#endif /* _NFS_NFSRVCACHE_H_ */
diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c b/sys/fs/nfsserver/nfs_nfsdcache.c
index b979b35..32a053d 100644
--- a/sys/fs/nfsserver/nfs_nfsdcache.c
+++ b/sys/fs/nfsserver/nfs_nfsdcache.c
@@ -160,15 +160,51 @@ __FBSDID("$FreeBSD$");
#include <fs/nfs/nfsport.h>
extern struct nfsstats newnfsstats;
-NFSCACHEMUTEX;
+extern struct mtx nfsrc_udpmtx;
+extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
#endif /* !APPLEKEXT */
-static int nfsrc_tcpnonidempotent = 1;
-static int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER, nfsrc_udpcachesize = 0;
+SYSCTL_DECL(_vfs_nfsd);
+
+static u_int nfsrc_tcphighwater = 0;
+static int
+sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
+{
+ int error, newhighwater;
+
+ newhighwater = nfsrc_tcphighwater;
+ error = sysctl_handle_int(oidp, &newhighwater, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (newhighwater < 0)
+ return (EINVAL);
+ if (newhighwater >= nfsrc_floodlevel)
+ nfsrc_floodlevel = newhighwater + newhighwater / 5;
+ nfsrc_tcphighwater = newhighwater;
+ return (0);
+}
+SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
+ sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
+ "High water mark for TCP cache entries");
+
+static u_int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
+SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
+ &nfsrc_udphighwater, 0,
+ "High water mark for UDP cache entries");
+static u_int nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
+SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
+ &nfsrc_tcptimeout, 0,
+ "Timeout for TCP entries in the DRC");
+static u_int nfsrc_tcpnonidempotent = 1;
+SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
+ &nfsrc_tcpnonidempotent, 0,
+ "Enable the DRC for NFS over TCP");
+
+static int nfsrc_udpcachesize = 0;
static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
-static struct nfsrvhashhead nfsrvhashtbl[NFSRVCACHE_HASHSIZE],
- nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
+static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
+
/*
* and the reverse mapping from generic to Version 2 procedure numbers
*/
@@ -197,10 +233,11 @@ static int newnfsv2_procid[NFS_V3NPROCS] = {
NFSV2PROC_NOOP,
};
+#define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
#define NFSRCUDPHASH(xid) \
- (&nfsrvudphashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
+ (&nfsrvudphashtbl[nfsrc_hash(xid)])
#define NFSRCHASH(xid) \
- (&nfsrvhashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
+ (&nfsrchash_table[nfsrc_hash(xid)].tbl)
#define TRUE 1
#define FALSE 0
#define NFSRVCACHE_CHECKLEN 100
@@ -251,6 +288,18 @@ static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
static void nfsrc_marksametcpconn(u_int64_t);
/*
+ * Return the correct mutex for this cache entry.
+ */
+static __inline struct mtx *
+nfsrc_cachemutex(struct nfsrvcache *rp)
+{
+
+ if ((rp->rc_flag & RC_UDP) != 0)
+ return (&nfsrc_udpmtx);
+ return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
+}
+
+/*
* Initialize the server request cache list
*/
APPLESTATIC void
@@ -264,7 +313,7 @@ nfsrvd_initcache(void)
inited = 1;
for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
LIST_INIT(&nfsrvudphashtbl[i]);
- LIST_INIT(&nfsrvhashtbl[i]);
+ LIST_INIT(&nfsrchash_table[i].tbl);
}
TAILQ_INIT(&nfsrvudplru);
nfsrc_tcpsavedreplies = 0;
@@ -325,10 +374,12 @@ nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
struct sockaddr_in6 *saddr6;
struct nfsrvhashhead *hp;
int ret = 0;
+ struct mtx *mutex;
+ mutex = nfsrc_cachemutex(newrp);
hp = NFSRCUDPHASH(newrp->rc_xid);
loop:
- NFSLOCKCACHE();
+ mtx_lock(mutex);
LIST_FOREACH(rp, hp, rc_hash) {
if (newrp->rc_xid == rp->rc_xid &&
newrp->rc_proc == rp->rc_proc &&
@@ -336,8 +387,8 @@ loop:
nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, NFSCACHEMUTEXPTR,
- (PZERO - 1) | PDROP, "nfsrc", 10 * hz);
+ (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+ "nfsrc", 10 * hz);
goto loop;
}
if (rp->rc_flag == 0)
@@ -347,14 +398,14 @@ loop:
TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
if (rp->rc_flag & RC_INPROG) {
newnfsstats.srvcache_inproghits++;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
ret = RC_DROPIT;
} else if (rp->rc_flag & RC_REPSTATUS) {
/*
* V2 only.
*/
newnfsstats.srvcache_nonidemdonehits++;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
nfsrvd_rephead(nd);
*(nd->nd_errp) = rp->rc_status;
ret = RC_REPLY;
@@ -362,7 +413,7 @@ loop:
NFSRVCACHE_UDPTIMEOUT;
} else if (rp->rc_flag & RC_REPMBUF) {
newnfsstats.srvcache_nonidemdonehits++;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
nd->nd_mreq = m_copym(rp->rc_reply, 0,
M_COPYALL, M_WAITOK);
ret = RC_REPLY;
@@ -377,7 +428,7 @@ loop:
}
}
newnfsstats.srvcache_misses++;
- newnfsstats.srvcache_size++;
+ atomic_add_int(&newnfsstats.srvcache_size, 1);
nfsrc_udpcachesize++;
newrp->rc_flag |= RC_INPROG;
@@ -392,7 +443,7 @@ loop:
}
LIST_INSERT_HEAD(hp, newrp, rc_hash);
TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
nd->nd_rp = newrp;
ret = RC_DOIT;
@@ -410,12 +461,14 @@ nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
struct nfsrvcache *rp;
struct nfsrvcache *retrp = NULL;
mbuf_t m;
+ struct mtx *mutex;
rp = nd->nd_rp;
if (!rp)
panic("nfsrvd_updatecache null rp");
nd->nd_rp = NULL;
- NFSLOCKCACHE();
+ mutex = nfsrc_cachemutex(rp);
+ mtx_lock(mutex);
nfsrc_lock(rp);
if (!(rp->rc_flag & RC_INPROG))
panic("nfsrvd_updatecache not inprog");
@@ -430,7 +483,7 @@ nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
*/
if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
newnfsstats.srvcache_nonidemdonehits++;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
nd->nd_repstat = 0;
if (nd->nd_mreq)
mbuf_freem(nd->nd_mreq);
@@ -438,7 +491,7 @@ nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
panic("reply from cache");
nd->nd_mreq = m_copym(rp->rc_reply, 0,
M_COPYALL, M_WAITOK);
- rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_TCPTIMEOUT;
+ rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
nfsrc_unlock(rp);
goto out;
}
@@ -463,29 +516,28 @@ nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
rp->rc_status = nd->nd_repstat;
rp->rc_flag |= RC_REPSTATUS;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
} else {
if (!(rp->rc_flag & RC_UDP)) {
- nfsrc_tcpsavedreplies++;
+ atomic_add_int(&nfsrc_tcpsavedreplies, 1);
if (nfsrc_tcpsavedreplies >
newnfsstats.srvcache_tcppeak)
newnfsstats.srvcache_tcppeak =
nfsrc_tcpsavedreplies;
}
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
- NFSLOCKCACHE();
+ mtx_lock(mutex);
rp->rc_reply = m;
rp->rc_flag |= RC_REPMBUF;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
}
if (rp->rc_flag & RC_UDP) {
rp->rc_timestamp = NFSD_MONOSEC +
NFSRVCACHE_UDPTIMEOUT;
nfsrc_unlock(rp);
} else {
- rp->rc_timestamp = NFSD_MONOSEC +
- NFSRVCACHE_TCPTIMEOUT;
+ rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
if (rp->rc_refcnt > 0)
nfsrc_unlock(rp);
else
@@ -493,7 +545,7 @@ nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
}
} else {
nfsrc_freecache(rp);
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
}
out:
@@ -509,14 +561,16 @@ out:
APPLESTATIC void
nfsrvd_delcache(struct nfsrvcache *rp)
{
+ struct mtx *mutex;
+ mutex = nfsrc_cachemutex(rp);
if (!(rp->rc_flag & RC_INPROG))
panic("nfsrvd_delcache not in prog");
- NFSLOCKCACHE();
+ mtx_lock(mutex);
rp->rc_flag &= ~RC_INPROG;
if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
nfsrc_freecache(rp);
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
}
/*
@@ -528,7 +582,9 @@ APPLESTATIC void
nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
{
tcp_seq tmp_seq;
+ struct mtx *mutex;
+ mutex = nfsrc_cachemutex(rp);
if (!(rp->rc_flag & RC_LOCKED))
panic("nfsrvd_sentcache not locked");
if (!err) {
@@ -537,10 +593,10 @@ nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
so->so_proto->pr_protocol != IPPROTO_TCP)
panic("nfs sent cache");
if (nfsrv_getsockseqnum(so, &tmp_seq)) {
- NFSLOCKCACHE();
+ mtx_lock(mutex);
rp->rc_tcpseq = tmp_seq;
rp->rc_flag |= RC_TCPSEQ;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
}
}
nfsrc_unlock(rp);
@@ -559,11 +615,13 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
struct nfsrvcache *hitrp;
struct nfsrvhashhead *hp, nfsrc_templist;
int hit, ret = 0;
+ struct mtx *mutex;
+ mutex = nfsrc_cachemutex(newrp);
hp = NFSRCHASH(newrp->rc_xid);
newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
tryagain:
- NFSLOCKCACHE();
+ mtx_lock(mutex);
hit = 1;
LIST_INIT(&nfsrc_templist);
/*
@@ -621,8 +679,8 @@ tryagain:
rp = hitrp;
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, NFSCACHEMUTEXPTR,
- (PZERO - 1) | PDROP, "nfsrc", 10 * hz);
+ (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+ "nfsrc", 10 * hz);
goto tryagain;
}
if (rp->rc_flag == 0)
@@ -630,7 +688,7 @@ tryagain:
rp->rc_flag |= RC_LOCKED;
if (rp->rc_flag & RC_INPROG) {
newnfsstats.srvcache_inproghits++;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
if (newrp->rc_sockref == rp->rc_sockref)
nfsrc_marksametcpconn(rp->rc_sockref);
ret = RC_DROPIT;
@@ -639,24 +697,22 @@ tryagain:
* V2 only.
*/
newnfsstats.srvcache_nonidemdonehits++;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
if (newrp->rc_sockref == rp->rc_sockref)
nfsrc_marksametcpconn(rp->rc_sockref);
ret = RC_REPLY;
nfsrvd_rephead(nd);
*(nd->nd_errp) = rp->rc_status;
- rp->rc_timestamp = NFSD_MONOSEC +
- NFSRVCACHE_TCPTIMEOUT;
+ rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
} else if (rp->rc_flag & RC_REPMBUF) {
newnfsstats.srvcache_nonidemdonehits++;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
if (newrp->rc_sockref == rp->rc_sockref)
nfsrc_marksametcpconn(rp->rc_sockref);
ret = RC_REPLY;
nd->nd_mreq = m_copym(rp->rc_reply, 0,
M_COPYALL, M_WAITOK);
- rp->rc_timestamp = NFSD_MONOSEC +
- NFSRVCACHE_TCPTIMEOUT;
+ rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
} else {
panic("nfs tcp cache1");
}
@@ -665,7 +721,7 @@ tryagain:
goto out;
}
newnfsstats.srvcache_misses++;
- newnfsstats.srvcache_size++;
+ atomic_add_int(&newnfsstats.srvcache_size, 1);
/*
* For TCP, multiple entries for a key are allowed, so don't
@@ -674,7 +730,7 @@ tryagain:
newrp->rc_cachetime = NFSD_MONOSEC;
newrp->rc_flag |= RC_INPROG;
LIST_INSERT_HEAD(hp, newrp, rc_hash);
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
nd->nd_rp = newrp;
ret = RC_DOIT;
@@ -685,16 +741,17 @@ out:
/*
* Lock a cache entry.
- * Also puts a mutex lock on the cache list.
*/
static void
nfsrc_lock(struct nfsrvcache *rp)
{
- NFSCACHELOCKREQUIRED();
+ struct mtx *mutex;
+
+ mutex = nfsrc_cachemutex(rp);
+ mtx_assert(mutex, MA_OWNED);
while ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, PZERO - 1,
- "nfsrc", 0);
+ (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
}
rp->rc_flag |= RC_LOCKED;
}
@@ -705,11 +762,13 @@ nfsrc_lock(struct nfsrvcache *rp)
static void
nfsrc_unlock(struct nfsrvcache *rp)
{
+ struct mtx *mutex;
- NFSLOCKCACHE();
+ mutex = nfsrc_cachemutex(rp);
+ mtx_lock(mutex);
rp->rc_flag &= ~RC_LOCKED;
nfsrc_wanted(rp);
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
}
/*
@@ -732,7 +791,6 @@ static void
nfsrc_freecache(struct nfsrvcache *rp)
{
- NFSCACHELOCKREQUIRED();
LIST_REMOVE(rp, rc_hash);
if (rp->rc_flag & RC_UDP) {
TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
@@ -742,10 +800,10 @@ nfsrc_freecache(struct nfsrvcache *rp)
if (rp->rc_flag & RC_REPMBUF) {
mbuf_freem(rp->rc_reply);
if (!(rp->rc_flag & RC_UDP))
- nfsrc_tcpsavedreplies--;
+ atomic_add_int(&nfsrc_tcpsavedreplies, -1);
}
FREE((caddr_t)rp, M_NFSRVCACHE);
- newnfsstats.srvcache_size--;
+ atomic_add_int(&newnfsstats.srvcache_size, -1);
}
/*
@@ -757,20 +815,21 @@ nfsrvd_cleancache(void)
struct nfsrvcache *rp, *nextrp;
int i;
- NFSLOCKCACHE();
for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
- LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
+ mtx_lock(&nfsrchash_table[i].mtx);
+ LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
nfsrc_freecache(rp);
- }
+ mtx_unlock(&nfsrchash_table[i].mtx);
}
+ mtx_lock(&nfsrc_udpmtx);
for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
nfsrc_freecache(rp);
}
}
newnfsstats.srvcache_size = 0;
+ mtx_unlock(&nfsrc_udpmtx);
nfsrc_tcpsavedreplies = 0;
- NFSUNLOCKCACHE();
}
/*
@@ -780,28 +839,97 @@ static void
nfsrc_trimcache(u_int64_t sockref, struct socket *so)
{
struct nfsrvcache *rp, *nextrp;
- int i;
+ int i, j, k, time_histo[10];
+ time_t thisstamp;
+ static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
+ static int onethread = 0;
- NFSLOCKCACHE();
- TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
- if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
- && rp->rc_refcnt == 0
- && ((rp->rc_flag & RC_REFCNT) ||
- NFSD_MONOSEC > rp->rc_timestamp ||
- nfsrc_udpcachesize > nfsrc_udphighwater))
- nfsrc_freecache(rp);
- }
- for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
- LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
+ if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
+ return;
+ if (NFSD_MONOSEC != udp_lasttrim ||
+ nfsrc_udpcachesize >= (nfsrc_udphighwater +
+ nfsrc_udphighwater / 2)) {
+ mtx_lock(&nfsrc_udpmtx);
+ udp_lasttrim = NFSD_MONOSEC;
+ TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
&& rp->rc_refcnt == 0
&& ((rp->rc_flag & RC_REFCNT) ||
- NFSD_MONOSEC > rp->rc_timestamp ||
- nfsrc_activesocket(rp, sockref, so)))
+ udp_lasttrim > rp->rc_timestamp ||
+ nfsrc_udpcachesize > nfsrc_udphighwater))
nfsrc_freecache(rp);
}
+ mtx_unlock(&nfsrc_udpmtx);
+ }
+ if (NFSD_MONOSEC != tcp_lasttrim ||
+ nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
+ for (i = 0; i < 10; i++)
+ time_histo[i] = 0;
+ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+ mtx_lock(&nfsrchash_table[i].mtx);
+ if (i == 0)
+ tcp_lasttrim = NFSD_MONOSEC;
+ LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
+ nextrp) {
+ if (!(rp->rc_flag &
+ (RC_INPROG|RC_LOCKED|RC_WANTED))
+ && rp->rc_refcnt == 0) {
+ /*
+ * The timestamps range from roughly the
+ * present (tcp_lasttrim) to the present
+ * + nfsrc_tcptimeout. Generate a simple
+ * histogram of where the timeouts fall.
+ */
+ j = rp->rc_timestamp - tcp_lasttrim;
+ if (j >= nfsrc_tcptimeout)
+ j = nfsrc_tcptimeout - 1;
+ if (j < 0)
+ j = 0;
+ j = (j * 10 / nfsrc_tcptimeout) % 10;
+ time_histo[j]++;
+ if ((rp->rc_flag & RC_REFCNT) ||
+ tcp_lasttrim > rp->rc_timestamp ||
+ nfsrc_activesocket(rp, sockref, so))
+ nfsrc_freecache(rp);
+ }
+ }
+ mtx_unlock(&nfsrchash_table[i].mtx);
+ }
+ j = nfsrc_tcphighwater / 5; /* 20% of it */
+ if (j > 0 && (nfsrc_tcpsavedreplies + j) > nfsrc_tcphighwater) {
+ /*
+ * Trim some more with a smaller timeout of as little
+ * as 20% of nfsrc_tcptimeout to try and get below
+ * 80% of the nfsrc_tcphighwater.
+ */
+ k = 0;
+ for (i = 0; i < 8; i++) {
+ k += time_histo[i];
+ if (k > j)
+ break;
+ }
+ k = nfsrc_tcptimeout * (i + 1) / 10;
+ if (k < 1)
+ k = 1;
+ thisstamp = tcp_lasttrim + k;
+ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+ mtx_lock(&nfsrchash_table[i].mtx);
+ LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
+ rc_hash, nextrp) {
+ if (!(rp->rc_flag &
+ (RC_INPROG|RC_LOCKED|RC_WANTED))
+ && rp->rc_refcnt == 0
+ && ((rp->rc_flag & RC_REFCNT) ||
+ thisstamp > rp->rc_timestamp ||
+ nfsrc_activesocket(rp, sockref,
+ so)))
+ nfsrc_freecache(rp);
+ }
+ mtx_unlock(&nfsrchash_table[i].mtx);
+ }
+ }
}
- NFSUNLOCKCACHE();
+ atomic_store_rel_int(&onethread, 0);
}
/*
@@ -810,12 +938,14 @@ nfsrc_trimcache(u_int64_t sockref, struct socket *so)
APPLESTATIC void
nfsrvd_refcache(struct nfsrvcache *rp)
{
+ struct mtx *mutex;
- NFSLOCKCACHE();
+ mutex = nfsrc_cachemutex(rp);
+ mtx_lock(mutex);
if (rp->rc_refcnt < 0)
panic("nfs cache refcnt");
rp->rc_refcnt++;
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
}
/*
@@ -824,14 +954,16 @@ nfsrvd_refcache(struct nfsrvcache *rp)
APPLESTATIC void
nfsrvd_derefcache(struct nfsrvcache *rp)
{
+ struct mtx *mutex;
- NFSLOCKCACHE();
+ mutex = nfsrc_cachemutex(rp);
+ mtx_lock(mutex);
if (rp->rc_refcnt <= 0)
panic("nfs cache derefcnt");
rp->rc_refcnt--;
if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
nfsrc_freecache(rp);
- NFSUNLOCKCACHE();
+ mtx_unlock(mutex);
}
/*
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index dfb1303..a6a0169 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -60,7 +60,8 @@ extern SVCPOOL *nfsrvd_pool;
extern struct nfsv4lock nfsd_suspend_lock;
struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
NFSDLOCKMUTEX;
-struct mtx nfs_cache_mutex;
+struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
+struct mtx nfsrc_udpmtx;
struct mtx nfs_v4root_mutex;
struct nfsrvfh nfs_rootfh, nfs_pubfh;
int nfs_pubfhset = 0, nfs_rootfhset = 0;
@@ -3278,7 +3279,7 @@ extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
static int
nfsd_modevent(module_t mod, int type, void *data)
{
- int error = 0;
+ int error = 0, i;
static int loaded = 0;
switch (type) {
@@ -3286,7 +3287,14 @@ nfsd_modevent(module_t mod, int type, void *data)
if (loaded)
goto out;
newnfs_portinit();
- mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
+ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+ snprintf(nfsrchash_table[i].lock_name,
+ sizeof(nfsrchash_table[i].lock_name), "nfsrc_tcp%d",
+ i);
+ mtx_init(&nfsrchash_table[i].mtx,
+ nfsrchash_table[i].lock_name, NULL, MTX_DEF);
+ }
+ mtx_init(&nfsrc_udpmtx, "nfs_udpcache_mutex", NULL, MTX_DEF);
mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
MTX_DEF);
@@ -3330,7 +3338,9 @@ nfsd_modevent(module_t mod, int type, void *data)
svcpool_destroy(nfsrvd_pool);
/* and get rid of the locks */
- mtx_destroy(&nfs_cache_mutex);
+ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++)
+ mtx_destroy(&nfsrchash_table[i].mtx);
+ mtx_destroy(&nfsrc_udpmtx);
mtx_destroy(&nfs_v4root_mutex);
mtx_destroy(&nfsv4root_mnt.mnt_mtx);
lockdestroy(&nfsv4root_mnt.mnt_explock);
OpenPOWER on IntegriCloud