summaryrefslogtreecommitdiffstats
path: root/sys/netinet/tcp_usrreq.c
diff options
context:
space:
mode:
authorrwatson <rwatson@FreeBSD.org>2008-01-18 12:19:50 +0000
committerrwatson <rwatson@FreeBSD.org>2008-01-18 12:19:50 +0000
commitba4fb8ac52b3b401d72c452359c6c3353af7013a (patch)
tree9f86cdb3f4c1162111d08e83ace89bed97d08823 /sys/netinet/tcp_usrreq.c
parent66dc6e9a4e6ae326b17b0629d482d43d9b82074f (diff)
downloadFreeBSD-src-ba4fb8ac52b3b401d72c452359c6c3353af7013a.zip
FreeBSD-src-ba4fb8ac52b3b401d72c452359c6c3353af7013a.tar.gz
In tcp_ctloutput(), don't hold the inpcb lock over sooptcopyin(), rather,
drop the lock and then re-acquire it, revalidating TCP connection state assumptions when we do so. This avoids a potential lock order reversal (and potential deadlock, although none have been reported) due to the inpcb lock being held over a page fault. MFC after: 1 week PR: 102752 Reviewed by: bz Reported by: Václav Haisman <v dot haisman at sh dot cvut dot cz>
Diffstat (limited to 'sys/netinet/tcp_usrreq.c')
-rw-r--r--sys/netinet/tcp_usrreq.c80
1 files changed, 55 insertions, 25 deletions
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 02ab9d0..a70a551 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1232,15 +1232,20 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
}
/*
- * The new sockopt interface makes it possible for us to block in the
- * copyin/out step (if we take a page fault). Taking a page fault at
- * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
- * use TSM, there probably isn't any need for this function to run at
- * splnet() any more. This needs more examination.)
- *
- * XXXRW: The locking here is wrong; we may take a page fault while holding
- * the inpcb lock.
+ * tcp_ctloutput() must drop the inpcb lock before performing copyin on
+ * socket option arguments. When it re-acquires the lock after the copy, it
+ * has to revalidate that the connection is still valid for the socket
+ * option.
*/
+#define INP_LOCK_RECHECK(inp) do { \
+ INP_LOCK(inp); \
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { \
+ INP_UNLOCK(inp); \
+ return (ECONNRESET); \
+ } \
+ tp = intotcpcb(inp); \
+} while(0)
+
int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
@@ -1254,44 +1259,52 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
INP_LOCK(inp);
if (sopt->sopt_level != IPPROTO_TCP) {
- INP_UNLOCK(inp);
#ifdef INET6
- if (INP_CHECK_SOCKAF(so, AF_INET6))
+ if (INP_CHECK_SOCKAF(so, AF_INET6)) {
+ INP_UNLOCK(inp);
error = ip6_ctloutput(so, sopt);
- else
+ } else {
#endif /* INET6 */
- error = ip_ctloutput(so, sopt);
+ INP_UNLOCK(inp);
+ error = ip_ctloutput(so, sopt);
+#ifdef INET6
+ }
+#endif
return (error);
}
if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
- error = ECONNRESET;
- goto out;
+ INP_UNLOCK(inp);
+ return (ECONNRESET);
}
- tp = intotcpcb(inp);
switch (sopt->sopt_dir) {
case SOPT_SET:
switch (sopt->sopt_name) {
#ifdef TCP_SIGNATURE
case TCP_MD5SIG:
+ INP_UNLOCK(inp);
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
- break;
+ return (error);
+ INP_LOCK_RECHECK(inp);
if (optval > 0)
tp->t_flags |= TF_SIGNATURE;
else
tp->t_flags &= ~TF_SIGNATURE;
+ INP_UNLOCK(inp);
break;
#endif /* TCP_SIGNATURE */
case TCP_NODELAY:
case TCP_NOOPT:
+ INP_UNLOCK(inp);
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
- break;
+ return (error);
+ INP_LOCK_RECHECK(inp);
switch (sopt->sopt_name) {
case TCP_NODELAY:
opt = TF_NODELAY;
@@ -1308,83 +1321,100 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_flags |= opt;
else
tp->t_flags &= ~opt;
+ INP_UNLOCK(inp);
break;
case TCP_NOPUSH:
+ INP_UNLOCK(inp);
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
- break;
+ return (error);
+ INP_LOCK_RECHECK(inp);
if (optval)
tp->t_flags |= TF_NOPUSH;
else {
tp->t_flags &= ~TF_NOPUSH;
error = tcp_output(tp);
}
+ INP_UNLOCK(inp);
break;
case TCP_MAXSEG:
+ INP_UNLOCK(inp);
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
- break;
+ return (error);
+ INP_LOCK_RECHECK(inp);
if (optval > 0 && optval <= tp->t_maxseg &&
optval + 40 >= tcp_minmss)
tp->t_maxseg = optval;
else
error = EINVAL;
+ INP_UNLOCK(inp);
break;
case TCP_INFO:
+ INP_UNLOCK(inp);
error = EINVAL;
break;
default:
+ INP_UNLOCK(inp);
error = ENOPROTOOPT;
break;
}
break;
case SOPT_GET:
+ tp = intotcpcb(inp);
switch (sopt->sopt_name) {
#ifdef TCP_SIGNATURE
case TCP_MD5SIG:
optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
+ INP_UNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
#endif
+
case TCP_NODELAY:
optval = tp->t_flags & TF_NODELAY;
+ INP_UNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_MAXSEG:
optval = tp->t_maxseg;
+ INP_UNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_NOOPT:
optval = tp->t_flags & TF_NOOPT;
+ INP_UNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_NOPUSH:
optval = tp->t_flags & TF_NOPUSH;
+ INP_UNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_INFO:
tcp_fill_info(tp, &ti);
+ INP_UNLOCK(inp);
error = sooptcopyout(sopt, &ti, sizeof ti);
break;
default:
+ INP_UNLOCK(inp);
error = ENOPROTOOPT;
break;
}
break;
}
-out:
- INP_UNLOCK(inp);
return (error);
}
+#undef INP_LOCK_RECHECK
/*
* tcp_sendspace and tcp_recvspace are the default send and receive window
OpenPOWER on IntegriCloud