diff options
author | rwatson <rwatson@FreeBSD.org> | 2008-01-18 12:19:50 +0000 |
---|---|---|
committer | rwatson <rwatson@FreeBSD.org> | 2008-01-18 12:19:50 +0000 |
commit | ba4fb8ac52b3b401d72c452359c6c3353af7013a (patch) | |
tree | 9f86cdb3f4c1162111d08e83ace89bed97d08823 /sys/netinet/tcp_usrreq.c | |
parent | 66dc6e9a4e6ae326b17b0629d482d43d9b82074f (diff) | |
download | FreeBSD-src-ba4fb8ac52b3b401d72c452359c6c3353af7013a.zip FreeBSD-src-ba4fb8ac52b3b401d72c452359c6c3353af7013a.tar.gz |
In tcp_ctloutput(), don't hold the inpcb lock over sooptcopyin(), rather,
drop the lock and then re-acquire it, revalidating TCP connection state
assumptions when we do so. This avoids a potential lock order reversal
(and potential deadlock, although none have been reported) due to the
inpcb lock being held over a page fault.
MFC after: 1 week
PR: 102752
Reviewed by: bz
Reported by: Václav Haisman <v dot haisman at sh dot cvut dot cz>
Diffstat (limited to 'sys/netinet/tcp_usrreq.c')
-rw-r--r-- | sys/netinet/tcp_usrreq.c | 80 |
1 files changed, 55 insertions, 25 deletions
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 02ab9d0..a70a551 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1232,15 +1232,20 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) } /* - * The new sockopt interface makes it possible for us to block in the - * copyin/out step (if we take a page fault). Taking a page fault at - * splnet() is probably a Bad Thing. (Since sockets and pcbs both now - * use TSM, there probably isn't any need for this function to run at - * splnet() any more. This needs more examination.) - * - * XXXRW: The locking here is wrong; we may take a page fault while holding - * the inpcb lock. + * tcp_ctloutput() must drop the inpcb lock before performing copyin on + * socket option arguments. When it re-acquires the lock after the copy, it + * has to revalidate that the connection is still valid for the socket + * option. */ +#define INP_LOCK_RECHECK(inp) do { \ + INP_LOCK(inp); \ + if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { \ + INP_UNLOCK(inp); \ + return (ECONNRESET); \ + } \ + tp = intotcpcb(inp); \ +} while(0) + int tcp_ctloutput(struct socket *so, struct sockopt *sopt) { @@ -1254,44 +1259,52 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt) KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); INP_LOCK(inp); if (sopt->sopt_level != IPPROTO_TCP) { - INP_UNLOCK(inp); #ifdef INET6 - if (INP_CHECK_SOCKAF(so, AF_INET6)) + if (INP_CHECK_SOCKAF(so, AF_INET6)) { + INP_UNLOCK(inp); error = ip6_ctloutput(so, sopt); - else + } else { #endif /* INET6 */ - error = ip_ctloutput(so, sopt); + INP_UNLOCK(inp); + error = ip_ctloutput(so, sopt); +#ifdef INET6 + } +#endif return (error); } if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { - error = ECONNRESET; - goto out; + INP_UNLOCK(inp); + return (ECONNRESET); } - tp = intotcpcb(inp); switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { #ifdef TCP_SIGNATURE case TCP_MD5SIG: + INP_UNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + sizeof optval); if (error) - break; + return (error); + INP_LOCK_RECHECK(inp); if (optval > 0) tp->t_flags |= TF_SIGNATURE; else tp->t_flags &= ~TF_SIGNATURE; + INP_UNLOCK(inp); break; #endif /* TCP_SIGNATURE */ case TCP_NODELAY: case TCP_NOOPT: + INP_UNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + sizeof optval); if (error) - break; + return (error); + INP_LOCK_RECHECK(inp); switch (sopt->sopt_name) { case TCP_NODELAY: opt = TF_NODELAY; @@ -1308,83 +1321,100 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt) tp->t_flags |= opt; else tp->t_flags &= ~opt; + INP_UNLOCK(inp); break; case TCP_NOPUSH: + INP_UNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + sizeof optval); if (error) - break; + return (error); + INP_LOCK_RECHECK(inp); if (optval) tp->t_flags |= TF_NOPUSH; else { tp->t_flags &= ~TF_NOPUSH; error = tcp_output(tp); } + INP_UNLOCK(inp); break; case TCP_MAXSEG: + INP_UNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + sizeof optval); if (error) - break; + return (error); + INP_LOCK_RECHECK(inp); if (optval > 0 && optval <= tp->t_maxseg && optval + 40 >= tcp_minmss) tp->t_maxseg = optval; else error = EINVAL; + INP_UNLOCK(inp); break; case TCP_INFO: + INP_UNLOCK(inp); error = EINVAL; break; default: + INP_UNLOCK(inp); error = ENOPROTOOPT; break; } break; case SOPT_GET: + tp = intotcpcb(inp); switch (sopt->sopt_name) { #ifdef TCP_SIGNATURE case TCP_MD5SIG: optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; + INP_UNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; #endif + case TCP_NODELAY: optval = tp->t_flags & TF_NODELAY; + INP_UNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_MAXSEG: optval = tp->t_maxseg; + INP_UNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOOPT: optval = tp->t_flags & TF_NOOPT; + INP_UNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; + INP_UNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_INFO: tcp_fill_info(tp, &ti); + INP_UNLOCK(inp); error = sooptcopyout(sopt, &ti, sizeof ti); break; default: + INP_UNLOCK(inp); error = ENOPROTOOPT; break; } break; } -out: - INP_UNLOCK(inp); return (error); } +#undef INP_LOCK_RECHECK /* * tcp_sendspace and tcp_recvspace are the default send and receive window |