From 2d7192d6cbab20e153c47fa1559ffd41ceef0e79 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 26 Apr 2011 13:28:44 -0700 Subject: ipv4: Sanitize and simplify ip_route_{connect,newports}() These functions are used together as a unit for route resolution during connect(). They address the chicken-and-egg problem that exists when ports need to be allocated during connect() processing, yet such port allocations require addressing information from the routing code. It's currently more heavy handed than it needs to be, and in particular we allocate and initialize a flow object twice. Let the callers provide the on-stack flow object. That way we only need to initialize it once in the ip_route_connect() call. Later, if ip_route_newports() needs to do anything, it re-uses that flow object as-is except for the ports which it updates before the route re-lookup. Also, describe why this set of facilities are needed and how it works in a big comment. Signed-off-by: David S. Miller Reviewed-by: Eric Dumazet --- net/l2tp/l2tp_ip.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/l2tp/l2tp_ip.c') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index fce9bd3..cc67367 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -296,12 +296,12 @@ out_in_use: static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - int rc; - struct inet_sock *inet = inet_sk(sk); struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr; + struct inet_sock *inet = inet_sk(sk); + struct flowi4 fl4; struct rtable *rt; __be32 saddr; - int oif; + int oif, rc; rc = -EINVAL; if (addr_len < sizeof(*lsa)) @@ -320,7 +320,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) goto out; - rt = ip_route_connect(lsa->l2tp_addr.s_addr, saddr, + rt = ip_route_connect(&fl4, lsa->l2tp_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, IPPROTO_L2TP, 0, 0, sk, true); -- cgit v1.1 From f6d8bd051c391c1c0458a30b2a7abcd939329259 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Apr 2011 09:45:37 +0000 Subject: inet: add RCU protection to inet->opt We lack proper synchronization to manipulate inet->opt ip_options Problem is ip_make_skb() calls ip_setup_cork() and ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options), without any protection against another thread manipulating inet->opt. Another thread can change inet->opt pointer and free old one under us. Use RCU to protect inet->opt (changed to inet->inet_opt). Instead of handling atomic refcounts, just copy ip_options when necessary, to avoid cache line dirtying. We cant insert an rcu_head in struct ip_options since its included in skb->cb[], so this patch is large because I had to introduce a new ip_options_rcu structure. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/l2tp/l2tp_ip.c') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index cc67367..962a607 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -416,7 +416,6 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m int rc; struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct ip_options *opt = inet->opt; struct rtable *rt = NULL; int connected = 0; __be32 daddr; @@ -471,9 +470,14 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m rt = (struct rtable *) __sk_dst_check(sk, 0); if (rt == NULL) { + struct ip_options_rcu *inet_opt; + + inet_opt = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); + /* Use correct destination address if we have options. */ - if (opt && opt->srr) - daddr = opt->faddr; + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times -- cgit v1.1 From 778865a550e7958c1211242cc481f48d46de0f04 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Apr 2011 13:54:06 -0700 Subject: l2tp: Fix inet_opt conversion. We don't actually hold the socket lock at this point, so the rcu_dereference_protected() isn't' correct. Thanks to Eric Dumazet for pointing this out. Thankfully, we're only interested in fetching the faddr value if srr is enabled, so we can simply make this an RCU sequence and use plain rcu_dereference(). Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/l2tp/l2tp_ip.c') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 962a607..e13c166 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -472,13 +472,15 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (rt == NULL) { struct ip_options_rcu *inet_opt; - inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); /* Use correct destination address if we have options. */ if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; + rcu_read_unlock(); + /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times * itself out. -- cgit v1.1 From 44901666a1a736b3f43eac7894b07183f127e9a9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Apr 2011 23:17:58 -0700 Subject: ipv4: Fetch route saddr from flow key in l2tp_ip_connect(). Now that output route lookups update the flow with source address selection, we can fetch it from fl4->saddr instead of rt->rt_src Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/l2tp/l2tp_ip.c') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index e13c166..c100fa9 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -340,9 +340,9 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id; if (!inet->inet_saddr) - inet->inet_saddr = rt->rt_src; + inet->inet_saddr = fl4.saddr; if (!inet->inet_rcv_saddr) - inet->inet_rcv_saddr = rt->rt_src; + inet->inet_rcv_saddr = fl4.saddr; inet->inet_daddr = rt->rt_dst; sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; -- cgit v1.1 From ad227425c9ca907b5498e2558320b7e585d86ec9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Apr 2011 23:50:49 -0700 Subject: ipv4: Get route daddr from flow key in l2tp_ip_connect(). Now that output route lookups update the flow with destination address selection, we can fetch it from fl4->daddr instead of rt->rt_dst Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/l2tp/l2tp_ip.c') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index c100fa9..a4d2dfa 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -343,7 +343,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len inet->inet_saddr = fl4.saddr; if (!inet->inet_rcv_saddr) inet->inet_rcv_saddr = fl4.saddr; - inet->inet_daddr = rt->rt_dst; + inet->inet_daddr = fl4.daddr; sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; -- cgit v1.1 From 31e4543db29fb85496a122b965d6482c8d1a2bfe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 May 2011 20:25:42 -0700 Subject: ipv4: Make caller provide on-stack flow key to ip_route_output_ports(). Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/l2tp/l2tp_ip.c') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index a4d2dfa..8189960 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -471,6 +471,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (rt == NULL) { struct ip_options_rcu *inet_opt; + struct flowi4 fl4; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); @@ -485,7 +486,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m * keep trying until route appears or the connection times * itself out. */ - rt = ip_route_output_ports(sock_net(sk), sk, + rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, RT_CONN_FLAGS(sk), -- cgit v1.1 From 2f16270f41e1499e23e6be25c51be87d950ffc91 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 8 May 2011 13:39:01 -0700 Subject: l2tp: Fix locking in l2tp_ip.c Both l2tp_ip_connect() and l2tp_ip_sendmsg() must take the socket lock. They both modify socket state non-atomically, and in particular l2tp_ip_sendmsg() increments socket private counters without using atomic operations. Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net/l2tp/l2tp_ip.c') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 8189960..bd0cc0b 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -311,6 +311,8 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len if (lsa->l2tp_family != AF_INET) goto out; + lock_sock(sk); + sk_dst_reset(sk); oif = sk->sk_bound_dev_if; @@ -356,6 +358,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len rc = 0; out: + release_sock(sk); return rc; } @@ -420,18 +423,23 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m int connected = 0; __be32 daddr; + lock_sock(sk); + + rc = -ENOTCONN; if (sock_flag(sk, SOCK_DEAD)) - return -ENOTCONN; + goto out; /* Get and verify the address. */ if (msg->msg_name) { struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name; + rc = -EINVAL; if (msg->msg_namelen < sizeof(*lip)) - return -EINVAL; + goto out; if (lip->l2tp_family != AF_INET) { + rc = -EAFNOSUPPORT; if (lip->l2tp_family != AF_UNSPEC) - return -EAFNOSUPPORT; + goto out; } daddr = lip->l2tp_addr.s_addr; @@ -510,12 +518,15 @@ error: lsa->tx_errors++; } +out: + release_sock(sk); return rc; no_route: IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, -- cgit v1.1 From fdbb0f076b065a0c753ba26925f10357505f1d42 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 8 May 2011 13:48:37 -0700 Subject: l2tp: Use cork flow in l2tp_ip_connect() and l2tp_ip_sendmsg() Now that the socket is consistently locked in these two routines, this transformation is legal. Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net/l2tp/l2tp_ip.c') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index bd0cc0b..1ca7489 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -298,7 +298,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len { struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr; struct inet_sock *inet = inet_sk(sk); - struct flowi4 fl4; + struct flowi4 *fl4; struct rtable *rt; __be32 saddr; int oif, rc; @@ -322,7 +322,8 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) goto out; - rt = ip_route_connect(&fl4, lsa->l2tp_addr.s_addr, saddr, + fl4 = &inet->cork.fl.u.ip4; + rt = ip_route_connect(fl4, lsa->l2tp_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, IPPROTO_L2TP, 0, 0, sk, true); @@ -342,10 +343,10 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id; if (!inet->inet_saddr) - inet->inet_saddr = fl4.saddr; + inet->inet_saddr = fl4->saddr; if (!inet->inet_rcv_saddr) - inet->inet_rcv_saddr = fl4.saddr; - inet->inet_daddr = fl4.daddr; + inet->inet_rcv_saddr = fl4->saddr; + inet->inet_daddr = fl4->daddr; sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; @@ -420,6 +421,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk); struct inet_sock *inet = inet_sk(sk); struct rtable *rt = NULL; + struct flowi4 *fl4; int connected = 0; __be32 daddr; @@ -474,12 +476,12 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m goto error; } + fl4 = &inet->cork.fl.u.ip4; if (connected) rt = (struct rtable *) __sk_dst_check(sk, 0); if (rt == NULL) { struct ip_options_rcu *inet_opt; - struct flowi4 fl4; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); @@ -494,7 +496,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m * keep trying until route appears or the connection times * itself out. */ - rt = ip_route_output_ports(sock_net(sk), &fl4, sk, + rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, RT_CONN_FLAGS(sk), -- cgit v1.1 From d9d8da805dcb503ef8ee49918a94d49085060f23 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 6 May 2011 22:23:20 -0700 Subject: inet: Pass flowi to ->queue_xmit(). This allows us to acquire the exact route keying information from the protocol, however that might be managed. It handles all of the possibilities, from the simplest case of storing the key in inet->cork.fl to the more complex setup SCTP has where individual transports determine the flow. Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/l2tp/l2tp_ip.c') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 1ca7489..f7fb09e 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -508,7 +508,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m skb_dst_set(skb, dst_clone(&rt->dst)); /* Queue the packet to IP for output */ - rc = ip_queue_xmit(skb); + rc = ip_queue_xmit(skb, &inet->cork.fl); error: /* Update stats */ -- cgit v1.1