From 734907e82d21a75a514b80164185427a832a00c0 Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Fri, 8 Feb 2013 09:30:23 -0800 Subject: openvswitch: Fix ovs_vport_cmd_del return value on success If the pointer does not represent an error then the PTR_ERR macro may still return a nonzero value. The fix is the same as in ovs_vport_cmd_set. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f996db3..5e275b9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1772,6 +1772,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(reply)) goto exit_unlock; + err = 0; ovs_dp_detach_port(vport); genl_notify(reply, genl_info_net(info), info->snd_portid, -- cgit v1.1 From cb7c5bdffb727a3d4dea5247d9d1d52238b01d90 Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Fri, 8 Feb 2013 13:18:01 -0800 Subject: openvswitch: Fix ovs_vport_cmd_new return value on success If the pointer does not represent an error then the PTR_ERR macro may still return a nonzero value. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 5e275b9..a2cd3e6 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1691,6 +1691,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; + err = 0; reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { -- cgit v1.1 From a15ff76c955d17cf58313097e4a24124da022b1d Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Fri, 15 Feb 2013 11:07:43 -0800 Subject: openvswitch: Call genlmsg_end in queue_userspace_packet Without genlmsg_end the upcall message ends (according to nlmsg_len) after the struct ovs_header. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a2cd3e6..cae1062 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -395,6 +395,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, skb_copy_and_csum_dev(skb, nla_data(nla)); + genlmsg_end(user_skb, upcall); err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: -- cgit v1.1 From 17b682a04841233f827073b327c6533e478dfcd4 Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Tue, 19 Feb 2013 11:10:30 -0800 Subject: openvswitch: Fix parsing invalid LLC/SNAP ethertypes Before this patch, if an LLC/SNAP packet with OUI 00:00:00 had an ethertype less than 1536 the flow key given to userspace in the upcall would contain the invalid ethertype (for example, 3). If userspace attempted to insert a kernel flow for this key it would be rejected by ovs_flow_from_nlattrs. This patch allows OVS to pass the OFTest pktact.DirectBadLlcPackets. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c3294ce..0c98d40 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -484,7 +484,11 @@ static __be16 parse_ethertype(struct sk_buff *skb) return htons(ETH_P_802_2); __skb_pull(skb, sizeof(struct llc_snap_hdr)); - return llc->ethertype; + + if (ntohs(llc->ethertype) >= 1536) + return llc->ethertype; + + return htons(ETH_P_802_2); } static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, -- cgit v1.1 From 7b024082b2b279af58e24ebd46e81777723d58da Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Feb 2013 17:32:26 +0800 Subject: openvswitch: fix the calculation of checksum for vlan header In vlan_insert_tag(), we insert a 4-byte VLAN header _after_ mac header: memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN); ... veth->h_vlan_proto = htons(ETH_P_8021Q); ... veth->h_vlan_TCI = htons(vlan_tci); so after it, we should recompute the checksum to include these 4 bytes. skb->data still points to the mac header, therefore VLAN header is at (2 * ETH_ALEN = 12) bytes after it, not (ETH_HLEN = 14) bytes. This can also be observed via tcpdump: 0x0000: ffff ffff ffff 5254 005d 6f6e 8100 000a 0x0010: 0806 0001 0800 0604 0001 5254 005d 6f6e 0x0020: c0a8 026e 0000 0000 0000 c0a8 0282 Similar for __pop_vlan_tci(), the vlan header we remove is the one overwritten in: memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); Therefore the VLAN_HLEN = 4 bytes after 2 * ETH_ALEN is the part we want to sub from checksum. Cc: David S. Miller Cc: Jesse Gross Signed-off-by: Cong Wang Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ac2defe..d4d5363 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -58,7 +58,7 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *current_tci = vhdr->h_vlan_TCI; @@ -115,7 +115,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); } __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); -- cgit v1.1 From d176ca2a48ff2b5d7becfacdcbd1d72c73bd22d1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Feb 2013 19:41:26 +0800 Subject: openvswitch: remove some useless comments These comments are useless in upstream kernel. Cc: David S. Miller Cc: Jesse Gross Signed-off-by: Cong Wang Signed-off-by: Jesse Gross --- net/openvswitch/vport-netdev.c | 3 +-- net/openvswitch/vport.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 670cbc3..2130d61 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -43,8 +43,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) /* Make our own copy of the packet. Otherwise we will mangle the * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). - * (No one comes after us, since we tell handle_bridge() that we took - * the packet.) */ + */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 70af0be..6255e48 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -326,8 +326,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * @skb: skb that was received * * Must be called with rcu_read_lock. The packet cannot be shared and - * skb->data should point to the Ethernet header. The caller must have already - * called compute_ip_summed() to initialize the checksumming fields. + * skb->data should point to the Ethernet header. */ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) { -- cgit v1.1 From b141e811a0763bb107af4cd99d456193ccdb8053 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 11:04:45 +0100 Subject: NFC: llcp: Decrease socket ack log when accepting a connection This is really difficult to test with real NFC devices, but without this fix an LLCP server will eventually refuse new connections. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 5332751..5c7cdf3f 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -278,6 +278,8 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, pr_debug("Returning sk state %d\n", sk->sk_state); + sk_acceptq_removed(parent); + return sk; } -- cgit v1.1 From 3536da06db0baa675f32de608c0a4c0f5ef0e9ff Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 15:40:04 +0100 Subject: NFC: llcp: Clean local timers and works when removing a device Whenever an adapter is removed we must clean all the local structures, especially the timers and scheduled work. Otherwise those asynchronous threads will eventually try to access the freed nfc_dev pointer if an LLCP link is up. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 7f8266d..77e1d97 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -142,20 +142,25 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) return local; } -static void local_release(struct kref *ref) +static void local_cleanup(struct nfc_llcp_local *local, bool listen) { - struct nfc_llcp_local *local; - - local = container_of(ref, struct nfc_llcp_local, ref); - - list_del(&local->list); - nfc_llcp_socket_release(local, false); + nfc_llcp_socket_release(local, listen); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); +} + +static void local_release(struct kref *ref) +{ + struct nfc_llcp_local *local; + + local = container_of(ref, struct nfc_llcp_local, ref); + + list_del(&local->list); + local_cleanup(local, false); kfree(local); } @@ -1427,6 +1432,8 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev) return; } + local_cleanup(local, false); + nfc_llcp_local_put(local); } -- cgit v1.1 From e6a3a4bb856a6fba551b43376c80f45836132710 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 16:33:30 +0100 Subject: NFC: llcp: Clean raw sockets from nfc_llcp_socket_release Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 77e1d97..8a35423 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -133,6 +133,35 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) } write_unlock(&local->sockets.lock); + + /* + * If we want to keep the listening sockets alive, + * we don't touch the RAW ones. + */ + if (listen == true) + return; + + write_lock(&local->raw_sockets.lock); + + sk_for_each_safe(sk, tmp, &local->raw_sockets.head) { + llcp_sock = nfc_llcp_sock(sk); + + bh_lock_sock(sk); + + nfc_llcp_socket_purge(llcp_sock); + + sk->sk_state = LLCP_CLOSED; + + sk->sk_state_change(sk); + + bh_unlock_sock(sk); + + sock_orphan(sk); + + sk_del_node_init(sk); + } + + write_unlock(&local->raw_sockets.lock); } struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) -- cgit v1.1 From 3bbc0ceb7ac2bf694d31362eea2c71a680e5deeb Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 17:01:06 +0100 Subject: NFC: llcp: Report error to pending sockets when a device is removed Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 8a35423..b530afa 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -68,7 +68,8 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) } } -static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) +static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, + int err) { struct sock *sk; struct hlist_node *tmp; @@ -100,7 +101,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) nfc_llcp_accept_unlink(accept_sk); + if (err) + accept_sk->sk_err = err; accept_sk->sk_state = LLCP_CLOSED; + accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); @@ -123,7 +127,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) continue; } + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; + sk->sk_state_change(sk); bh_unlock_sock(sk); @@ -150,8 +157,9 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) nfc_llcp_socket_purge(llcp_sock); + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; - sk->sk_state_change(sk); bh_unlock_sock(sk); @@ -173,7 +181,7 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) static void local_cleanup(struct nfc_llcp_local *local, bool listen) { - nfc_llcp_socket_release(local, listen); + nfc_llcp_socket_release(local, listen, ENXIO); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); @@ -1382,7 +1390,7 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev) return; /* Close and purge all existing sockets */ - nfc_llcp_socket_release(local, true); + nfc_llcp_socket_release(local, true, 0); } void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, -- cgit v1.1 From b47506d91259c29b9c75c404737eb6525556f9b4 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Mon, 4 Mar 2013 10:39:49 +0800 Subject: batman-adv: verify tt len does not exceed packet len batadv_iv_ogm_process() accesses the packet using the tt_num_changes attribute regardless of the real packet len (assuming the length check was done before). Therefore a length check is needed to avoid reading random memory. Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a0b253e..a5bb0a76 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1288,7 +1288,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; /* unpack the aggregated packets and process them one by one */ - do { + while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, + batadv_ogm_packet->tt_num_changes)) { tt_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, tt_buff, @@ -1299,8 +1300,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, packet_pos = packet_buff + buff_pos; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; - } while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, - batadv_ogm_packet->tt_num_changes)); + } kfree_skb(skb); return NET_RX_SUCCESS; -- cgit v1.1 From 4660c7f498c07c43173142ea95145e9dac5a6d14 Mon Sep 17 00:00:00 2001 From: David Ward Date: Mon, 11 Mar 2013 10:43:39 +0000 Subject: net/ipv4: Ensure that location of timestamp option is stored This is needed in order to detect if the timestamp option appears more than once in a packet, to remove the option if the packet is fragmented, etc. My previous change neglected to store the option location when the router addresses were prespecified and Pointer > Length. But now the option location is also stored when Flag is an unrecognized value, to ensure these option handling behaviors are still performed. Signed-off-by: David Ward Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 310a364..ec72645 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -370,7 +370,6 @@ int ip_options_compile(struct net *net, } switch (optptr[3]&0xF) { case IPOPT_TS_TSONLY: - opt->ts = optptr - iph; if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; @@ -381,7 +380,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; if (rt) { spec_dst_fill(&spec_dst, skb); memcpy(&optptr[optptr[2]-1], &spec_dst, 4); @@ -396,7 +394,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); @@ -429,12 +426,12 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 3; goto error; } - opt->ts = optptr - iph; if (skb) { optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); opt->is_changed = 1; } } + opt->ts = optptr - iph; break; case IPOPT_RA: if (optlen < 4) { -- cgit v1.1 From 3da889b616164bde76a37350cf28e0d17a94e979 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 11 Mar 2013 13:52:17 +0000 Subject: bridge: reserve space for IFLA_BRPORT_FAST_LEAVE The bridge multicast fast leave feature was added sufficient space was not reserved in the netlink message. This means the flag may be lost in netlink events and results of queries. Found by observation while looking up some netlink stuff for discussion with Vlad. Problem introduced by commit c2d3babfafbb9f6629cfb47139758e59a5eb0d80 Author: David S. Miller Date: Wed Dec 5 16:24:45 2012 -0500 bridge: implement multicast fast leave Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 27aa3ee..db12a0f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -29,6 +29,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_MODE */ + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + 0; } -- cgit v1.1 From c80a8512ee3a8e1f7c3704140ea55f21dc6bd651 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 11 Mar 2013 20:30:44 +0000 Subject: net/core: move vlan_depth out of while loop in skb_network_protocol() [ Bug added added in commit 05e8ef4ab2d8087d (net: factor out skb_mac_gso_segment() from skb_gso_segment() ) ] move vlan_depth out of while loop, or else vlan_depth always is ETH_HLEN, can not be increased, and lead to infinite loop when frame has two vlan headers. Signed-off-by: Li RongQing Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index dffbef7..d540ced 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2219,9 +2219,9 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; __be16 type = skb->protocol; + int vlan_depth = ETH_HLEN; while (type == htons(ETH_P_8021Q)) { - int vlan_depth = ETH_HLEN; struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) -- cgit v1.1 From f2815633504b442ca0b0605c16bf3d88a3a0fcea Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 12 Mar 2013 15:53:23 +0000 Subject: sctp: Use correct sideffect command in duplicate cookie handling When SCTP is done processing a duplicate cookie chunk, it tries to delete a newly created association. For that, it has to set the right association for the side-effect processing to work. However, when it uses the SCTP_CMD_NEW_ASOC command, that performs more work then really needed (like hashing the associationa and assigning it an id) and there is no point to do that only to delete the association as a next step. In fact, it also creates an impossible condition where an association may be found by the getsockopt() call, and that association is empty. This causes a crash in some sctp getsockopts. The solution is rather simple. We simply use SCTP_CMD_SET_ASOC command that doesn't have all the overhead and does exactly what we need. Reported-by: Karl Heiss Tested-by: Karl Heiss CC: Neil Horman Signed-off-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5131fcf..de1a013 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2082,7 +2082,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, } /* Delete the tempory new association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); /* Restore association pointer to provide SCTP command interpeter -- cgit v1.1 From 2317f449af30073cfa6ec8352e4a65a89e357bdd Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Thu, 7 Mar 2013 21:39:37 +0000 Subject: sctp: don't break the loop while meeting the active_path so as to find the matched transport sctp_assoc_lookup_tsn() function searchs which transport a certain TSN was sent on, if not found in the active_path transport, then go search all the other transports in the peer's transport_addr_list, however, we should continue to the next entry rather than break the loop when meet the active_path transport. Signed-off-by: Xufeng Zhang Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 43cd0dd..d2709e2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1079,7 +1079,7 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc, transports) { if (transport == active) - break; + continue; list_for_each_entry(chunk, &transport->transmitted, transmitted_list) { if (key == chunk->subh.data_hdr->tsn) { -- cgit v1.1 From 16fad69cfe4adbbfa813de516757b87bcae36d93 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2013 05:40:32 +0000 Subject: tcp: fix skb_availroom() Chrome OS team reported a crash on a Pixel ChromeBook in TCP stack : https://code.google.com/p/chromium/issues/detail?id=182056 commit a21d45726acac (tcp: avoid order-1 allocations on wifi and tx path) did a poor choice adding an 'avail_size' field to skb, while what we really needed was a 'reserved_tailroom' one. It would have avoided commit 22b4a4f22da (tcp: fix retransmit of partially acked frames) and this commit. Crash occurs because skb_split() is not aware of the 'avail_size' management (and should not be aware) Signed-off-by: Eric Dumazet Reported-by: Mukesh Agrawal Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_output.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 47e854f..e220207 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -775,7 +775,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) * Make sure that we have exactly size bytes * available to the caller, no more, no less. */ - skb->avail_size = size; + skb->reserved_tailroom = skb->end - skb->tail - size; return skb; } __kfree_skb(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2b4461..817fbb3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1298,7 +1298,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); - skb->avail_size -= eat; len -= eat; if (!len) return; -- cgit v1.1 From eb20ff9c91ddcb2d55c1849a87d3db85af5e88a9 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 13 Mar 2013 19:46:20 -0300 Subject: Bluetooth: Fix not closing SCO sockets in the BT_CONNECT2 state With deferred setup for SCO, it is possible that userspace closes the socket when it is in the BT_CONNECT2 state, after the Connect Request is received but before the Accept Synchonous Connection is sent. If this happens the following crash was observed, when the connection is terminated: [ +0.000003] hci_sync_conn_complete_evt: hci0 status 0x10 [ +0.000005] sco_connect_cfm: hcon ffff88003d1bd800 bdaddr 40:98:4e:32:d7:39 status 16 [ +0.000003] sco_conn_del: hcon ffff88003d1bd800 conn ffff88003cc8e300, err 110 [ +0.000015] BUG: unable to handle kernel NULL pointer dereference at 0000000000000199 [ +0.000906] IP: [] __lock_acquire+0xed/0xe82 [ +0.000000] PGD 3d21f067 PUD 3d291067 PMD 0 [ +0.000000] Oops: 0002 [#1] SMP [ +0.000000] Modules linked in: rfcomm bnep btusb bluetooth [ +0.000000] CPU 0 [ +0.000000] Pid: 1481, comm: kworker/u:2H Not tainted 3.9.0-rc1-25019-gad82cdd #1 Bochs Bochs [ +0.000000] RIP: 0010:[] [] __lock_acquire+0xed/0xe82 [ +0.000000] RSP: 0018:ffff88003c3c19d8 EFLAGS: 00010002 [ +0.000000] RAX: 0000000000000001 RBX: 0000000000000246 RCX: 0000000000000000 [ +0.000000] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88003d1be868 [ +0.000000] RBP: ffff88003c3c1a98 R08: 0000000000000002 R09: 0000000000000000 [ +0.000000] R10: ffff88003d1be868 R11: ffff88003e20b000 R12: 0000000000000002 [ +0.000000] R13: ffff88003aaa8000 R14: 000000000000006e R15: ffff88003d1be850 [ +0.000000] FS: 0000000000000000(0000) GS:ffff88003e200000(0000) knlGS:0000000000000000 [ +0.000000] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ +0.000000] CR2: 0000000000000199 CR3: 000000003c1cb000 CR4: 00000000000006b0 [ +0.000000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.000000] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ +0.000000] Process kworker/u:2H (pid: 1481, threadinfo ffff88003c3c0000, task ffff88003aaa8000) [ +0.000000] Stack: [ +0.000000] ffffffff81b16342 0000000000000000 0000000000000000 ffff88003d1be868 [ +0.000000] ffffffff00000000 00018c0c7863e367 000000003c3c1a28 ffffffff8101efbd [ +0.000000] 0000000000000000 ffff88003e3d2400 ffff88003c3c1a38 ffffffff81007c7a [ +0.000000] Call Trace: [ +0.000000] [] ? kvm_clock_read+0x34/0x3b [ +0.000000] [] ? paravirt_sched_clock+0x9/0xd [ +0.000000] [] ? sched_clock+0x9/0xb [ +0.000000] [] ? sched_clock_local+0x12/0x75 [ +0.000000] [] lock_acquire+0x93/0xb1 [ +0.000000] [] ? spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] ? lock_release_holdtime.part.22+0x4e/0x55 [ +0.000000] [] _raw_spin_lock+0x40/0x74 [ +0.000000] [] ? spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] ? _raw_spin_unlock+0x23/0x36 [ +0.000000] [] spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] sco_conn_del+0x76/0xbb [bluetooth] [ +0.000000] [] sco_connect_cfm+0x2da/0x2e9 [bluetooth] [ +0.000000] [] hci_proto_connect_cfm+0x38/0x65 [bluetooth] [ +0.000000] [] hci_sync_conn_complete_evt.isra.79+0x11a/0x13e [bluetooth] [ +0.000000] [] hci_event_packet+0x153b/0x239d [bluetooth] [ +0.000000] [] ? _raw_spin_unlock_irqrestore+0x48/0x5c [ +0.000000] [] hci_rx_work+0xf3/0x2e3 [bluetooth] [ +0.000000] [] process_one_work+0x1dc/0x30b [ +0.000000] [] ? process_one_work+0x172/0x30b [ +0.000000] [] ? spin_lock_irq+0x9/0xb [ +0.000000] [] worker_thread+0x123/0x1d2 [ +0.000000] [] ? manage_workers+0x240/0x240 [ +0.000000] [] kthread+0x9d/0xa5 [ +0.000000] [] ? __kthread_parkme+0x60/0x60 [ +0.000000] [] ret_from_fork+0x7c/0xb0 [ +0.000000] [] ? __kthread_parkme+0x60/0x60 [ +0.000000] Code: d7 44 89 8d 50 ff ff ff 4c 89 95 58 ff ff ff e8 44 fc ff ff 44 8b 8d 50 ff ff ff 48 85 c0 4c 8b 95 58 ff ff ff 0f 84 7a 04 00 00 ff 80 98 01 00 00 83 3d 25 41 a7 00 00 45 8b b5 e8 05 00 00 [ +0.000000] RIP [] __lock_acquire+0xed/0xe82 [ +0.000000] RSP [ +0.000000] CR2: 0000000000000199 [ +0.000000] ---[ end trace e73cd3b52352dd34 ]--- Cc: stable@vger.kernel.org [3.8] Signed-off-by: Vinicius Costa Gomes Tested-by: Frederic Dalleau Signed-off-by: Gustavo Padovan --- net/bluetooth/sco.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 57f250c..aaf1957 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -361,6 +361,7 @@ static void __sco_sock_close(struct sock *sk) sco_chan_del(sk, ECONNRESET); break; + case BT_CONNECT2: case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); -- cgit v1.1 From 0d98da5d845e0d0293055913ce65c9904b3b902a Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 7 Mar 2013 17:20:46 +0000 Subject: netfilter: nf_conntrack: register pernet subsystem before register L4 proto In (c296bb4 netfilter: nf_conntrack: refactor l4proto support for netns) the l4proto gre/dccp/udplite/sctp registration happened before the pernet subsystem, which is wrong. Register pernet subsystem before register L4proto since after register L4proto, init_conntrack may try to access the resources which allocated in register_pernet_subsys. Reported-by: Alexey Dobriyan Cc: Alexey Dobriyan Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_dccp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_gre.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_sctp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_udplite.c | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 432f957..ba65b20 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -969,6 +969,10 @@ static int __init nf_conntrack_proto_dccp_init(void) { int ret; + ret = register_pernet_subsys(&dccp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&dccp_proto4); if (ret < 0) goto out_dccp4; @@ -977,16 +981,12 @@ static int __init nf_conntrack_proto_dccp_init(void) if (ret < 0) goto out_dccp6; - ret = register_pernet_subsys(&dccp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&dccp_proto6); out_dccp6: nf_ct_l4proto_unregister(&dccp_proto4); out_dccp4: + unregister_pernet_subsys(&dccp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index bd7d01d..155ce9f 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -420,18 +420,18 @@ static int __init nf_ct_proto_gre_init(void) { int ret; - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); - if (ret < 0) - goto out_gre4; - ret = register_pernet_subsys(&proto_gre_net_ops); if (ret < 0) goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + if (ret < 0) + goto out_gre4; + return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); out_gre4: + unregister_pernet_subsys(&proto_gre_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 480f616..ec83536 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -888,6 +888,10 @@ static int __init nf_conntrack_proto_sctp_init(void) { int ret; + ret = register_pernet_subsys(&sctp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); if (ret < 0) goto out_sctp4; @@ -896,16 +900,12 @@ static int __init nf_conntrack_proto_sctp_init(void) if (ret < 0) goto out_sctp6; - ret = register_pernet_subsys(&sctp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); out_sctp6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); out_sctp4: + unregister_pernet_subsys(&sctp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 1574895..ca969f6 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -371,6 +371,10 @@ static int __init nf_conntrack_proto_udplite_init(void) { int ret; + ret = register_pernet_subsys(&udplite_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); if (ret < 0) goto out_udplite4; @@ -379,16 +383,12 @@ static int __init nf_conntrack_proto_udplite_init(void) if (ret < 0) goto out_udplite6; - ret = register_pernet_subsys(&udplite_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); out_udplite6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); out_udplite4: + unregister_pernet_subsys(&udplite_net_ops); +out_pernet: return ret; } -- cgit v1.1 From bae99f7a1d372374aaf9ed8910f3b825da995b36 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Mar 2013 06:03:18 +0000 Subject: netfilter: nfnetlink_queue: fix incorrect initialization of copy range field 2^16 = 0xffff, not 0xfffff (note the extra 'f'). Not dangerous since you adjust it to min_t(data_len, skb->len) just after on. Reported-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 858fd52..1cb4854 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -112,7 +112,7 @@ instance_create(u_int16_t queue_num, int portid) inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xfffff; + inst->copy_range = 0xffff; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); -- cgit v1.1 From a82783c91d5dce680dbd290ebf301a520b0e72a5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Mar 2013 20:11:01 +0000 Subject: netfilter: ip6t_NPT: restrict to mangle table As the translation is stateless, using it in nat table doesn't work (only initial packet is translated). filter table OUTPUT works but won't re-route the packet after translation. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_NPT.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 83acc14..33608c6 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -114,6 +114,7 @@ ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", + .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, @@ -124,6 +125,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { }, { .name = "DNPT", + .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, -- cgit v1.1 From 8c6216d7f118a128678270824b6a1286a63863ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 13 Mar 2013 02:37:49 +0000 Subject: Revert "ip_gre: make ipgre_tunnel_xmit() not parse network header as IP unconditionally" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 412ed94744d16806fbec3bd250fd94e71cde5a1f. The commit is wrong as tiph points to the outer IPv4 header which is installed at ipgre_header() and not the inner one which is protocol dependant. This commit broke succesfully opennhrp which use PF_PACKET socket with ETH_P_NHRP protocol. Additionally ssl_addr is set to the link-layer IPv4 address. This address is written by ipgre_header() to the skb earlier, and this is the IPv4 header tiph should point to - regardless of the inner protocol payload. Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d0ef0e6..91d66db 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -798,10 +798,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; - if (skb->protocol == htons(ETH_P_IP)) - tiph = (const struct iphdr *)skb->data; - else - tiph = &tunnel->parms.iph; + tiph = (const struct iphdr *)skb->data; } else { gre_hlen = tunnel->hlen; tiph = &tunnel->parms.iph; -- cgit v1.1 From a5b8db91442fce9c9713fcd656c3698f1adde1d6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Mar 2013 04:18:58 +0000 Subject: rtnetlink: Mask the rta_type when range checking Range/validity checks on rta_type in rtnetlink_rcv_msg() do not account for flags that may be set. This causes the function to return -EINVAL when flags are set on the type (for example NLA_F_NESTED). Signed-off-by: Vlad Yasevich Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a585d45..5fb8d7e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2621,7 +2621,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); while (RTA_OK(attr, attrlen)) { - unsigned int flavor = attr->rta_type; + unsigned int flavor = attr->rta_type & NLA_TYPE_MASK; if (flavor) { if (flavor > rta_max[sz_idx]) return -EINVAL; -- cgit v1.1 From 3d84fa98aca7f05f7010022bc45acb1b50326332 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 15 Mar 2013 06:39:12 +0000 Subject: bridge: Add support for setting BR_ROOT_BLOCK flag. Most of the support was already there. The only thing that was missing was the call to set the flag. Add this call. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index db12a0f..299fc5f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -330,6 +330,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); -- cgit v1.1 From 92f28d973cce45ef5823209aab3138eb45d8b349 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 15 Mar 2013 01:03:33 -0700 Subject: scm: Require CAP_SYS_ADMIN over the current pidns to spoof pids. Don't allow spoofing pids over unix domain sockets in the corner cases where a user has created a user namespace but has not yet created a pid namespace. Cc: stable@vger.kernel.org Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- net/core/scm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index 905dcc6..2dc6cda 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || -- cgit v1.1 From 0d4f0608619de59fd8169dd8e72aadc28d80e715 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Mar 2013 07:01:28 +0000 Subject: tcp: dont handle MTU reduction on LISTEN socket When an ICMP ICMP_FRAG_NEEDED (or ICMPV6_PKT_TOOBIG) message finds a LISTEN socket, and this socket is currently owned by the user, we set TCP_MTU_REDUCED_DEFERRED flag in listener tsq_flags. This is bad because if we clone the parent before it had a chance to clear the flag, the child inherits the tsq_flags value, and next tcp_release_cb() on the child will decrement sk_refcnt. Result is that we might free a live TCP socket, as reported by Dormando. IPv4: Attempt to release TCP socket in state 1 Fix this issue by testing sk_state against TCP_LISTEN early, so that we set TCP_MTU_REDUCED_DEFERRED on appropriate sockets (not a LISTEN one) This bug was introduced in commit 563d34d05786 (tcp: dont drop MTU reduction indications) Reported-by: dormando Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 14 +++++++------- net/ipv6/tcp_ipv6.c | 7 +++++++ 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4a8ec45..d09203c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -274,13 +274,6 @@ static void tcp_v4_mtu_reduced(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 mtu = tcp_sk(sk)->mtu_info; - /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs - * send out by Linux are always <576bytes so they should go through - * unfragmented). - */ - if (sk->sk_state == TCP_LISTEN) - return; - dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -408,6 +401,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = info; if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9b64600..f6d629f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -389,6 +389,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (type == ICMPV6_PKT_TOOBIG) { + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); -- cgit v1.1 From cf2e39429c245245db889fffdfbdf3f889a6cb22 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:06 +0200 Subject: ipvs: fix sctp chunk length order Fix wrong but non-fatal access to chunk length. sch->length should be in network order, next chunk should be aligned to 4 bytes. Problem noticed in sparse output. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index ae8ec6f..cd1d729 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -906,7 +906,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; - int ihl; + int ihl, cofs; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -914,8 +914,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ihl = ip_hdrlen(skb); #endif - sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), - sizeof(_sctpch), &_sctpch); + cofs = ihl + sizeof(sctp_sctphdr_t); + sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); if (sch == NULL) return; @@ -933,10 +933,12 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, */ if ((sch->type == SCTP_CID_COOKIE_ECHO) || (sch->type == SCTP_CID_COOKIE_ACK)) { - sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + - sch->length), sizeof(_sctpch), &_sctpch); - if (sch) { - if (sch->type == SCTP_CID_ABORT) + int clen = ntohs(sch->length); + + if (clen >= sizeof(sctp_chunkhdr_t)) { + sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), + sizeof(_sctpch), &_sctpch); + if (sch && sch->type == SCTP_CID_ABORT) chunk_type = sch->type; } } -- cgit v1.1 From 0c12582fbcdea0cbb0dfd224e1c5f9a8428ffa18 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:04 +0200 Subject: ipvs: add backup_only flag to avoid loops Dmitry Akindinov is reporting for a problem where SYNs are looping between the master and backup server when the backup server is used as real server in DR mode and has IPVS rules to function as director. Even when the backup function is enabled we continue to forward traffic and schedule new connections when the current master is using the backup server as real server. While this is not a problem for NAT, for DR and TUN method the backup server can not determine if a request comes from client or from director. To avoid such loops add new sysctl flag backup_only. It can be needed for DR/TUN setups that do not need backup and director function at the same time. When the backup function is enabled we stop any forwarding and pass the traffic to the local stack (real server mode). The flag disables the director function when the backup function is enabled. For setups that enable backup function for some virtual services and director function for other virtual services there should be another more complex solution to support DR/TUN mode, may be to assign per-virtual service syncid value, so that we can differentiate the requests. Reported-by: Dmitry Akindinov Tested-by: German Myzovsky Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 12 ++++++++---- net/netfilter/ipvs/ip_vs_ctl.c | 7 +++++++ 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 47edf5a..18b4bc5 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1577,7 +1577,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; ip_vs_fill_iph_skb(af, skb, &iph); @@ -1654,7 +1655,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1815,13 +1815,15 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp(skb, &r, hooknum); @@ -1835,6 +1837,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; struct ip_vs_iphdr iphdr; ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); @@ -1843,7 +1846,8 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c68198b..9e2d1cc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1808,6 +1808,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "backup_only", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3741,6 +3747,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; ipvs->sysctl_pmtu_disc = 1; tbl[idx++].data = &ipvs->sysctl_pmtu_disc; + tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); -- cgit v1.1 From bf93ad72cd8cfabe66a7b3d66236a1266d357189 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:05 +0200 Subject: ipvs: remove extra rcu lock In 3.7 we added code that uses ipv4_update_pmtu but after commit c5ae7d4192 (ipv4: must use rcu protection while calling fib_lookup) the RCU lock is not needed. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 18b4bc5..61f49d2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1394,10 +1394,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - rcu_read_lock(); ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); - rcu_read_unlock(); /* Client uses PMTUD? */ if (!(cih->frag_off & htons(IP_DF))) goto ignore_ipip; -- cgit v1.1 From 5a3da1fe9561828d0ca7eca664b16ec2b9bf0055 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 15 Mar 2013 11:32:30 +0000 Subject: inet: limit length of fragment queue hash table bucket lists This patch introduces a constant limit of the fragment queue hash table bucket list lengths. Currently the limit 128 is choosen somewhat arbitrary and just ensures that we can fill up the fragment cache with empty packets up to the default ip_frag_high_thresh limits. It should just protect from list iteration eating considerable amounts of cpu. If we reach the maximum length in one hash bucket a warning is printed. This is implemented on the caller side of inet_frag_find to distinguish between the different users of inet_fragment.c. I dropped the out of memory warning in the ipv4 fragment lookup path, because we already get a warning by the slab allocator. Cc: Eric Dumazet Cc: Jesper Dangaard Brouer Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 20 +++++++++++++++++++- net/ipv4/ip_fragment.c | 11 ++++------- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 ++++++------ net/ipv6/reassembly.c | 8 ++++++-- 4 files changed, 35 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 245ae078a..f4fd23d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -21,6 +21,7 @@ #include #include +#include #include static void inet_frag_secret_rebuild(unsigned long dummy) @@ -277,6 +278,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, __releases(&f->lock) { struct inet_frag_queue *q; + int depth = 0; hlist_for_each_entry(q, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { @@ -284,9 +286,25 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, read_unlock(&f->lock); return q; } + depth++; } read_unlock(&f->lock); - return inet_frag_create(nf, f, key); + if (depth <= INETFRAGS_MAXDEPTH) + return inet_frag_create(nf, f, key); + else + return ERR_PTR(-ENOBUFS); } EXPORT_SYMBOL(inet_frag_find); + +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix) +{ + static const char msg[] = "inet_frag_find: Fragment hash bucket" + " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) + ". Dropping fragment.\n"; + + if (PTR_ERR(q) == -ENOBUFS) + LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); +} +EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b6d30ac..a6445b8 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -292,14 +292,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); - if (q == NULL) - goto out_nomem; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct ipq, q); - -out_nomem: - LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n")); - return NULL; } /* Is the fragment too far ahead to be part of ipq? */ diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 54087e9..6700069 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -14,6 +14,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "IPv6-nf: " fmt + #include #include #include @@ -180,13 +182,11 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); - if (q == NULL) - goto oom; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct frag_queue, q); - -oom: - return NULL; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3c6a772..196ab93 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -26,6 +26,9 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ + +#define pr_fmt(fmt) "IPv6: " fmt + #include #include #include @@ -185,9 +188,10 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); - if (q == NULL) + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; - + } return container_of(q, struct frag_queue, q); } -- cgit v1.1 From 3dd6664fac7e6041bfc8756ae9e8c78f59108cd9 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 19 Mar 2013 13:09:59 +0000 Subject: netfilter: remove unused "config IP_NF_QUEUE" Kconfig symbol IP_NF_QUEUE is unused since commit d16cf20e2f2f13411eece7f7fb72c17d141c4a84 ("netfilter: remove ip_queue support"). Let's remove it too. Signed-off-by: Paul Bolle Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ce2d43e..0d755c5 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,19 +36,6 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config IP_NF_QUEUE - tristate "IP Userspace queueing via NETLINK (OBSOLETE)" - depends on NETFILTER_ADVANCED - help - Netfilter has the ability to queue packets to user space: the - netlink device can be used to access them using this driver. - - This option enables the old IPv4-only "ip_queue" implementation - which has been obsoleted by the new "nfnetlink_queue" code (see - CONFIG_NETFILTER_NETLINK_QUEUE). - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n -- cgit v1.1 From f1e79e208076ffe7bad97158275f1c572c04f5c7 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Tue, 19 Mar 2013 01:47:27 +0000 Subject: genetlink: trigger BUG_ON if a group name is too long Trigger BUG_ON if a group name is longer than GENL_NAMSIZ. Signed-off-by: Masatake YAMATO Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f2aabb6..5a55be3 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -142,6 +142,7 @@ int genl_register_mc_group(struct genl_family *family, int err = 0; BUG_ON(grp->name[0] == '\0'); + BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); genl_lock(); -- cgit v1.1 From 44046a593eb770dbecdabf1c82bcd252f2a8337b Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:12 +0000 Subject: udp: add encap_destroy callback Users of udp encapsulation currently have an encap_rcv callback which they can use to hook into the udp receive path. In situations where a encapsulation user allocates resources associated with a udp encap socket, it may be convenient to be able to also hook the proto .destroy operation. For example, if an encap user holds a reference to the udp socket, the destroy hook might be used to relinquish this reference. This patch adds a socket destroy hook into udp, which is set and enabled in the same way as the existing encap_rcv hook. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/ipv4/udp.c | 7 +++++++ net/ipv6/udp.c | 8 ++++++++ 2 files changed, 15 insertions(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 265c42c..0a073a2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1762,9 +1762,16 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); + if (static_key_false(&udp_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } } /* diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 599e1ba6..d8e5e85 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1285,10 +1285,18 @@ do_confirm: void udpv6_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + inet6_destroy_sock(sk); } -- cgit v1.1 From 9980d001cec86c3c75f3a6008ddb73c397ea3b3e Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:13 +0000 Subject: l2tp: add udp encap socket destroy handler L2TP sessions hold a reference to the tunnel socket to prevent it going away while sessions are still active. However, since tunnel destruction is handled by the sock sk_destruct callback there is a catch-22: a tunnel with sessions cannot be deleted since each session holds a reference to the tunnel socket. If userspace closes a managed tunnel socket, or dies, the tunnel will persist and it will be neccessary to individually delete the sessions using netlink commands. This is ugly. To prevent this occuring, this patch leverages the udp encapsulation socket destroy callback to gain early notification when the tunnel socket is closed. This allows us to safely close the sessions running in the tunnel, dropping the tunnel socket references in the process. The tunnel socket is then destroyed as normal, and the tunnel resources deallocated in sk_destruct. While we're at it, ensure that l2tp_tunnel_closeall correctly drops session references to allow the sessions to be deleted rather than leaking. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index d36875f..ee726a7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1282,6 +1282,7 @@ static void l2tp_tunnel_destruct(struct sock *sk) /* No longer an encapsulation socket. See net/ipv4/udp.c */ (udp_sk(sk))->encap_type = 0; (udp_sk(sk))->encap_rcv = NULL; + (udp_sk(sk))->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; @@ -1360,6 +1361,8 @@ again: if (session->deref != NULL) (*session->deref)(session); + l2tp_session_dec_refcount(session); + write_lock_bh(&tunnel->hlist_lock); /* Now restart from the beginning of this hash @@ -1373,6 +1376,16 @@ again: write_unlock_bh(&tunnel->hlist_lock); } +/* Tunnel socket destroy hook for UDP encapsulation */ +static void l2tp_udp_encap_destroy(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } +} + /* Really kill the tunnel. * Come here only when all sessions have been cleared from the tunnel. */ @@ -1668,6 +1681,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) udpv6_encap_enable(); -- cgit v1.1 From e34f4c7050e5471b6d4fb25380713937fc837514 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:14 +0000 Subject: l2tp: export l2tp_tunnel_closeall l2tp_core internally uses l2tp_tunnel_closeall to close all sessions in a tunnel when a UDP-encapsulation socket is destroyed. We need to do something similar for IP-encapsulation sockets. Export l2tp_tunnel_closeall as a GPL symbol to enable l2tp_ip and l2tp_ip6 to call it from their .destroy handlers. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++-- net/l2tp/l2tp_core.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ee726a7..287e327 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -114,7 +114,6 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); static inline struct l2tp_net *l2tp_pernet(struct net *net) { @@ -1312,7 +1311,7 @@ end: /* When the tunnel is closed, all the attached sessions need to go too. */ -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { int hash; struct hlist_node *walk; @@ -1375,6 +1374,7 @@ again: } write_unlock_bh(&tunnel->hlist_lock); } +EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 8eb8f1d..b0861f6 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -240,6 +240,7 @@ extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); extern int l2tp_session_delete(struct l2tp_session *session); -- cgit v1.1 From 936063175afd895913a5e9db77e1a0ef43ea44ea Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:15 +0000 Subject: l2tp: close sessions in ip socket destroy callback l2tp_core hooks UDP's .destroy handler to gain advance warning of a tunnel socket being closed from userspace. We need to do the same thing for IP-encapsulation sockets. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 6 ++++++ net/l2tp/l2tp_ip6.c | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 7f41b70..571db8d 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -228,10 +228,16 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { struct sk_buff *skb; + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + sk_refcnt_debug_dec(sk); } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 41f2f81..c74f5a9 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -241,10 +241,17 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + inet6_destroy_sock(sk); } -- cgit v1.1 From 2b551c6e7d5bca2c78c216b15ef675653d4f459a Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:16 +0000 Subject: l2tp: close sessions before initiating tunnel delete When a user deletes a tunnel using netlink, all the sessions in the tunnel should also be deleted. Since running sessions will pin the tunnel socket with the references they hold, have the l2tp_tunnel_delete close all sessions in a tunnel before finally closing the tunnel socket. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 287e327..0dd50c0 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1737,6 +1737,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_closeall(tunnel); return (false == queue_work(l2tp_wq, &tunnel->del_work)); } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); -- cgit v1.1 From 8abbbe8ff572fd84d1b98eb9acf30611a97cf72e Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:17 +0000 Subject: l2tp: take a reference for kernel sockets in l2tp_tunnel_sock_lookup When looking up the tunnel socket in struct l2tp_tunnel, hold a reference whether the socket was created by the kernel or by userspace. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 0dd50c0..45373fe 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -191,6 +191,7 @@ struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) } else { /* Socket is owned by kernelspace */ sk = tunnel->sock; + sock_hold(sk); } out: @@ -209,6 +210,7 @@ void l2tp_tunnel_sock_put(struct sock *sk) } sock_put(sk); } + sock_put(sk); } EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); -- cgit v1.1 From 02d13ed5f94af38c37d1abd53462fe48d78bcc9d Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:18 +0000 Subject: l2tp: don't BUG_ON sk_socket being NULL It is valid for an existing struct sock object to have a NULL sk_socket pointer, so don't BUG_ON in l2tp_tunnel_del_work if that should occur. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 45373fe..e841ef2 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1412,19 +1412,21 @@ static void l2tp_tunnel_del_work(struct work_struct *work) return; sock = sk->sk_socket; - BUG_ON(!sock); - /* If the tunnel socket was created directly by the kernel, use the - * sk_* API to release the socket now. Otherwise go through the - * inet_* layer to shut the socket down, and let userspace close it. + /* If the tunnel socket was created by userspace, then go through the + * inet layer to shut the socket down, and let userspace close it. + * Otherwise, if we created the socket directly within the kernel, use + * the sk API to release it here. * In either case the tunnel resources are freed in the socket * destructor when the tunnel socket goes away. */ - if (sock->file == NULL) { - kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + if (tunnel->fd >= 0) { + if (sock) + inet_shutdown(sock, 2); } else { - inet_shutdown(sock, 2); + if (sock) + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sk); } l2tp_tunnel_sock_put(sk); -- cgit v1.1 From 48f72f92b31431c40279b0fba6c5588e07e67d95 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:19 +0000 Subject: l2tp: add session reorder queue purge function to core If an l2tp session is deleted, it is necessary to delete skbs in-flight on the session's reorder queue before taking it down. Rather than having each pseudowire implementation reaching into the l2tp_session struct to handle this itself, provide a function in l2tp_core to purge the session queue. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 17 +++++++++++++++++ net/l2tp/l2tp_core.h | 1 + 2 files changed, 18 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e841ef2..69c316d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -829,6 +829,23 @@ discard: } EXPORT_SYMBOL(l2tp_recv_common); +/* Drop skbs from the session's reorder_q + */ +int l2tp_session_queue_purge(struct l2tp_session *session) +{ + struct sk_buff *skb = NULL; + BUG_ON(!session); + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + while ((skb = skb_dequeue(&session->reorder_q))) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + if (session->deref) + (*session->deref)(session); + } + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_session_queue_purge); + /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame * here. The skb is not on a list when we get here. * Returns 0 if the packet was a data packet and was successfully passed on. diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index b0861f6..d40713d 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -246,6 +246,7 @@ extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunne extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); +extern int l2tp_session_queue_purge(struct l2tp_session *session); extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); -- cgit v1.1 From 4c6e2fd35460208596fa099ee0750a4b0438aa5c Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:20 +0000 Subject: l2tp: purge session reorder queue on delete Add calls to l2tp_session_queue_purge as a part of l2tp_tunnel_closeall and l2tp_session_delete. Pseudowire implementations which are deleted only via. l2tp_core l2tp_session_delete calls can dispense with their own code for flushing the reorder queue. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 69c316d..c00f31b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1373,6 +1373,8 @@ again: synchronize_rcu(); } + l2tp_session_queue_purge(session); + if (session->session_close != NULL) (*session->session_close)(session); @@ -1813,6 +1815,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_free); */ int l2tp_session_delete(struct l2tp_session *session) { + l2tp_session_queue_purge(session); + if (session->session_close != NULL) (*session->session_close)(session); -- cgit v1.1 From cf2f5c886a209377daefd5d2ba0bcd49c3887813 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:21 +0000 Subject: l2tp: push all ppp pseudowire shutdown through .release handler If userspace deletes a ppp pseudowire using the netlink API, either by directly deleting the session or by deleting the tunnel that contains the session, we need to tear down the corresponding pppox channel. Rather than trying to manage two pppox unbind codepaths, switch the netlink and l2tp_core session_close handlers to close via. the l2tp_ppp socket .release handler. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 53 ++++++++++------------------------------------------- 1 file changed, 10 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6a53371..7e3e16a 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include @@ -447,34 +448,16 @@ static void pppol2tp_session_close(struct l2tp_session *session) { struct pppol2tp_session *ps = l2tp_session_priv(session); struct sock *sk = ps->sock; - struct sk_buff *skb; + struct socket *sock = sk->sk_socket; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (session->session_id == 0) - goto out; - - if (sk != NULL) { - lock_sock(sk); - - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - sk->sk_state = PPPOX_DEAD; - sk->sk_state_change(sk); - } - - /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } - release_sock(sk); + if (sock) { + inet_shutdown(sock, 2); + /* Don't let the session go away before our socket does */ + l2tp_session_inc_refcount(session); } - -out: return; } @@ -525,16 +508,12 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); if (session != NULL) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } + l2tp_session_queue_purge(session); sock_put(sk); } + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); release_sock(sk); @@ -880,18 +859,6 @@ out: return error; } -/* Called when deleting sessions via the netlink interface. - */ -static int pppol2tp_session_delete(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock == NULL) - l2tp_session_dec_refcount(session); - - return 0; -} - #endif /* CONFIG_L2TP_V3 */ /* getname() support. @@ -1839,7 +1806,7 @@ static const struct pppox_proto pppol2tp_proto = { static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = { .session_create = pppol2tp_session_create, - .session_delete = pppol2tp_session_delete, + .session_delete = l2tp_session_delete, }; #endif /* CONFIG_L2TP_V3 */ -- cgit v1.1 From 7b7c0719cd7afee725b920d75ec6a500b76107e6 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:22 +0000 Subject: l2tp: avoid deadlock in l2tp stats update l2tp's u64_stats writers were incorrectly synchronised, making it possible to deadlock a 64bit machine running a 32bit kernel simply by sending the l2tp code netlink commands while passing data through l2tp sessions. Previous discussion on netdev determined that alternative solutions such as spinlock writer synchronisation or per-cpu data would bring unjustified overhead, given that most users interested in high volume traffic will likely be running 64bit kernels on 64bit hardware. As such, this patch replaces l2tp's use of u64_stats with atomic_long_t, thereby avoiding the deadlock. Ref: http://marc.info/?l=linux-netdev&m=134029167910731&w=2 http://marc.info/?l=linux-netdev&m=134079868111131&w=2 Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 75 +++++++++++++------------------------------------ net/l2tp/l2tp_core.h | 19 ++++++------- net/l2tp/l2tp_debugfs.c | 28 +++++++++--------- net/l2tp/l2tp_netlink.c | 72 ++++++++++++++++++----------------------------- net/l2tp/l2tp_ppp.c | 46 +++++++++++++++--------------- 5 files changed, 93 insertions(+), 147 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c00f31b..97d30ac 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -374,10 +374,8 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk struct sk_buff *skbp; struct sk_buff *tmp; u32 ns = L2TP_SKB_CB(skb)->ns; - struct l2tp_stats *sstats; spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (L2TP_SKB_CB(skbp)->ns > ns) { __skb_queue_before(&session->reorder_q, skbp, skb); @@ -385,9 +383,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", session->name, ns, L2TP_SKB_CB(skbp)->ns, skb_queue_len(&session->reorder_q)); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_oos_packets++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_oos_packets); goto out; } } @@ -404,23 +400,16 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * { struct l2tp_tunnel *tunnel = session->tunnel; int length = L2TP_SKB_CB(skb)->length; - struct l2tp_stats *tstats, *sstats; /* We're about to requeue the skb, so return resources * to its current owner (a socket receive buffer). */ skb_orphan(skb); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += length; - sstats->rx_packets++; - sstats->rx_bytes += length; - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&tunnel->stats.rx_packets); + atomic_long_add(length, &tunnel->stats.rx_bytes); + atomic_long_inc(&session->stats.rx_packets); + atomic_long_add(length, &session->stats.rx_bytes); if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ @@ -451,7 +440,6 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) { struct sk_buff *skb; struct sk_buff *tmp; - struct l2tp_stats *sstats; /* If the pkt at the head of the queue has the nr that we * expect to send up next, dequeue it and any other @@ -459,13 +447,10 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) */ start: spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skb, tmp) { if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); + atomic_long_inc(&session->stats.rx_errors); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -624,7 +609,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; int offset; u32 ns, nr; - struct l2tp_stats *sstats = &session->stats; /* The ref count is increased since we now hold a pointer to * the session. Take care to decrement the refcnt when exiting @@ -641,9 +625,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: cookie mismatch (%u/%u). Discarding.\n", tunnel->name, tunnel->tunnel_id, session->session_id); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_cookie_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } ptr += session->peer_cookie_len; @@ -712,9 +694,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -733,9 +713,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } } @@ -789,9 +767,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * packets */ if (L2TP_SKB_CB(skb)->ns != session->nr) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -817,9 +793,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, return; discard: - u64_stats_update_begin(&sstats->syncp); - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); if (session->deref) @@ -861,7 +835,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, u32 tunnel_id, session_id; u16 version; int length; - struct l2tp_stats *tstats; if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) goto discard_bad_csum; @@ -950,10 +923,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, discard_bad_csum: LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - tstats->rx_errors++; - u64_stats_update_end(&tstats->syncp); + atomic_long_inc(&tunnel->stats.rx_errors); kfree_skb(skb); return 0; @@ -1080,7 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; unsigned int len = skb->len; int error; - struct l2tp_stats *tstats, *sstats; /* Debug */ if (session->send_seq) @@ -1109,21 +1078,15 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, error = ip_queue_xmit(skb, fl); /* Update stats */ - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); if (error >= 0) { - tstats->tx_packets++; - tstats->tx_bytes += len; - sstats->tx_packets++; - sstats->tx_bytes += len; + atomic_long_inc(&tunnel->stats.tx_packets); + atomic_long_add(len, &tunnel->stats.tx_bytes); + atomic_long_inc(&session->stats.tx_packets); + atomic_long_add(len, &session->stats.tx_bytes); } else { - tstats->tx_errors++; - sstats->tx_errors++; + atomic_long_inc(&tunnel->stats.tx_errors); + atomic_long_inc(&session->stats.tx_errors); } - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); return 0; } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index d40713d..519b013 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -36,16 +36,15 @@ enum { struct sk_buff; struct l2tp_stats { - u64 tx_packets; - u64 tx_bytes; - u64 tx_errors; - u64 rx_packets; - u64 rx_bytes; - u64 rx_seq_discards; - u64 rx_oos_packets; - u64 rx_errors; - u64 rx_cookie_discards; - struct u64_stats_sync syncp; + atomic_long_t tx_packets; + atomic_long_t tx_bytes; + atomic_long_t tx_errors; + atomic_long_t rx_packets; + atomic_long_t rx_bytes; + atomic_long_t rx_seq_discards; + atomic_long_t rx_oos_packets; + atomic_long_t rx_errors; + atomic_long_t rx_cookie_discards; }; struct l2tp_tunnel; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index c3813bc..072d720 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -146,14 +146,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, atomic_read(&tunnel->ref_count)); - seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); if (tunnel->show != NULL) tunnel->show(m, tunnel); @@ -203,14 +203,14 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) seq_printf(m, "\n"); } - seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (session->show != NULL) session->show(m, session); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index c1bab22..0825ff2 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -246,8 +246,6 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np = NULL; #endif - struct l2tp_stats stats; - unsigned int start; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); @@ -265,28 +263,22 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&tunnel->stats.syncp); - stats.tx_packets = tunnel->stats.tx_packets; - stats.tx_bytes = tunnel->stats.tx_bytes; - stats.tx_errors = tunnel->stats.tx_errors; - stats.rx_packets = tunnel->stats.rx_packets; - stats.rx_bytes = tunnel->stats.rx_bytes; - stats.rx_errors = tunnel->stats.rx_errors; - stats.rx_seq_discards = tunnel->stats.rx_seq_discards; - stats.rx_oos_packets = tunnel->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&tunnel->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&tunnel->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&tunnel->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&tunnel->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&tunnel->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&tunnel->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&tunnel->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&tunnel->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&tunnel->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); @@ -612,8 +604,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl struct nlattr *nest; struct l2tp_tunnel *tunnel = session->tunnel; struct sock *sk = NULL; - struct l2tp_stats stats; - unsigned int start; sk = tunnel->sock; @@ -656,28 +646,22 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&session->stats.syncp); - stats.tx_packets = session->stats.tx_packets; - stats.tx_bytes = session->stats.tx_bytes; - stats.tx_errors = session->stats.tx_errors; - stats.rx_packets = session->stats.rx_packets; - stats.rx_bytes = session->stats.rx_bytes; - stats.rx_errors = session->stats.rx_errors; - stats.rx_seq_discards = session->stats.rx_seq_discards; - stats.rx_oos_packets = session->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&session->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&session->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&session->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&session->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&session->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&session->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&session->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&session->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&session->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7e3e16a..9d0eb8c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -260,7 +260,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int session->name); /* Not bound. Nothing we can do, so discard. */ - session->stats.rx_errors++; + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } @@ -992,14 +992,14 @@ end: static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, struct l2tp_stats *stats) { - dest->tx_packets = stats->tx_packets; - dest->tx_bytes = stats->tx_bytes; - dest->tx_errors = stats->tx_errors; - dest->rx_packets = stats->rx_packets; - dest->rx_bytes = stats->rx_bytes; - dest->rx_seq_discards = stats->rx_seq_discards; - dest->rx_oos_packets = stats->rx_oos_packets; - dest->rx_errors = stats->rx_errors; + dest->tx_packets = atomic_long_read(&stats->tx_packets); + dest->tx_bytes = atomic_long_read(&stats->tx_bytes); + dest->tx_errors = atomic_long_read(&stats->tx_errors); + dest->rx_packets = atomic_long_read(&stats->rx_packets); + dest->rx_bytes = atomic_long_read(&stats->rx_bytes); + dest->rx_seq_discards = atomic_long_read(&stats->rx_seq_discards); + dest->rx_oos_packets = atomic_long_read(&stats->rx_oos_packets); + dest->rx_errors = atomic_long_read(&stats->rx_errors); } /* Session ioctl helper. @@ -1633,14 +1633,14 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) tunnel->name, (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N', atomic_read(&tunnel->ref_count) - 1); - seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); } static void pppol2tp_seq_session_show(struct seq_file *m, void *v) @@ -1675,14 +1675,14 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (po) seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); -- cgit v1.1 From f6e16b299bacaa71c6604a784f2d088a966f8c23 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:23 +0000 Subject: l2tp: unhash l2tp sessions on delete, not on free If we postpone unhashing of l2tp sessions until the structure is freed, we risk: 1. further packets arriving and getting queued while the pseudowire is being closed down 2. the recv path hitting "scheduling while atomic" errors in the case that recv drops the last reference to a session and calls l2tp_session_free while in atomic context As such, l2tp sessions should be unhashed from l2tp_core data structures early in the teardown process prior to calling pseudowire close. For pseudowires like l2tp_ppp which have multiple shutdown codepaths, provide an unhash hook. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 75 ++++++++++++++++++++++++---------------------------- net/l2tp/l2tp_core.h | 1 + net/l2tp/l2tp_ppp.c | 12 +++------ 3 files changed, 38 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 97d30ac..8aecf5d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1316,26 +1316,12 @@ again: hlist_del_init(&session->hlist); - /* Since we should hold the sock lock while - * doing any unbinding, we need to release the - * lock we're holding before taking that lock. - * Hold a reference to the sock so it doesn't - * disappear as we're jumping between locks. - */ if (session->ref != NULL) (*session->ref)(session); write_unlock_bh(&tunnel->hlist_lock); - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); - } - + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close != NULL) @@ -1732,64 +1718,71 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); */ void l2tp_session_free(struct l2tp_session *session) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = session->tunnel; BUG_ON(atomic_read(&session->ref_count) != 0); - tunnel = session->tunnel; - if (tunnel != NULL) { + if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + if (session->session_id != 0) + atomic_dec(&l2tp_session_count); + sock_put(tunnel->sock); + session->tunnel = NULL; + l2tp_tunnel_dec_refcount(tunnel); + } + + kfree(session); + + return; +} +EXPORT_SYMBOL_GPL(l2tp_session_free); + +/* Remove an l2tp session from l2tp_core's hash lists. + * Provides a tidyup interface for pseudowire code which can't just route all + * shutdown via. l2tp_session_delete and a pseudowire-specific session_close + * callback. + */ +void __l2tp_session_unhash(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel = session->tunnel; - /* Delete the session from the hash */ + /* Remove the session from core hashes */ + if (tunnel) { + /* Remove from the per-tunnel hash */ write_lock_bh(&tunnel->hlist_lock); hlist_del_init(&session->hlist); write_unlock_bh(&tunnel->hlist_lock); - /* Unlink from the global hash if not L2TPv2 */ + /* For L2TPv3 we have a per-net hash: remove from there, too */ if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_session_hlist_lock); hlist_del_init_rcu(&session->global_hlist); spin_unlock_bh(&pn->l2tp_session_hlist_lock); synchronize_rcu(); } - - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); - - sock_put(tunnel->sock); - - /* This will delete the tunnel context if this - * is the last session on the tunnel. - */ - session->tunnel = NULL; - l2tp_tunnel_dec_refcount(tunnel); } - - kfree(session); - - return; } -EXPORT_SYMBOL_GPL(l2tp_session_free); +EXPORT_SYMBOL_GPL(__l2tp_session_unhash); /* This function is used by the netlink SESSION_DELETE command and by pseudowire modules. */ int l2tp_session_delete(struct l2tp_session *session) { + if (session->ref) + (*session->ref)(session); + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); - if (session->session_close != NULL) (*session->session_close)(session); - + if (session->deref) + (*session->ref)(session); l2tp_session_dec_refcount(session); - return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); - /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 519b013..485a490 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -242,6 +242,7 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +extern void __l2tp_session_unhash(struct l2tp_session *session); extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 9d0eb8c..637a341 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -466,19 +466,12 @@ static void pppol2tp_session_close(struct l2tp_session *session) */ static void pppol2tp_session_destruct(struct sock *sk) { - struct l2tp_session *session; - - if (sk->sk_user_data != NULL) { - session = sk->sk_user_data; - if (session == NULL) - goto out; - + struct l2tp_session *session = sk->sk_user_data; + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); l2tp_session_dec_refcount(session); } - -out: return; } @@ -509,6 +502,7 @@ static int pppol2tp_release(struct socket *sock) /* Purge any queued data */ if (session != NULL) { + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); sock_put(sk); } -- cgit v1.1 From 8ed781668dd49b608f1e67a22e3b445fd0c2cd6f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 Mar 2013 06:39:29 +0000 Subject: flow_keys: include thoff into flow_keys for later usage In skb_flow_dissect(), we perform a dissection of a skbuff. Since we're doing the work here anyway, also store thoff for a later usage, e.g. in the BPF filter. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9d4c720..e187bf0 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -140,6 +140,8 @@ ipv6: flow->ports = *ports; } + flow->thoff = (u16) nhoff; + return true; } EXPORT_SYMBOL(skb_flow_dissect); -- cgit v1.1 From 283951f95b067877ca5ea77afaa212bb1e0507b5 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Tue, 19 Mar 2013 08:19:29 +0000 Subject: ipconfig: Fix newline handling in log message. When using ipconfig the logs currently look like: Single name server: [ 3.467270] IP-Config: Complete: [ 3.470613] device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1 [ 3.480670] host=infigo-1, domain=, nis-domain=(none) [ 3.486166] bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath= [ 3.492910] nameserver0=172.16.42.1[ 3.496853] ALSA device list: Three name servers: [ 3.496949] IP-Config: Complete: [ 3.500293] device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1 [ 3.510367] host=infigo-1, domain=, nis-domain=(none) [ 3.515864] bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath= [ 3.522635] nameserver0=172.16.42.1, nameserver1=172.16.42.100 [ 3.529149] , nameserver2=172.16.42.200 Fix newline handling for these cases Signed-off-by: Martin Fuzzey Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 98cbc68..bf6c5cf 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1522,7 +1522,8 @@ static int __init ip_auto_config(void) } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; -- cgit v1.1 From 896ee0eee6261e30c3623be931c3f621428947df Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Mar 2013 05:19:24 +0000 Subject: net/irda: add missing error path release_sock call This makes sure that release_sock is called for all error conditions in irda_getsockopt. Signed-off-by: Kees Cook Reported-by: Brad Spengler Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- net/irda/af_irda.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index d07e3a6..d28e7f0 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2583,8 +2583,10 @@ bed: NULL, NULL, NULL); /* Check if the we got some results */ - if (!self->cachedaddr) - return -EAGAIN; /* Didn't find any devices */ + if (!self->cachedaddr) { + err = -EAGAIN; /* Didn't find any devices */ + goto out; + } daddr = self->cachedaddr; /* Cleanup */ self->cachedaddr = 0; -- cgit v1.1 From f4541d60a449afd40448b06496dcd510f505928e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Mar 2013 17:36:09 +0000 Subject: tcp: preserve ACK clocking in TSO A long standing problem with TSO is the fact that tcp_tso_should_defer() rearms the deferred timer, while it should not. Current code leads to following bad bursty behavior : 20:11:24.484333 IP A > B: . 297161:316921(19760) ack 1 win 119 20:11:24.484337 IP B > A: . ack 263721 win 1117 20:11:24.485086 IP B > A: . ack 265241 win 1117 20:11:24.485925 IP B > A: . ack 266761 win 1117 20:11:24.486759 IP B > A: . ack 268281 win 1117 20:11:24.487594 IP B > A: . ack 269801 win 1117 20:11:24.488430 IP B > A: . ack 271321 win 1117 20:11:24.489267 IP B > A: . ack 272841 win 1117 20:11:24.490104 IP B > A: . ack 274361 win 1117 20:11:24.490939 IP B > A: . ack 275881 win 1117 20:11:24.491775 IP B > A: . ack 277401 win 1117 20:11:24.491784 IP A > B: . 316921:332881(15960) ack 1 win 119 20:11:24.492620 IP B > A: . ack 278921 win 1117 20:11:24.493448 IP B > A: . ack 280441 win 1117 20:11:24.494286 IP B > A: . ack 281961 win 1117 20:11:24.495122 IP B > A: . ack 283481 win 1117 20:11:24.495958 IP B > A: . ack 285001 win 1117 20:11:24.496791 IP B > A: . ack 286521 win 1117 20:11:24.497628 IP B > A: . ack 288041 win 1117 20:11:24.498459 IP B > A: . ack 289561 win 1117 20:11:24.499296 IP B > A: . ack 291081 win 1117 20:11:24.500133 IP B > A: . ack 292601 win 1117 20:11:24.500970 IP B > A: . ack 294121 win 1117 20:11:24.501388 IP B > A: . ack 295641 win 1117 20:11:24.501398 IP A > B: . 332881:351881(19000) ack 1 win 119 While the expected behavior is more like : 20:19:49.259620 IP A > B: . 197601:202161(4560) ack 1 win 119 20:19:49.260446 IP B > A: . ack 154281 win 1212 20:19:49.261282 IP B > A: . ack 155801 win 1212 20:19:49.262125 IP B > A: . ack 157321 win 1212 20:19:49.262136 IP A > B: . 202161:206721(4560) ack 1 win 119 20:19:49.262958 IP B > A: . ack 158841 win 1212 20:19:49.263795 IP B > A: . ack 160361 win 1212 20:19:49.264628 IP B > A: . ack 161881 win 1212 20:19:49.264637 IP A > B: . 206721:211281(4560) ack 1 win 119 20:19:49.265465 IP B > A: . ack 163401 win 1212 20:19:49.265886 IP B > A: . ack 164921 win 1212 20:19:49.266722 IP B > A: . ack 166441 win 1212 20:19:49.266732 IP A > B: . 211281:215841(4560) ack 1 win 119 20:19:49.267559 IP B > A: . ack 167961 win 1212 20:19:49.268394 IP B > A: . ack 169481 win 1212 20:19:49.269232 IP B > A: . ack 171001 win 1212 20:19:49.269241 IP A > B: . 215841:221161(5320) ack 1 win 119 Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Van Jacobson Cc: Neal Cardwell Cc: Nandita Dukkipati Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 817fbb3..5d0b438 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1809,8 +1809,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) goto send_now; } - /* Ok, it looks like it is advisable to defer. */ - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. + * Do not rearm the timer if already set to not break TCP ACK clocking. + */ + if (!tp->tso_deferred) + tp->tso_deferred = 1 | (jiffies << 1); return true; -- cgit v1.1 From 9979a55a833883242e3a29f3596676edd7199c46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2013 14:38:28 +0000 Subject: net: remove a WARN_ON() in net_enable_timestamp() The WARN_ON(in_interrupt()) in net_enable_timestamp() can get false positive, in socket clone path, run from softirq context : [ 3641.624425] WARNING: at net/core/dev.c:1532 net_enable_timestamp+0x7b/0x80() [ 3641.668811] Call Trace: [ 3641.671254] [] warn_slowpath_common+0x87/0xc0 [ 3641.677871] [] warn_slowpath_null+0x1a/0x20 [ 3641.683683] [] net_enable_timestamp+0x7b/0x80 [ 3641.689668] [] sk_clone_lock+0x425/0x450 [ 3641.695222] [] inet_csk_clone_lock+0x16/0x170 [ 3641.701213] [] tcp_create_openreq_child+0x29/0x820 [ 3641.707663] [] ? ipt_do_table+0x222/0x670 [ 3641.713354] [] tcp_v4_syn_recv_sock+0xab/0x3d0 [ 3641.719425] [] tcp_check_req+0x3da/0x530 [ 3641.724979] [] ? inet_hashinfo_init+0x60/0x80 [ 3641.730964] [] ? tcp_v4_rcv+0x79f/0xbe0 [ 3641.736430] [] tcp_v4_do_rcv+0x38d/0x4f0 [ 3641.741985] [] tcp_v4_rcv+0xa7a/0xbe0 Its safe at this point because the parent socket owns a reference on the netstamp_needed, so we cant have a 0 -> 1 transition, which requires to lock a mutex. Instead of refining the check, lets remove it, as all known callers are safe. If it ever changes in the future, static_key_slow_inc() will complain anyway. Reported-by: Laurent Chavey Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d540ced..b13e5c7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1545,7 +1545,6 @@ void net_enable_timestamp(void) return; } #endif - WARN_ON(in_interrupt()); static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); -- cgit v1.1 From 4a7df340ed1bac190c124c1601bfc10cde9fb4fb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Mar 2013 19:14:07 +0000 Subject: 8021q: fix a potential use-after-free vlan_vid_del() could possibly free ->vlan_info after a RCU grace period, however, we may still refer to the freed memory area by 'grp' pointer. Found by code inspection. This patch moves vlan_vid_del() as behind as possible. Cc: Patrick McHardy Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a187144..85addcd 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -86,13 +86,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp = &vlan_info->grp; - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan_id); - grp->nr_vlan_devs--; if (vlan->flags & VLAN_FLAG_MVRP) @@ -114,6 +107,13 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } + /* Take it out of our own structures, but be sure to interlock with + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). + */ + if (vlan_id) + vlan_vid_del(real_dev, vlan_id); + /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } -- cgit v1.1 From 9b46922e15f4d9d2aedcd320c3b7f7f54d956da7 Mon Sep 17 00:00:00 2001 From: Hong zhi guo Date: Sat, 23 Mar 2013 02:27:50 +0000 Subject: bridge: fix crash when set mac address of br interface When I tried to set mac address of a bridge interface to a mac address which already learned on this bridge, I got system hang. The cause is straight forward: function br_fdb_change_mac_address calls fdb_insert with NULL source nbp. Then an fdb lookup is performed. If an fdb entry is found and it's local, it's OK. But if it's not local, source is dereferenced for printk without NULL check. Signed-off-by: Hong Zhiguo Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b0812c9..bab338e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -423,7 +423,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return 0; br_warn(br, "adding interface %s with same address " "as a received packet\n", - source->dev->name); + source ? source->dev->name : br->dev->name); fdb_delete(br, fdb); } -- cgit v1.1 From 7ebe183c6d444ef5587d803b64a1f4734b18c564 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 24 Mar 2013 10:42:25 +0000 Subject: tcp: undo spurious timeout after SACK reneging On SACK reneging the sender immediately retransmits and forces a timeout but disables Eifel (undo). If the (buggy) receiver does not drop any packet this can trigger a false slow-start retransmit storm driven by the ACKs of the original packets. This can be detected with undo and TCP timestamps. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d9bdac..3bd55ba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2059,11 +2059,8 @@ void tcp_enter_loss(struct sock *sk, int how) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - if (!how) { - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. */ - tp->undo_marker = tp->snd_una; - } else { + tp->undo_marker = tp->snd_una; + if (how) { tp->sacked_out = 0; tp->fackets_out = 0; } -- cgit v1.1 From 1166fde6a923c30f4351515b6a9a1efc513e7d00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Mar 2013 11:23:40 -0400 Subject: SUNRPC: Add barriers to ensure read ordering in rpc_wake_up_task_queue_locked We need to be careful when testing task->tk_waitqueue in rpc_wake_up_task_queue_locked, because it can be changed while we are holding the queue->lock. By adding appropriate memory barriers, we can ensure that it is safe to test task->tk_waitqueue for equality if the RPC_TASK_QUEUED bit is set. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- net/sunrpc/sched.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fb20f25..f8529fc 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } /* -- cgit v1.1 From ded34e0fe8fe8c2d595bfa30626654e4b87621e0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Mar 2013 03:18:33 +0000 Subject: unix: fix a race condition in unix_release() As reported by Jan, and others over the past few years, there is a race condition caused by unix_release setting the sock->sk pointer to NULL before properly marking the socket as dead/orphaned. This can cause a problem with the LSM hook security_unix_may_send() if there is another socket attempting to write to this partially released socket in between when sock->sk is set to NULL and it is marked as dead/orphaned. This patch fixes this by only setting sock->sk to NULL after the socket has been marked as dead; I also take the opportunity to make unix_release_sock() a void function as it only ever returned 0/success. Dave, I think this one should go on the -stable pile. Special thanks to Jan for coming up with a reproducer for this problem. Reported-by: Jan Stancek Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/unix/af_unix.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 51be64f..f153a8d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -382,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct path path; @@ -451,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -699,9 +697,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) -- cgit v1.1 From a79ca223e029aa4f09abb337accf1812c900a800 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Tue, 26 Mar 2013 01:52:45 +0800 Subject: ipv6: fix bad free of addrconf_init_net Signed-off-by: Hong Zhiguo Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2c7e61..26512250 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4784,26 +4784,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __net_init addrconf_init_net(struct net *net) { - int err; + int err = -ENOMEM; struct ipv6_devconf *all, *dflt; - err = -ENOMEM; - all = &ipv6_devconf; - dflt = &ipv6_devconf_dflt; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) - goto err_alloc_all; + dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; - dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) - goto err_alloc_dflt; - } else { - /* these will be inherited by all namespaces */ - dflt->autoconf = ipv6_defaults.autoconf; - dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; - } + /* these will be inherited by all namespaces */ + dflt->autoconf = ipv6_defaults.autoconf; + dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; -- cgit v1.1 From 14134f6584212d585b310ce95428014b653dfaf6 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Mon, 25 Mar 2013 17:02:04 +0000 Subject: af_unix: dont send SCM_CREDENTIAL when dest socket is NULL SCM_SCREDENTIALS should apply to write() syscalls only either source or destination socket asserted SOCK_PASSCRED. The original implememtation in maybe_add_creds is wrong, and breaks several LSB testcases ( i.e. /tset/LSB.os/netowkr/recvfrom/T.recvfrom). Origionally-authored-by: Karel Srot Signed-off-by: Ding Tianhong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f153a8d..971282b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, if (UNIXCB(skb).cred) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + (other->sk_socket && + test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { UNIXCB(skb).pid = get_pid(task_tgid(current)); UNIXCB(skb).cred = get_current_cred(); } -- cgit v1.1 From 330305cc4a6b0cb75c22fc01b8826f0ad755550f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 24 Mar 2013 17:36:29 +0000 Subject: ipv4: Fix ip-header identification for gso packets. ip-header id needs to be incremented even if IP_DF flag is set. This behaviour was changed in commit 490ab08127cebc25e3a26 (IP_GRE: Fix IP-Identification). Following patch fixes it so that identification is always incremented. Reported-by: Cong Wang Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 68f6a94..c929d9c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1333,8 +1333,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); } else { - if (!(iph->frag_off & htons(IP_DF))) - iph->id = htons(id++); + iph->id = htons(id++); } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; -- cgit v1.1