From e050dbeb0d1e19072d0d656b51f06af1af860f19 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Aug 2014 10:19:39 +0200 Subject: batman-adv: Fix parameter order of hlist_add_behind 1d023284c31a4e40a94d5bbcb7dbb7a35ee0bcbc ("list: fix order of arguments for hlist_add_after(_rcu)") was incorrectly rebased on top of d9124268d84a836f14a6ead54ff9d8eee4c43be5 ("batman-adv: Fix out-of-order fragmentation support"). The parameter order change of the rebased patch was not re-applied as expected. This causes a memory leak and can cause crashes when out-of-order packets are received. hlist_add_behind will try to access the uninitalized list pointers of frag_entry_new to find the previous/next entry and may modify/read random memory locations. Signed-off-by: Sven Eckelmann Cc: Andrew Morton Signed-off-by: David S. Miller --- net/batman-adv/fragmentation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 52c43f9..fc1835c 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, /* Reached the end of the list, so insert after 'frag_entry_last'. */ if (likely(frag_entry_last)) { - hlist_add_behind(&frag_entry_last->list, &frag_entry_new->list); + hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list); chain->size += skb->len - hdr_size; chain->timestamp = jiffies; ret = true; -- cgit v1.1 From ac32c7f705692b92fe12dcbe88fe87136fdfff6f Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Fri, 15 Aug 2014 16:44:35 +0200 Subject: tipc: fix message importance range check Commit 3b4f302d8578 ("tipc: eliminate redundant locking") introduced a bug by removing the sanity check for message importance, allowing programs to assign any value to the msg_user field. This will mess up the packet reception logic and may cause random link resets. Signed-off-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/port.h | 4 +++- net/tipc/socket.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/port.h b/net/tipc/port.h index 3f93454..a69118f 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -179,8 +179,10 @@ static inline int tipc_port_importance(struct tipc_port *port) return msg_importance(&port->phdr); } -static inline void tipc_port_set_importance(struct tipc_port *port, int imp) +static inline int tipc_port_set_importance(struct tipc_port *port, int imp) { + if (imp > TIPC_CRITICAL_IMPORTANCE) + return -EINVAL msg_set_importance(&port->phdr, (u32)imp); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7d423ee..ff8c811 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1973,7 +1973,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, switch (opt) { case TIPC_IMPORTANCE: - tipc_port_set_importance(port, value); + res = tipc_port_set_importance(port, value); break; case TIPC_SRC_DROPPABLE: if (sock->type != SOCK_STREAM) -- cgit v1.1 From 8993cf8edf42527119186b558766539243b791a5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 11 Aug 2014 18:21:49 +0200 Subject: netfilter: move NAT Kconfig switches out of the iptables scope Currently, the NAT configs depend on iptables and ip6tables. However, users should be capable of enabling NAT for nft without having to switch on iptables. Fix this by adding new specific IP_NF_NAT and IP6_NF_NAT config switches for iptables and ip6tables NAT support. I have also moved the original NF_NAT_IPV4 and NF_NAT_IPV6 configs out of the scope of iptables to make them independent of it. This patch also adds NETFILTER_XT_NAT which selects the xt_nat combo that provides snat/dnat for iptables. We cannot use NF_NAT anymore since nf_tables can select this. Reported-by: Matteo Croce Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 102 ++++++++++++++++++++++++-------------------- net/ipv4/netfilter/Makefile | 2 +- net/ipv6/netfilter/Kconfig | 26 ++++++++--- net/ipv6/netfilter/Makefile | 2 +- net/netfilter/Makefile | 2 +- 5 files changed, 77 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index fb17312..7cbcaf4 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -82,6 +82,52 @@ config NF_TABLES_ARP help This option enables the ARP support for nf_tables. +config NF_NAT_IPV4 + tristate "IPv4 NAT" + depends on NF_CONNTRACK_IPV4 + default m if NETFILTER_ADVANCED=n + select NF_NAT + help + The IPv4 NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. This can be + controlled by iptables or nft. + +if NF_NAT_IPV4 + +config NF_NAT_SNMP_BASIC + tristate "Basic SNMP-ALG support" + depends on NF_CONNTRACK_SNMP + depends on NETFILTER_ADVANCED + default NF_NAT && NF_CONNTRACK_SNMP + ---help--- + + This module implements an Application Layer Gateway (ALG) for + SNMP payloads. In conjunction with NAT, it allows a network + management system to access multiple private networks with + conflicting addresses. It works by modifying IP addresses + inside SNMP payloads to match IP-layer NAT mapping. + + This is the "basic" form of SNMP-ALG, as described in RFC 2962 + + To compile it as a module, choose M here. If unsure, say N. + +config NF_NAT_PROTO_GRE + tristate + depends on NF_CT_PROTO_GRE + +config NF_NAT_PPTP + tristate + depends on NF_CONNTRACK + default NF_CONNTRACK_PPTP + select NF_NAT_PROTO_GRE + +config NF_NAT_H323 + tristate + depends on NF_CONNTRACK + default NF_CONNTRACK_H323 + +endif # NF_NAT_IPV4 + config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n @@ -170,19 +216,21 @@ config IP_NF_TARGET_SYNPROXY To compile it as a module, choose M here. If unsure, say N. # NAT + specific targets: nf_conntrack -config NF_NAT_IPV4 - tristate "IPv4 NAT" +config IP_NF_NAT + tristate "iptables NAT support" depends on NF_CONNTRACK_IPV4 default m if NETFILTER_ADVANCED=n select NF_NAT + select NF_NAT_IPV4 + select NETFILTER_XT_NAT help - The IPv4 NAT option allows masquerading, port forwarding and other - forms of full Network Address Port Translation. It is controlled by - the `nat' table in iptables: see the man page for iptables(8). + This enables the `nat' table in iptables. This allows masquerading, + port forwarding and other forms of full Network Address Port + Translation. To compile it as a module, choose M here. If unsure, say N. -if NF_NAT_IPV4 +if IP_NF_NAT config IP_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" @@ -214,47 +262,7 @@ config IP_NF_TARGET_REDIRECT (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_REDIRECT. -endif - -config NF_NAT_SNMP_BASIC - tristate "Basic SNMP-ALG support" - depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4 - depends on NETFILTER_ADVANCED - default NF_NAT && NF_CONNTRACK_SNMP - ---help--- - - This module implements an Application Layer Gateway (ALG) for - SNMP payloads. In conjunction with NAT, it allows a network - management system to access multiple private networks with - conflicting addresses. It works by modifying IP addresses - inside SNMP payloads to match IP-layer NAT mapping. - - This is the "basic" form of SNMP-ALG, as described in RFC 2962 - - To compile it as a module, choose M here. If unsure, say N. - -# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y), -# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker. -# From kconfig-language.txt: -# -# '&&' (6) -# -# (6) Returns the result of min(/expr/, /expr/). - -config NF_NAT_PROTO_GRE - tristate - depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE - -config NF_NAT_PPTP - tristate - depends on NF_CONNTRACK && NF_NAT_IPV4 - default NF_NAT_IPV4 && NF_CONNTRACK_PPTP - select NF_NAT_PROTO_GRE - -config NF_NAT_H323 - tristate - depends on NF_CONNTRACK && NF_NAT_IPV4 - default NF_NAT_IPV4 && NF_CONNTRACK_H323 +endif # IP_NF_NAT # mangle + specific targets config IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3300162..edf4af3 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -43,7 +43,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o +obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index ac93df1..cf0b88f 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -60,6 +60,16 @@ config NF_LOG_IPV6 depends on NETFILTER_ADVANCED select NF_LOG_COMMON +config NF_NAT_IPV6 + tristate "IPv6 NAT" + depends on NF_CONNTRACK_IPV6 + depends on NETFILTER_ADVANCED + select NF_NAT + help + The IPv6 NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. This can be + controlled by iptables or nft. + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 @@ -232,19 +242,21 @@ config IP6_NF_SECURITY If unsure, say N. -config NF_NAT_IPV6 - tristate "IPv6 NAT" +config IP6_NF_NAT + tristate "ip6tables NAT support" depends on NF_CONNTRACK_IPV6 depends on NETFILTER_ADVANCED select NF_NAT + select NF_NAT_IPV6 + select NETFILTER_XT_NAT help - The IPv6 NAT option allows masquerading, port forwarding and other - forms of full Network Address Port Translation. It is controlled by - the `nat' table in ip6tables, see the man page for ip6tables(8). + This enables the `nat' table in ip6tables. This allows masquerading, + port forwarding and other forms of full Network Address Port + Translation. To compile it as a module, choose M here. If unsure, say N. -if NF_NAT_IPV6 +if IP6_NF_NAT config IP6_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" @@ -265,7 +277,7 @@ config IP6_NF_TARGET_NPT To compile it as a module, choose M here. If unsure, say N. -endif # NF_NAT_IPV6 +endif # IP6_NF_NAT endif # IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index c0b2631..c3d3286 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o -obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o +obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o # objects for l3 independent conntrack nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 8308624..fad5fdb 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -95,7 +95,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o -obj-$(CONFIG_NF_NAT) += xt_nat.o +obj-$(CONFIG_NETFILTER_XT_NAT) += xt_nat.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o -- cgit v1.1 From 7629d1eaf33672c9d35ba1e2ad12b459d56ca3b1 Mon Sep 17 00:00:00 2001 From: Martin Townsend Date: Tue, 19 Aug 2014 19:03:28 +0200 Subject: mac802154: fixed potential skb leak with mac802154_parse_frame_start This patch fix a memory leak if received frame was not able to parse. Signed-off-by: Martin Townsend Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/wpan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 3c3069f..4c13323 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -573,6 +573,7 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) ret = mac802154_parse_frame_start(skb, &hdr); if (ret) { pr_debug("got invalid frame\n"); + kfree_skb(skb); return; } -- cgit v1.1 From c4cb901ac667f81786b402ca7d69a9063e770b3a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Tue, 19 Aug 2014 19:03:29 +0200 Subject: ieee802154: 6lowpan_rtnl: fix correct errno value This patch correct the return value of lowpan_alloc_frag if an error occur. Errno numbers should always be negative. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/6lowpan_rtnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c index 016b77e..71fa7d4 100644 --- a/net/ieee802154/6lowpan_rtnl.c +++ b/net/ieee802154/6lowpan_rtnl.c @@ -246,7 +246,7 @@ lowpan_alloc_frag(struct sk_buff *skb, int size, return ERR_PTR(-rc); } } else { - frag = ERR_PTR(ENOMEM); + frag = ERR_PTR(-ENOMEM); } return frag; -- cgit v1.1 From 6e361d6ffee322fcd092d97720e05032ffb98ae6 Mon Sep 17 00:00:00 2001 From: Martin Townsend Date: Tue, 19 Aug 2014 19:03:30 +0200 Subject: ieee802154: mac802154: handle the reserved dest mode by dropping the packet If received frame contains the reserved destination address mode. The frame should be dropped and free the skb. Signed-off-by: Martin Townsend Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/wpan.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 4c13323..5478388 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -462,7 +462,10 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb, skb->pkt_type = PACKET_OTHERHOST; break; default: - break; + spin_unlock_bh(&sdata->mib_lock); + pr_debug("invalid dest mode\n"); + kfree_skb(skb); + return NET_RX_DROP; } spin_unlock_bh(&sdata->mib_lock); -- cgit v1.1 From 685d632804b89ea25d3339afad162c48646ada5c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Tue, 19 Aug 2014 19:03:31 +0200 Subject: ieee802154: 6lowpan: ensure of sending 1280 packets This patch changes the 1281 MTU to 1280. Others stack have only a 1280 byte array for uncompressed 6LoWPAN packets, this avoid that these stacks have an overflow. Sending 1281 uncompressed 6LoWPAN packets isn't also rfc complaint. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/6lowpan_rtnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c index 71fa7d4..6591d27 100644 --- a/net/ieee802154/6lowpan_rtnl.c +++ b/net/ieee802154/6lowpan_rtnl.c @@ -437,7 +437,7 @@ static void lowpan_setup(struct net_device *dev) /* Frame Control + Sequence Number + Address fields + Security Header */ dev->hard_header_len = 2 + 1 + 20 + 14; dev->needed_tailroom = 2; /* FCS */ - dev->mtu = 1281; + dev->mtu = IPV6_MIN_MTU; dev->tx_queue_len = 0; dev->flags = IFF_BROADCAST | IFF_MULTICAST; dev->watchdog_timeo = 0; -- cgit v1.1 From 6697dabe27e03302ddfddc975275e6401defe2dd Mon Sep 17 00:00:00 2001 From: Martin Townsend Date: Tue, 19 Aug 2014 19:03:32 +0200 Subject: ieee802154: 6lowpan: ensure MTU of 1280 for 6lowpan This patch drops the userspace accessable sysfs entry for the maximum datagram size of a 6LoWPAN fragment packet. A fragment should not have a datagram size value greater than 1280 byte. Instead of make this value configurable, we accept 1280 datagram size fragment packets only. Signed-off-by: Martin Townsend Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/reassembly.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index ffec6ce..32755cb 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -355,8 +355,6 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) struct net *net = dev_net(skb->dev); struct lowpan_frag_info *frag_info = lowpan_cb(skb); struct ieee802154_addr source, dest; - struct netns_ieee802154_lowpan *ieee802154_lowpan = - net_ieee802154_lowpan(net); int err; source = mac_cb(skb)->source; @@ -366,8 +364,10 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) if (err < 0) goto err; - if (frag_info->d_size > ieee802154_lowpan->max_dsize) + if (frag_info->d_size > IPV6_MIN_MTU) { + net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n"); goto err; + } fq = fq_find(net, frag_info, &source, &dest); if (fq != NULL) { @@ -415,13 +415,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "6lowpanfrag_max_datagram_size", - .data = &init_net.ieee802154_lowpan.max_dsize, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { } }; @@ -458,7 +451,6 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) table[1].data = &ieee802154_lowpan->frags.low_thresh; table[1].extra2 = &ieee802154_lowpan->frags.high_thresh; table[2].data = &ieee802154_lowpan->frags.timeout; - table[3].data = &ieee802154_lowpan->max_dsize; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -533,7 +525,6 @@ static int __net_init lowpan_frags_init_net(struct net *net) ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - ieee802154_lowpan->max_dsize = 0xFFFF; inet_frags_init_net(&ieee802154_lowpan->frags); -- cgit v1.1 From 73d0f37ac4ee5b60e6b9c1b3ccb8766bade9d9c5 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 14 Aug 2014 12:27:47 +0400 Subject: cbq: incorrectly low bandwidth setting blocks limited traffic Mainstream commit f0f6ee1f70c4 ("cbq: incorrect processing of high limits") have side effect: if cbq bandwidth setting is less than real interface throughput non-limited traffic can delay limited traffic for a very long time. This happen because of q->now changes incorrectly in cbq_dequeue(): in described scenario L2T is much greater than real time delay, and q->now gets an extra boost for each transmitted packet. Accumulated boost prevents update q->now, and blocked class can wait very long time until (q->now >= cl->undertime) will be true again. To fix the problem the patch updates q->now on each cbq_update() call. L2T-related pre-modification q->now was moved to cbq_update(). My testing confirmed that it fixes the problem and did not discover any side-effects Fixes: f0f6ee1f70c4 ("cbq: incorrect processing of high limits") Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ead5264..550be95 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -700,8 +700,13 @@ cbq_update(struct cbq_sched_data *q) struct cbq_class *this = q->tx_class; struct cbq_class *cl = this; int len = q->tx_len; + psched_time_t now; q->tx_class = NULL; + /* Time integrator. We calculate EOS time + * by adding expected packet transmission time. + */ + now = q->now + L2T(&q->link, len); for ( ; cl; cl = cl->share) { long avgidle = cl->avgidle; @@ -717,7 +722,7 @@ cbq_update(struct cbq_sched_data *q) * idle = (now - last) - last_pktlen/rate */ - idle = q->now - cl->last; + idle = now - cl->last; if ((unsigned long)idle > 128*1024*1024) { avgidle = cl->maxidle; } else { @@ -761,7 +766,7 @@ cbq_update(struct cbq_sched_data *q) idle -= L2T(&q->link, len); idle += L2T(cl, len); - cl->undertime = q->now + idle; + cl->undertime = now + idle; } else { /* Underlimit */ @@ -771,7 +776,8 @@ cbq_update(struct cbq_sched_data *q) else cl->avgidle = avgidle; } - cl->last = q->now; + if ((s64)(now - cl->last) > 0) + cl->last = now; } cbq_update_toplevel(q, this, q->tx_borrowed); @@ -943,30 +949,13 @@ cbq_dequeue(struct Qdisc *sch) struct sk_buff *skb; struct cbq_sched_data *q = qdisc_priv(sch); psched_time_t now; - psched_tdiff_t incr; now = psched_get_time(); - incr = now - q->now_rt; - - if (q->tx_class) { - psched_tdiff_t incr2; - /* Time integrator. We calculate EOS time - * by adding expected packet transmission time. - * If real time is greater, we warp artificial clock, - * so that: - * - * cbq_time = max(real_time, work); - */ - incr2 = L2T(&q->link, q->tx_len); - q->now += incr2; + + if (q->tx_class) cbq_update(q); - if ((incr -= incr2) < 0) - incr = 0; - q->now += incr; - } else { - if (now > q->now) - q->now = now; - } + + q->now = now; q->now_rt = now; for (;;) { -- cgit v1.1 From 7201c1ddf774c12daa2dd5da098b8929db53f047 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 14 Aug 2014 12:27:59 +0400 Subject: cbq: now_rt removal Now q->now_rt is identical to q->now and is not required anymore. Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 550be95..762a04b 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -159,7 +159,6 @@ struct cbq_sched_data { struct cbq_class *tx_borrowed; int tx_len; psched_time_t now; /* Cached timestamp */ - psched_time_t now_rt; /* Cached real time */ unsigned int pmask; struct hrtimer delay_timer; @@ -353,12 +352,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) int toplevel = q->toplevel; if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) { - psched_time_t now; - psched_tdiff_t incr; - - now = psched_get_time(); - incr = now - q->now_rt; - now = q->now + incr; + psched_time_t now = psched_get_time(); do { if (cl->undertime < now) { @@ -956,7 +950,6 @@ cbq_dequeue(struct Qdisc *sch) cbq_update(q); q->now = now; - q->now_rt = now; for (;;) { q->wd_expires = 0; @@ -1212,7 +1205,6 @@ cbq_reset(struct Qdisc *sch) hrtimer_cancel(&q->delay_timer); q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); - q->now_rt = q->now; for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) q->active[prio] = NULL; @@ -1396,7 +1388,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->delay_timer.function = cbq_undelay; q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); - q->now_rt = q->now; cbq_link_class(&q->link); -- cgit v1.1 From 02784f1b05b8f241c8180af88869e717e2758593 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Aug 2014 11:14:02 -0700 Subject: tipc: Fix build. Missing semicolon in range check fix. Signed-off-by: David S. Miller --- net/tipc/port.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/port.h b/net/tipc/port.h index a69118f..3087da3 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -182,8 +182,9 @@ static inline int tipc_port_importance(struct tipc_port *port) static inline int tipc_port_set_importance(struct tipc_port *port, int imp) { if (imp > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL + return -EINVAL; msg_set_importance(&port->phdr, (u32)imp); + return 0; } #endif -- cgit v1.1 From caa8ad94edf686d02b555c65a6162c0d1b434958 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 18 Aug 2014 15:46:28 +0200 Subject: netfilter: x_tables: allow to use default cgroup match There's actually no good reason why we cannot use cgroup id 0, so lets just remove this artificial barrier. Reported-by: Alexey Perevalov Signed-off-by: Daniel Borkmann Tested-by: Alexey Perevalov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index f4e8330..7198d66 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -31,7 +31,7 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) if (info->invert & ~1) return -EINVAL; - return info->id ? 0 : -EINVAL; + return 0; } static bool -- cgit v1.1 From 1e8430f30b55a1f3f6925c9f37f8cc9afd141d2e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 11 Aug 2014 18:21:50 +0200 Subject: netfilter: nf_tables: nat expression must select CONFIG_NF_NAT This enables the netfilter NAT engine in first place, otherwise you cannot ever select the nf_tables nat expression if iptables is not selected. Reported-by: Matteo Croce Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ad751fe..05eb177 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -499,7 +499,7 @@ config NFT_LIMIT config NFT_NAT depends on NF_TABLES depends on NF_CONNTRACK - depends on NF_NAT + select NF_NAT tristate "Netfilter nf_tables nat module" help This option adds the "nat" expression that you can use to perform -- cgit v1.1 From f161dd4122ffa73e4e12000309dca65bec80d416 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Aug 2014 21:06:54 +0300 Subject: Bluetooth: Fix hci_conn reference counting for auto-connections Recently the LE passive scanning and auto-connections feature was introduced. It uses the hci_connect_le() API which returns a hci_conn along with a reference count to that object. All previous users would tie this returned reference to some existing object, such as an L2CAP channel, and there'd be no leaked references this way. For auto-connections however the reference was returned but not stored anywhere, leaving established connections with one higher reference count than they should have. Instead of playing special tricks with hci_conn_hold/drop this patch associates the returned reference from hci_connect_le() with the object that in practice does own this reference, i.e. the hci_conn_params struct that caused us to initiate a connection in the first place. Once the connection is established or fails to establish this reference is removed appropriately. One extra thing needed is to call hci_pend_le_actions_clear() before calling hci_conn_hash_flush() so that the reference is cleared before the hci_conn objects are fully removed. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 8 ++++++++ net/bluetooth/hci_core.c | 14 ++++++++++++-- net/bluetooth/hci_event.c | 17 +++++++++++++++-- 3 files changed, 35 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b50dabb..faff624 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -589,6 +589,14 @@ EXPORT_SYMBOL(hci_get_route); void hci_le_conn_failed(struct hci_conn *conn, u8 status) { struct hci_dev *hdev = conn->hdev; + struct hci_conn_params *params; + + params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst, + conn->dst_type); + if (params && params->conn) { + hci_conn_drop(params->conn); + params->conn = NULL; + } conn->state = BT_CLOSED; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c32d361..1d9c29a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2536,8 +2536,13 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev) { struct hci_conn_params *p; - list_for_each_entry(p, &hdev->le_conn_params, list) + list_for_each_entry(p, &hdev->le_conn_params, list) { + if (p->conn) { + hci_conn_drop(p->conn); + p->conn = NULL; + } list_del_init(&p->action); + } BT_DBG("All LE pending actions cleared"); } @@ -2578,8 +2583,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_dev_lock(hdev); hci_inquiry_cache_flush(hdev); - hci_conn_hash_flush(hdev); hci_pend_le_actions_clear(hdev); + hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); hci_notify(hdev, HCI_DEV_DOWN); @@ -3727,6 +3732,9 @@ void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) if (!params) return; + if (params->conn) + hci_conn_drop(params->conn); + list_del(¶ms->action); list_del(¶ms->list); kfree(params); @@ -3757,6 +3765,8 @@ void hci_conn_params_clear_all(struct hci_dev *hdev) struct hci_conn_params *params, *tmp; list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) { + if (params->conn) + hci_conn_drop(params->conn); list_del(¶ms->action); list_del(¶ms->list); kfree(params); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index be35598..a600082 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4221,8 +4221,13 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_proto_connect_cfm(conn, ev->status); params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) + if (params) { list_del_init(¶ms->action); + if (params->conn) { + hci_conn_drop(params->conn); + params->conn = NULL; + } + } unlock: hci_update_background_scan(hdev); @@ -4304,8 +4309,16 @@ static void check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW, HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER); - if (!IS_ERR(conn)) + if (!IS_ERR(conn)) { + /* Store the pointer since we don't really have any + * other owner of the object besides the params that + * triggered it. This way we can abort the connection if + * the parameters get removed and keep the reference + * count consistent once the connection is established. + */ + params->conn = conn; return; + } switch (PTR_ERR(conn)) { case -EBUSY: -- cgit v1.1 From 6df378d2d1f87a249a88ac4a8c7a14861d9c9474 Mon Sep 17 00:00:00 2001 From: chas williams - CONTRACTOR Date: Thu, 14 Aug 2014 09:19:47 -0400 Subject: lec: Use rtnl lock/unlock when updating MTU The LECS response contains the MTU that should be used. Correctly synchronize with other layers when updating. Signed-off-by: Chas Williams - CONTRACTOR Signed-off-by: David S. Miller --- net/atm/lec.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index e4853b5..4b98f89 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -410,9 +410,11 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) priv->lane2_ops = NULL; if (priv->lane_version > 1) priv->lane2_ops = &lane2_ops; + rtnl_lock(); if (dev_set_mtu(dev, mesg->content.config.mtu)) pr_info("%s: change_mtu to %d failed\n", dev->name, mesg->content.config.mtu); + rtnl_unlock(); priv->is_proxy = mesg->content.config.is_proxy; break; case l_flush_tran_id: -- cgit v1.1 From dc808110bb62b64a448696ecac3938902c92e1ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Aug 2014 09:16:04 -0700 Subject: packet: handle too big packets for PACKET_V3 af_packet can currently overwrite kernel memory by out of bound accesses, because it assumed a [new] block can always hold one frame. This is not generally the case, even if most existing tools do it right. This patch clamps too long frames as API permits, and issue a one time error on syslog. [ 394.357639] tpacket_rcv: packet too big, clamped from 5042 to 3966. macoff=82 In this example, packet header tp_snaplen was set to 3966, and tp_len was set to 5042 (skb->len) Signed-off-by: Eric Dumazet Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") Acked-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/packet/af_packet.c | 17 +++++++++++++++++ net/packet/internal.h | 1 + 2 files changed, 18 insertions(+) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8d9f804..93896d2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -632,6 +632,7 @@ static void init_prb_bdqc(struct packet_sock *po, p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; + p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); prb_init_ft_ops(p1, req_u); prb_setup_retire_blk_timer(po, tx_ring); prb_open_block(p1, pbd); @@ -1942,6 +1943,18 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if ((int)snaplen < 0) snaplen = 0; } + } else if (unlikely(macoff + snaplen > + GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { + u32 nval; + + nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff; + pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n", + snaplen, nval, macoff); + snaplen = nval; + if (unlikely((int)snaplen < 0)) { + snaplen = 0; + macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; + } } spin_lock(&sk->sk_receive_queue.lock); h.raw = packet_current_rx_frame(po, skb, @@ -3783,6 +3796,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) goto out; + if (po->tp_version >= TPACKET_V3 && + (int)(req->tp_block_size - + BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) goto out; diff --git a/net/packet/internal.h b/net/packet/internal.h index eb9580a..cdddf6a 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -29,6 +29,7 @@ struct tpacket_kbdq_core { char *pkblk_start; char *pkblk_end; int kblk_size; + unsigned int max_frame_len; unsigned int knum_blocks; uint64_t knxt_seq_num; char *prev; -- cgit v1.1 From 061079ac0b9be7a578dcd09f7865c2c0d6ac894a Mon Sep 17 00:00:00 2001 From: zhuyj Date: Wed, 20 Aug 2014 17:31:43 +0800 Subject: sctp: not send SCTP_PEER_ADDR_CHANGE notifications with failed probe Since the transport has always been in state SCTP_UNCONFIRMED, it therefore wasn't active before and hasn't been used before, and it always has been, so it is unnecessary to bug the user with a notification. Reported-by: Deepak Khandelwal Suggested-by: Vlad Yasevich Suggested-by: Michael Tuexen Suggested-by: Daniel Borkmann Signed-off-by: Zhu Yanjun Acked-by: Vlad Yasevich Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sctp/associola.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 06a9ee6..aaafb32 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -813,6 +813,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, else { dst_release(transport->dst); transport->dst = NULL; + ulp_notify = false; } spc_state = SCTP_ADDR_UNREACHABLE; -- cgit v1.1 From 793c3b4000a1ef611ae7e5c89bd2a9c6b776cb5e Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 21 Aug 2014 19:37:48 +0200 Subject: net: ipv6: fib: don't sleep inside atomic lock The function fib6_commit_metrics() allocates a piece of memory in mode GFP_KERNEL while holding an atomic lock from higher up in the stack, in the function __ip6_ins_rt(). This produces the following BUG: > BUG: sleeping function called from invalid context at mm/slub.c:1250 > in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: dhcpcd > 2 locks held by dhcpcd/2909: > #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 > #1: (&tb->tb6_lock){++--+.}, at: [] ip6_route_add+0x65a/0x800 > CPU: 1 PID: 2909 Comm: dhcpcd Not tainted 3.17.0-rc1 #1 > Hardware name: ASUS All Series/Q87T, BIOS 0216 10/16/2013 > 0000000000000008 ffff8800c8f13858 ffffffff81af135a 0000000000000000 > ffff880212202430 ffff8800c8f13878 ffffffff810f8d3a ffff880212202c98 > 0000000000000010 ffff8800c8f138c8 ffffffff8121ad0e 0000000000000001 > Call Trace: > [] dump_stack+0x4e/0x68 > [] __might_sleep+0x10a/0x120 > [] kmem_cache_alloc_trace+0x4e/0x190 > [] ? fib6_commit_metrics+0x66/0x110 > [] fib6_commit_metrics+0x66/0x110 > [] fib6_add+0x883/0xa80 > [] ? ip6_route_add+0x65a/0x800 > [] ip6_route_add+0x675/0x800 > [] ? ip6_route_add+0x6a/0x800 > [] inet6_rtm_newroute+0x5c/0x80 > [] rtnetlink_rcv_msg+0x211/0x260 > [] ? rtnl_lock+0x17/0x20 > [] ? lock_release_holdtime+0x28/0x180 > [] ? rtnl_lock+0x17/0x20 > [] ? __rtnl_unlock+0x20/0x20 > [] netlink_rcv_skb+0x6e/0xd0 > [] rtnetlink_rcv+0x25/0x40 > [] netlink_unicast+0xd9/0x180 > [] netlink_sendmsg+0x700/0x770 > [] ? local_clock+0x25/0x30 > [] sock_sendmsg+0x6c/0x90 > [] ? might_fault+0xa3/0xb0 > [] ? verify_iovec+0x7d/0xf0 > [] ___sys_sendmsg+0x37e/0x3b0 > [] ? trace_hardirqs_on_caller+0x185/0x220 > [] ? mutex_unlock+0xe/0x10 > [] ? netlink_insert+0xbc/0xe0 > [] ? netlink_autobind.isra.30+0x125/0x150 > [] ? netlink_autobind.isra.30+0x60/0x150 > [] ? netlink_bind+0x159/0x230 > [] ? might_fault+0x5a/0xb0 > [] ? SYSC_bind+0x7e/0xd0 > [] __sys_sendmsg+0x4d/0x80 > [] SyS_sendmsg+0x12/0x20 > [] system_call_fastpath+0x16/0x1b Fixing this by replacing the mode GFP_KERNEL with GFP_ATOMIC. Signed-off-by: Benjamin Block Acked-by: David Rientjes Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index cb4459b..76b7f5e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -643,7 +643,7 @@ static int fib6_commit_metrics(struct dst_entry *dst, if (dst->flags & DST_HOST) { mp = dst_metrics_write_ptr(dst); } else { - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); if (!mp) return -ENOMEM; dst_init_metrics(dst, mp, 0); -- cgit v1.1 From 2ba5af42a7b59ef01f9081234d8855140738defd Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 21 Aug 2014 21:33:44 +0200 Subject: openvswitch: fix panic with multiple vlan headers When there are multiple vlan headers present in a received frame, the first one is put into vlan_tci and protocol is set to ETH_P_8021Q. Anything in the skb beyond the VLAN TPID may be still non-linear, including the inner TCI and ethertype. While ovs_flow_extract takes care of IP and IPv6 headers, it does nothing with ETH_P_8021Q. Later, if OVS_ACTION_ATTR_POP_VLAN is executed, __pop_vlan_tci pulls the next vlan header into vlan_tci. This leads to two things: 1. Part of the resulting ethernet header is in the non-linear part of the skb. When eth_type_trans is called later as the result of OVS_ACTION_ATTR_OUTPUT, kernel BUGs in __skb_pull. Also, __pop_vlan_tci is in fact accessing random data when it reads past the TPID. 2. network_header points into the ethernet header instead of behind it. mac_len is set to a wrong value (10), too. Reported-by: Yulong Pei Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index fe5cda0..5231652 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -42,6 +42,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, static int make_writable(struct sk_buff *skb, int write_len) { + if (!pskb_may_pull(skb, write_len)) + return -ENOMEM; + if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; @@ -70,6 +73,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) vlan_set_encap_proto(skb, vhdr); skb->mac_header += VLAN_HLEN; + if (skb_network_offset(skb) < ETH_HLEN) + skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_len(skb); return 0; -- cgit v1.1 From ea4f19c1f81d4bf709c74e3789ec785828bc6e51 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Aug 2014 13:03:29 +0200 Subject: net: sctp: spare unnecessary comparison in sctp_trans_elect_best When both transports are the same, we don't have to go down that road only to realize that we will return the very same transport. We are guaranteed that curr is always non-NULL. Therefore, just short-circuit this special case. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index aaafb32..104fae4 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1245,7 +1245,7 @@ static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr, { u8 score_curr, score_best; - if (best == NULL) + if (best == NULL || curr == best) return curr; score_curr = sctp_trans_score(curr); -- cgit v1.1 From aa4a83ee8bbc08342c4acfd59ef234cac51a1eef Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Aug 2014 13:03:30 +0200 Subject: net: sctp: fix suboptimal edge-case on non-active active/retrans path selection In SCTP, selection of active (T.ACT) and retransmission (T.RET) transports is being done whenever transport control operations (UP, DOWN, PF, ...) are engaged through sctp_assoc_control_transport(). Commits 4c47af4d5eb2 ("net: sctp: rework multihoming retransmission path selection to rfc4960") and a7288c4dd509 ("net: sctp: improve sctp_select_active_and_retran_path selection") have both improved it towards a more fine-grained and optimal path selection. Currently, the selection algorithm for T.ACT and T.RET is as follows: 1) Elect the two most recently used ACTIVE transports T1, T2 for T.ACT, T.RET, where T.ACT<-T1 and T1 is most recently used 2) In case primary path T.PRI not in {T1, T2} but ACTIVE, set T.ACT<-T.PRI and T.RET<-T1 3) If only T1 is ACTIVE from the set, set T.ACT<-T1 and T.RET<-T1 4) If none is ACTIVE, set T.ACT<-best(T.PRI, T.RET, T3) where T3 is the most recently used (if avail) in PF, set T.RET<-T.PRI Prior to above commits, 4) was simply a camp on T.ACT<-T.PRI and T.RET<-T.PRI, ignoring possible paths in PF. Camping on T.PRI is still slightly suboptimal as it can lead to the following scenario: Setup: T1: p1p1 (10.0.10.10) <==> .'`) <==> p1p1 (10.0.10.12) <= T.PRI T2: p1p2 (10.0.10.20) <==> (_ . ) <==> p1p2 (10.0.10.22) net.sctp.rto_min = 1000 net.sctp.path_max_retrans = 2 net.sctp.pf_retrans = 0 net.sctp.hb_interval = 1000 T.PRI is permanently down, T2 is put briefly into PF state (e.g. due to link flapping). Here, the first time transmission is sent over PF path T2 as it's the only non-INACTIVE path, but the retransmitted data-chunks are sent over the INACTIVE path T1 (T.PRI), which is not good. After the patch, it's choosing better transports in both cases by modifying step 4): 4) If none is ACTIVE, set T.ACT_new<-best(T.ACT_old, T3) where T3 is the most recently used (if avail) in PF, set T.RET<-T.ACT_new This will still select a best possible path in PF if available (which can also include T.PRI/T.RET), and set both T.ACT/T.RET to it. In case sctp_assoc_control_transport() *just* put T.ACT_old into INACTIVE as it transitioned from ACTIVE->PF->INACTIVE and stays in INACTIVE just for a very short while before going back ACTIVE, it will guarantee that this path will be reselected for T.ACT/T.RET since T3 (PF) is not available. Previously, this was not possible, as we would only select between T.PRI and T.RET, and a possible T3 would be NULL due to the fact that we have just transitioned T3 in sctp_assoc_control_transport() from PF->INACTIVE and would select a suboptimal path when T.PRI/T.RET have worse properties. In the case that T.ACT_old permanently went to INACTIVE during this transition and there's no PF path available, plus T.PRI and T.RET are INACTIVE as well, we would now camp on T.ACT_old, but if everything is being INACTIVE there's really not much we can do except hoping for a successful HB to bring one of the transports back up again and, thus cause a new selection through sctp_assoc_control_transport(). Now both tests work fine: Case 1: 1. T1 S(ACTIVE) T.ACT T2 S(ACTIVE) T.RET 2. T1 S(ACTIVE) T.ACT, T.RET T2 S(PF) 3. T1 S(ACTIVE) T.ACT, T.RET T2 S(INACTIVE) 5. T1 S(PF) T.ACT, T.RET T2 S(INACTIVE) [ 5.1 T1 S(INACTIVE) T.ACT, T.RET T2 S(INACTIVE) ] 6. T1 S(ACTIVE) T.ACT, T.RET T2 S(INACTIVE) 7. T1 S(ACTIVE) T.ACT T2 S(ACTIVE) T.RET Case 2: 1. T1 S(ACTIVE) T.ACT T2 S(ACTIVE) T.RET 2. T1 S(PF) T2 S(ACTIVE) T.ACT, T.RET 3. T1 S(INACTIVE) T2 S(ACTIVE) T.ACT, T.RET 5. T1 S(INACTIVE) T2 S(PF) T.ACT, T.RET [ 5.1 T1 S(INACTIVE) T2 S(INACTIVE) T.ACT, T.RET ] 6. T1 S(INACTIVE) T2 S(ACTIVE) T.ACT, T.RET 7. T1 S(ACTIVE) T.ACT T2 S(ACTIVE) T.RET Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 104fae4..a88b852 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1356,14 +1356,11 @@ static void sctp_select_active_and_retran_path(struct sctp_association *asoc) trans_sec = trans_pri; /* If we failed to find a usable transport, just camp on the - * primary or retran, even if they are inactive, if possible - * pick a PF iff it's the better choice. + * active or pick a PF iff it's the better choice. */ if (trans_pri == NULL) { - trans_pri = sctp_trans_elect_best(asoc->peer.primary_path, - asoc->peer.retran_path); - trans_pri = sctp_trans_elect_best(trans_pri, trans_pf); - trans_sec = asoc->peer.primary_path; + trans_pri = sctp_trans_elect_best(asoc->peer.active_path, trans_pf); + trans_sec = trans_pri; } /* Set the active and retran transports. */ -- cgit v1.1 From 47e4df94d129cbca84de252ff63c4ded08a513e7 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Mon, 18 Aug 2014 13:19:09 +0200 Subject: mac80211: fix channel switch for chanctx-based drivers The new_ctx pointer is set only for non-chanctx drivers. This yielded a crash for chanctx-based drivers during channel switch finalization: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: ieee80211_vif_use_reserved_switch+0x71c/0xb00 [mac80211] Use an adequate chanctx pointer to fix this. Reported-by: Linus Torvalds Signed-off-by: Michal Kazior Signed-off-by: Linus Torvalds --- net/mac80211/chan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 6d537f0..0375009 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1444,7 +1444,7 @@ ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) list_del(&sdata->reserved_chanctx_list); list_move(&sdata->assigned_chanctx_list, - &new_ctx->assigned_vifs); + &ctx->assigned_vifs); sdata->reserved_chanctx = NULL; ieee80211_vif_chanctx_reservation_complete(sdata); -- cgit v1.1 From d1c85c2ebe7ffe1f1b27846bd1ba0944c513d822 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Fri, 22 Aug 2014 10:40:15 +0800 Subject: netfilter: HAVE_JUMP_LABEL instead of CONFIG_JUMP_LABEL Use HAVE_JUMP_LABEL as elsewhere in the kernel to ensure that the toolchain has the required support in addition to CONFIG_JUMP_LABEL being set. Signed-off-by: Zhouyi Zhou Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a93c97f..024a2e2 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo); struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -72,7 +72,7 @@ int nf_register_hook(struct nf_hook_ops *reg) } list_add_rcu(®->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; @@ -84,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); -- cgit v1.1 From fb70118c0e8b436eb0d957ef506a1d94028ae10c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 19 Aug 2014 15:41:32 +0300 Subject: net: rfkill: gpio: Add more Broadcom bluetooth ACPI IDs This adds one more ACPI ID of a Broadcom bluetooth chip. Signed-off-by: Mika Westerberg Signed-off-by: John W. Linville --- net/rfkill/rfkill-gpio.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 14c98e4..02a86a2 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -158,6 +158,7 @@ static const struct acpi_device_id rfkill_acpi_match[] = { { "BCM2E1A", RFKILL_TYPE_BLUETOOTH }, { "BCM2E39", RFKILL_TYPE_BLUETOOTH }, { "BCM2E3D", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E64", RFKILL_TYPE_BLUETOOTH }, { "BCM4752", RFKILL_TYPE_GPS }, { "LNV4752", RFKILL_TYPE_GPS }, { }, -- cgit v1.1 From 4c75431ac3520631f1d9e74aa88407e6374dbbc4 Mon Sep 17 00:00:00 2001 From: "Alexander Y. Fomichev" Date: Mon, 25 Aug 2014 16:26:45 +0400 Subject: net: prevent of emerging cross-namespace symlinks Code manipulating sysfs symlinks on adjacent net_devices(s) currently doesn't take into account that devices potentially belong to different namespaces. This patch trying to fix an issue as follows: - check for net_ns before creating / deleting symlink. for now only netdev_adjacent_rename_links and __netdev_adjacent_dev_remove are affected, afaics __netdev_adjacent_dev_insert implies both net_devs belong to the same namespace. - Drop all existing symlinks to / from all adj_devs before switching namespace and recreate them just after. Signed-off-by: Alexander Y. Fomichev Signed-off-by: David S. Miller --- net/core/dev.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b65a505..66738e9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4889,7 +4889,8 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); - if (netdev_adjacent_is_neigh_list(dev, dev_list)) + if (netdev_adjacent_is_neigh_list(dev, dev_list) && + net_eq(dev_net(dev),dev_net(adj_dev))) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); @@ -5159,11 +5160,65 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); +void netdev_adjacent_add_links(struct net_device *dev) +{ + struct netdev_adjacent *iter; + + struct net *net = dev_net(dev); + + list_for_each_entry(iter, &dev->adj_list.upper, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.lower); + netdev_adjacent_sysfs_add(dev, iter->dev, + &dev->adj_list.upper); + } + + list_for_each_entry(iter, &dev->adj_list.lower, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.upper); + netdev_adjacent_sysfs_add(dev, iter->dev, + &dev->adj_list.lower); + } +} + +void netdev_adjacent_del_links(struct net_device *dev) +{ + struct netdev_adjacent *iter; + + struct net *net = dev_net(dev); + + list_for_each_entry(iter, &dev->adj_list.upper, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_del(iter->dev, dev->name, + &iter->dev->adj_list.lower); + netdev_adjacent_sysfs_del(dev, iter->dev->name, + &dev->adj_list.upper); + } + + list_for_each_entry(iter, &dev->adj_list.lower, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_del(iter->dev, dev->name, + &iter->dev->adj_list.upper); + netdev_adjacent_sysfs_del(dev, iter->dev->name, + &dev->adj_list.lower); + } +} + void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) { struct netdev_adjacent *iter; + struct net *net = dev_net(dev); + list_for_each_entry(iter, &dev->adj_list.upper, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; netdev_adjacent_sysfs_del(iter->dev, oldname, &iter->dev->adj_list.lower); netdev_adjacent_sysfs_add(iter->dev, dev, @@ -5171,6 +5226,8 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) } list_for_each_entry(iter, &dev->adj_list.lower, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; netdev_adjacent_sysfs_del(iter->dev, oldname, &iter->dev->adj_list.upper); netdev_adjacent_sysfs_add(iter->dev, dev, @@ -6773,6 +6830,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Send a netdev-removed uevent to the old namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); + netdev_adjacent_del_links(dev); /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -6787,6 +6845,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Send a netdev-add uevent to the new namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + netdev_adjacent_add_links(dev); /* Fixup kobjects */ err = device_rename(&dev->dev, dev->name); -- cgit v1.1 From db115037bb57cdfe97078b13da762213f7980e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Mon, 25 Aug 2014 15:16:22 +0200 Subject: net: fix checksum features handling in netif_skb_features() This is follow-up to da08143b8520 ("vlan: more careful checksum features handling") which introduced more careful feature intersection in vlan code, taking into account that HW_CSUM should be considered superset of IP_CSUM/IPV6_CSUM. The same is needed in netif_skb_features() in order to avoid offloading mismatch warning when vlan is created on top of a bond consisting of slaves supporting IP/IPv6 checksumming but not vlan Tx offloading. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/core/dev.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 66738e9..ab9a165 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2587,13 +2587,19 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) return harmonize_features(skb, features); } - features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + features = netdev_intersect_features(features, + skb->dev->vlan_features | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) - features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; + features = netdev_intersect_features(features, + NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); return harmonize_features(skb, features); } -- cgit v1.1 From bb512ad0732232f1d2693bb68f31a76bed8f22ae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Aug 2014 12:08:09 +0200 Subject: Revert "mac80211: disable uAPSD if all ACs are under ACM" This reverts commit 24aa11ab8ae03292d38ec0dbd9bc2ac49fe8a6dd. That commit was wrong since it uses data that hasn't even been set up yet, but might be a hold-over from a previous connection. Additionally, it seems like a driver-specific workaround that shouldn't have been in mac80211 to start with. Cc: stable@vger.kernel.org Fixes: 24aa11ab8ae0 ("mac80211: disable uAPSD if all ACs are under ACM") Reviewed-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 31a8afa..b82a12a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4376,8 +4376,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (bss->wmm_used && bss->uapsd_supported && - (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) && - sdata->wmm_acm != 0xff) { + (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { assoc_data->uapsd = true; ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; } else { -- cgit v1.1 From 0e67c13667a72093aa3ef3cb54dd521e34e500fc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 25 Jul 2014 16:20:22 +0200 Subject: mac80211: ignore AP_VLAN in ieee80211_recalc_chanctx_chantype When bringing down the AP, a WARN_ON is hit because the bss config chandef is empty here. Since AP_VLAN channel settings do not matter for anything chanctx related (always inherits the settings from the AP interface), let's just ignore it here. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 6d537f0..4206a11 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -541,6 +541,8 @@ static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, continue; if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf) continue; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + continue; if (!compat) compat = &sdata->vif.bss_conf.chandef; -- cgit v1.1 From 3918edb0e6a8b16c2866f4657d9fed41f9da562d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 25 Jul 2014 16:20:23 +0200 Subject: mac80211: fix smps mode check for AP_VLAN In ieee80211_sta_ps_deliver_wakeup, sdata->smps_mode is checked. This is initialized only for the base AP interface, not the individual VLANs. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c6ee213..441875f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1094,8 +1094,11 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) unsigned long flags; struct ps_data *ps; - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, + u.ap); + + if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; else if (ieee80211_vif_is_mesh(&sdata->vif)) ps = &sdata->u.mesh.ps; -- cgit v1.1 From 6c6fa49649dc826b7ef81314324fe55cf1d0d954 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 28 Jun 2014 16:35:25 -0400 Subject: mac80211: mesh_plink: handle confirm frames with new plid The 802.11 standard says when processing a plink confirm frame: "If the peerLinkID in the mesh peering instance has not been set, the Local Link ID field of the Mesh Peering Confirm request shall be copied into the peerLinkID in the mesh peering instance." We were only doing this when receiving an open peering frame, but it could happen that the open frame gets lost and so we should handle this case rather than rejecting the confirm and failing the whole peering process. Reported-by: Yu Niiro Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 63b8741..c47194d 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -959,7 +959,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, if (!matches_local) event = CNF_RJCT; if (!mesh_plink_free_count(sdata) || - (sta->llid != llid || sta->plid != plid)) + sta->llid != llid || + (sta->plid && sta->plid != plid)) event = CNF_IGNR; else event = CNF_ACPT; @@ -1080,6 +1081,10 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata, goto unlock_rcu; } + /* 802.11-2012 13.3.7.2 - update plid on CNF if not set */ + if (!sta->plid && event == CNF_ACPT) + sta->plid = plid; + changed |= mesh_plink_fsm(sdata, sta, event); unlock_rcu: -- cgit v1.1 From c7dcb45facedbff84237adb77bd3ba50f75c0de4 Mon Sep 17 00:00:00 2001 From: Denton Gentry Date: Mon, 28 Jul 2014 23:36:32 -0700 Subject: mac80211: fix start_seq_num in Rx reorder offload sta->last_seq_ctrl is the seq_ctrl field from the last header seen, need to shift it 4 bits to extract the sequence number. Otherwise the ieee80211_sn_less() check at the top of ieee80211_sta_manage_reorder_buf drops frames until the sequence number catches up. Cc: Michal Kazior Signed-off-by: Denton Gentry Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 01eede7..f75e5f1 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1175,8 +1175,8 @@ static void ieee80211_iface_work(struct work_struct *work) if (sta) { u16 last_seq; - last_seq = le16_to_cpu( - sta->last_seq_ctrl[rx_agg->tid]); + last_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu( + sta->last_seq_ctrl[rx_agg->tid])); __ieee80211_start_rx_ba_session(sta, 0, 0, -- cgit v1.1 From 14b058bbce9279ee432f0944ca14df69f4a0d170 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Aug 2014 09:34:05 +0200 Subject: mac80211: fix agg_status debugfs file alignment The "RX active" string is too long, so the columns get shifted. Change it to just "RX" to avoid this. Signed-off-by: Johannes Berg --- net/mac80211/debugfs_sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 3db96648..86173c0 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -167,7 +167,7 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); p += scnprintf(p, sizeof(buf) + buf - p, - "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); + "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]); -- cgit v1.1 From ea1d5d7755a3e556de78cc757d1895d5c7180548 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 1 Aug 2014 10:36:17 +0300 Subject: ipvs: properly declare tunnel encapsulation The tunneling method should properly use tunnel encapsulation. Fixes problem with CHECKSUM_PARTIAL packets when TCP/UDP csum offload is supported. Thanks to Alex Gartrell for reporting the problem, providing solution and for all suggestions. Reported-by: Alex Gartrell Signed-off-by: Julian Anastasov Signed-off-by: Alex Gartrell Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 6f70bdd..56896a41 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -38,6 +38,7 @@ #include /* for ip_route_output */ #include #include +#include #include #include #include @@ -862,11 +863,15 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, old_iph = ip_hdr(skb); } - skb->transport_header = skb->network_header; - /* fix old IP header checksum */ ip_send_check(old_iph); + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + if (IS_ERR(skb)) + goto tx_error; + + skb->transport_header = skb->network_header; + skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -900,7 +905,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; tx_error: - kfree_skb(skb); + if (!IS_ERR(skb)) + kfree_skb(skb); rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; @@ -953,6 +959,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, old_iph = ipv6_hdr(skb); } + /* GSO: we need to provide proper SKB_GSO_ value for IPv6 */ + skb = iptunnel_handle_offloads(skb, false, 0); /* SKB_GSO_SIT/IPV6 */ + if (IS_ERR(skb)) + goto tx_error; + skb->transport_header = skb->network_header; skb_push(skb, sizeof(struct ipv6hdr)); @@ -988,7 +999,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; tx_error: - kfree_skb(skb); + if (!IS_ERR(skb)) + kfree_skb(skb); rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -- cgit v1.1 From eb90b0c734ad793d5f5bf230a9e9a4dcc48df8aa Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Aug 2014 17:53:41 +0300 Subject: ipvs: fix ipv6 hook registration for local replies commit fc604767613b6d2036cdc35b660bc39451040a47 ("ipvs: changes for local real server") from 2.6.37 introduced DNAT support to local real server but the IPv6 LOCAL_OUT handler ip_vs_local_reply6() is registered incorrectly as IPv4 hook causing any outgoing IPv4 traffic to be dropped depending on the IP header values. Chris tracked down the problem to CONFIG_IP_VS_IPV6=y Bug report: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1349768 Reported-by: Chris J Arges Tested-by: Chris J Arges Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e683675..5c34e8d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1906,7 +1906,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { { .hook = ip_vs_local_reply6, .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, }, -- cgit v1.1 From d9b2938aabf757da2d40153489b251d4fc3fdd18 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Aug 2014 20:49:34 -0700 Subject: net: attempt a single high order allocation In commit ed98df3361f0 ("net: use __GFP_NORETRY for high order allocations") we tried to address one issue caused by order-3 allocations. We still observe high latencies and system overhead in situations where compaction is not successful. Instead of trying order-3, order-2, and order-1, do a single order-3 best effort and immediately fallback to plain order-0. This mimics slub strategy to fallback to slab min order if the high order allocation used for performance failed. Order-3 allocations give a performance boost only if they can be done without recurring and expensive memory scan. Quoting David : The page allocator relies on synchronous (sync light) memory compaction after direct reclaim for allocations that don't retry and deferred compaction doesn't work with this strategy because the allocation order is always decreasing from the previous failed attempt. This means sync light compaction will always be encountered if memory cannot be defragmented or reclaimed several times during the skb_page_frag_refill() iteration. Signed-off-by: Eric Dumazet Acked-by: David Rientjes Signed-off-by: David S. Miller --- net/core/sock.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 2714811..2987057 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1822,6 +1822,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, order); if (page) goto fill_page; + /* Do not retry other high order allocations */ + order = 1; + max_page_order = 0; } order--; } @@ -1869,10 +1872,8 @@ EXPORT_SYMBOL(sock_alloc_send_skb); * no guarantee that allocations succeed. Therefore, @sz MUST be * less or equal than PAGE_SIZE. */ -bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio) +bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) { - int order; - if (pfrag->page) { if (atomic_read(&pfrag->page->_count) == 1) { pfrag->offset = 0; @@ -1883,20 +1884,21 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio) put_page(pfrag->page); } - order = SKB_FRAG_PAGE_ORDER; - do { - gfp_t gfp = prio; - - if (order) - gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; - pfrag->page = alloc_pages(gfp, order); + pfrag->offset = 0; + if (SKB_FRAG_PAGE_ORDER) { + pfrag->page = alloc_pages(gfp | __GFP_COMP | + __GFP_NOWARN | __GFP_NORETRY, + SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { - pfrag->offset = 0; - pfrag->size = PAGE_SIZE << order; + pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; return true; } - } while (--order >= 0); - + } + pfrag->page = alloc_page(gfp); + if (likely(pfrag->page)) { + pfrag->size = PAGE_SIZE; + return true; + } return false; } EXPORT_SYMBOL(skb_page_frag_refill); -- cgit v1.1 From 38ab1fa981d543e1b00f4ffbce4ddb480cd2effe Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Aug 2014 15:28:26 +0200 Subject: net: sctp: fix ABI mismatch through sctp_assoc_to_state helper Since SCTP day 1, that is, 19b55a2af145 ("Initial commit") from lksctp tree, the official header carries a copy of enum sctp_sstat_state that looks like (compared to the current in-kernel enumeration): User definition: Kernel definition: enum sctp_sstat_state { typedef enum { SCTP_EMPTY = 0, SCTP_CLOSED = 1, SCTP_STATE_CLOSED = 0, SCTP_COOKIE_WAIT = 2, SCTP_STATE_COOKIE_WAIT = 1, SCTP_COOKIE_ECHOED = 3, SCTP_STATE_COOKIE_ECHOED = 2, SCTP_ESTABLISHED = 4, SCTP_STATE_ESTABLISHED = 3, SCTP_SHUTDOWN_PENDING = 5, SCTP_STATE_SHUTDOWN_PENDING = 4, SCTP_SHUTDOWN_SENT = 6, SCTP_STATE_SHUTDOWN_SENT = 5, SCTP_SHUTDOWN_RECEIVED = 7, SCTP_STATE_SHUTDOWN_RECEIVED = 6, SCTP_SHUTDOWN_ACK_SENT = 8, SCTP_STATE_SHUTDOWN_ACK_SENT = 7, }; } sctp_state_t; This header was later on also placed into the uapi, so that user space programs can compile without having , but the shipped with instead. While RFC6458 under 8.2.1.Association Status (SCTP_STATUS) says that sstat_state can range from SCTP_CLOSED to SCTP_SHUTDOWN_ACK_SENT, we nevertheless have a what it appears to be dummy SCTP_EMPTY state from the very early days. While it seems to do just nothing, commit 0b8f9e25b0aa ("sctp: remove completely unsed EMPTY state") did the right thing and removed this dead code. That however, causes an off-by-one when the user asks the SCTP stack via SCTP_STATUS API and checks for the current socket state thus yielding possibly undefined behaviour in applications as they expect the kernel to tell the right thing. The enumeration had to be changed however as based on the current socket state, we access a function pointer lookup-table through this. Therefore, I think the best way to deal with this is just to add a helper function sctp_assoc_to_state() to encapsulate the off-by-one quirk. Reported-by: Tristan Su Fixes: 0b8f9e25b0aa ("sctp: remove completely unsed EMPTY state") Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index eb71d49..634a2ab 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4243,7 +4243,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, transport = asoc->peer.primary_path; status.sstat_assoc_id = sctp_assoc2id(asoc); - status.sstat_state = asoc->state; + status.sstat_state = sctp_assoc_to_state(asoc); status.sstat_rwnd = asoc->peer.rwnd; status.sstat_unackdata = asoc->unack_data; -- cgit v1.1 From d79a61d646db950b68dd79ecc627cb5f11e0d8ac Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 29 Aug 2014 09:34:49 +0200 Subject: netfilter: NETFILTER_XT_TARGET_LOG selects NF_LOG_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_NETFILTER_XT_TARGET_LOG is not selected anymore when jumping from 3.16 to 3.17-rc1 if you don't set on the new NF_LOG_IPV4 and NF_LOG_IPV6 switches. Change this to select the three new symbols NF_LOG_COMMON, NF_LOG_IPV4 and NF_LOG_IPV6 instead, so NETFILTER_XT_TARGET_LOG remains enabled when moving from old to new kernels. Reported-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 05eb177..4bef6eb 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -747,7 +747,9 @@ config NETFILTER_XT_TARGET_LED config NETFILTER_XT_TARGET_LOG tristate "LOG target support" - depends on NF_LOG_IPV4 && NF_LOG_IPV6 + select NF_LOG_COMMON + select NF_LOG_IPV4 + select NF_LOG_IPV6 if IPV6 default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in -- cgit v1.1 From 41ad82f7f8f59a472c8e8ccca56a9c6bdf3c5092 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Tue, 2 Sep 2014 14:26:17 +0200 Subject: netfilter: fix missing dependencies in NETFILTER_XT_TARGET_LOG make defconfig reports: warning: (NETFILTER_XT_TARGET_LOG) selects NF_LOG_IPV6 which has unmet direct dependencies (NET && INET && IPV6 && NETFILTER && NETFILTER_ADVANCED) Fixes: d79a61d netfilter: NETFILTER_XT_TARGET_LOG selects NF_LOG_* Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/ipv6/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index cf0b88f..2812816 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -57,7 +57,7 @@ config NFT_REJECT_IPV6 config NF_LOG_IPV6 tristate "IPv6 packet logging" - depends on NETFILTER_ADVANCED + default m if NETFILTER_ADVANCED=n select NF_LOG_COMMON config NF_NAT_IPV6 -- cgit v1.1 From 4ee45ea05c8710c7ab8a5eb1a72700b874712746 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 2 Sep 2014 20:52:28 +0800 Subject: openvswitch: fix a memory leak The user_skb maybe be leaked if the operation on it failed and codes skipped into the label "out:" without calling genlmsg_unicast. Cc: Pravin Shelar Signed-off-by: Li RongQing Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 7228ec3..35d866f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -404,7 +404,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, { struct ovs_header *upcall; struct sk_buff *nskb = NULL; - struct sk_buff *user_skb; /* to be queued to userspace */ + struct sk_buff *user_skb = NULL; /* to be queued to userspace */ struct nlattr *nla; struct genl_info info = { .dst_sk = ovs_dp_get_net(dp)->genl_sock, @@ -494,9 +494,11 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); + user_skb = NULL; out: if (err) skb_tx_error(skb); + kfree_skb(user_skb); kfree_skb(nskb); return err; } -- cgit v1.1 From bd8c78e78d5011d8111bc2533ee73b13a3bd6c42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 30 Jul 2014 14:55:26 +0200 Subject: nl80211: clear skb cb before passing to netlink In testmode and vendor command reply/event SKBs we use the skb cb data to store nl80211 parameters between allocation and sending. This causes the code for CONFIG_NETLINK_MMAP to get confused, because it takes ownership of the skb cb data when the SKB is handed off to netlink, and it doesn't explicitly clear it. Clear the skb cb explicitly when we're done and before it gets passed to netlink to avoid this issue. Cc: stable@vger.kernel.org [this goes way back] Reported-by: Assaf Azulay Reported-by: David Spinadel Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index df7b133..7257164 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6969,6 +6969,9 @@ void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp) struct nlattr *data = ((void **)skb->cb)[2]; enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE; + /* clear CB data for netlink core to own from now on */ + memset(skb->cb, 0, sizeof(skb->cb)); + nla_nest_end(skb, data); genlmsg_end(skb, hdr); @@ -9294,6 +9297,9 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb) void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; + /* clear CB data for netlink core to own from now on */ + memset(skb->cb, 0, sizeof(skb->cb)); + if (WARN_ON(!rdev->cur_cmd_info)) { kfree_skb(skb); return -EINVAL; -- cgit v1.1 From c5eba0b6f84eb4f0fdc1d8a4abc1c7d40db6e8a6 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 3 Sep 2014 17:43:45 +0800 Subject: openvswitch: distinguish between the dropped and consumed skb distinguish between the dropped and consumed skb, not assume the skb is consumed always Cc: Thomas Graf Cc: Pravin Shelar Signed-off-by: Li RongQing Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 35d866f..91d66b7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -265,8 +265,11 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.key = &key; upcall.userdata = NULL; upcall.portid = ovs_vport_find_upcall_portid(p, skb); - ovs_dp_upcall(dp, skb, &upcall); - consume_skb(skb); + error = ovs_dp_upcall(dp, skb, &upcall); + if (unlikely(error)) + kfree_skb(skb); + else + consume_skb(skb); stats_counter = &stats->n_missed; goto out; } -- cgit v1.1 From 785e21a89d77923852869f83ebd2689ec4d5ce54 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 3 Sep 2014 15:25:04 +0300 Subject: mac80211: use bss_conf->dtim_period instead of conf.ps_dtim_period sta_set_sinfo is obviously takes data for specific station. This specific station is attached to a specific virtual interface. Hence we should use the dtim_period from this virtual interface rather than the system wide dtim_period. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 441875f..a1e433b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1822,7 +1822,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; if (sdata->vif.bss_conf.use_short_slot) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; - sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period; + sinfo->bss_param.dtim_period = sdata->vif.bss_conf.dtim_period; sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; sinfo->sta_flags.set = 0; -- cgit v1.1 From a9ed4a2986e13011fcf4ed2d1a1647c53112f55b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 2 Sep 2014 10:29:29 +0200 Subject: ipv6: fix rtnl locking in setsockopt for anycast and multicast Calling setsockopt with IPV6_JOIN_ANYCAST or IPV6_LEAVE_ANYCAST triggers the assertion in addrconf_join_solict()/addrconf_leave_solict() ipv6_sock_ac_join(), ipv6_sock_ac_drop(), ipv6_sock_ac_close() need to take RTNL before calling ipv6_dev_ac_inc/dec. Same thing with ipv6_sock_mc_join(), ipv6_sock_mc_drop(), ipv6_sock_mc_close() before calling ipv6_dev_mc_inc/dec. This patch moves ASSERT_RTNL() up a level in the call stack. Signed-off-by: Cong Wang Signed-off-by: Sabrina Dubroca Reported-by: Tommi Rantala Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 15 +++++---------- net/ipv6/anycast.c | 12 ++++++++++++ net/ipv6/mcast.c | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0b239fc..aa0e135 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1690,14 +1690,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) addrconf_mod_dad_work(ifp, 0); } -/* Join to solicited addr multicast group. */ - +/* Join to solicited addr multicast group. + * caller must hold RTNL */ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; - ASSERT_RTNL(); - if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1705,12 +1703,11 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) ipv6_dev_mc_inc(dev, &maddr); } +/* caller must hold RTNL */ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; - ASSERT_RTNL(); - if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1718,12 +1715,11 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) __ipv6_dev_mc_dec(idev, &maddr); } +/* caller must hold RTNL */ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - ASSERT_RTNL(); - if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -1732,12 +1728,11 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) ipv6_dev_ac_inc(ifp->idev->dev, &addr); } +/* caller must hold RTNL */ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - ASSERT_RTNL(); - if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 2101832..45b9d81 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -77,6 +77,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) pac->acl_next = NULL; pac->acl_addr = *addr; + rtnl_lock(); rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; @@ -137,6 +138,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) error: rcu_read_unlock(); + rtnl_unlock(); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; @@ -171,13 +173,17 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) spin_unlock_bh(&ipv6_sk_ac_lock); + rtnl_lock(); rcu_read_lock(); dev = dev_get_by_index_rcu(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); rcu_read_unlock(); + rtnl_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); + if (!dev) + return -ENODEV; return 0; } @@ -198,6 +204,7 @@ void ipv6_sock_ac_close(struct sock *sk) spin_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; + rtnl_lock(); rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; @@ -212,6 +219,7 @@ void ipv6_sock_ac_close(struct sock *sk) pac = next; } rcu_read_unlock(); + rtnl_unlock(); } static void aca_put(struct ifacaddr6 *ac) @@ -233,6 +241,8 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr) struct rt6_info *rt; int err; + ASSERT_RTNL(); + idev = in6_dev_get(dev); if (idev == NULL) @@ -302,6 +312,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca, *prev_aca; + ASSERT_RTNL(); + write_lock_bh(&idev->lock); prev_aca = NULL; for (aca = idev->ac_list; aca; aca = aca->aca_next) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 617f095..a23b655 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -172,6 +172,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst->next = NULL; mc_lst->addr = *addr; + rtnl_lock(); rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; @@ -185,6 +186,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (dev == NULL) { rcu_read_unlock(); + rtnl_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; } @@ -202,6 +204,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (err) { rcu_read_unlock(); + rtnl_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return err; } @@ -212,6 +215,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) spin_unlock(&ipv6_sk_mc_lock); rcu_read_unlock(); + rtnl_unlock(); return 0; } @@ -229,6 +233,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) if (!ipv6_addr_is_multicast(addr)) return -EINVAL; + rtnl_lock(); spin_lock(&ipv6_sk_mc_lock); for (lnk = &np->ipv6_mc_list; (mc_lst = rcu_dereference_protected(*lnk, @@ -252,12 +257,15 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); + rtnl_unlock(); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); return 0; } } spin_unlock(&ipv6_sk_mc_lock); + rtnl_unlock(); return -EADDRNOTAVAIL; } @@ -302,6 +310,7 @@ void ipv6_sock_mc_close(struct sock *sk) if (!rcu_access_pointer(np->ipv6_mc_list)) return; + rtnl_lock(); spin_lock(&ipv6_sk_mc_lock); while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { @@ -328,6 +337,7 @@ void ipv6_sock_mc_close(struct sock *sk) spin_lock(&ipv6_sk_mc_lock); } spin_unlock(&ipv6_sk_mc_lock); + rtnl_unlock(); } int ip6_mc_source(int add, int omode, struct sock *sk, @@ -845,6 +855,8 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) struct ifmcaddr6 *mc; struct inet6_dev *idev; + ASSERT_RTNL(); + /* we need to take a reference on idev */ idev = in6_dev_get(dev); @@ -916,6 +928,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifmcaddr6 *ma, **map; + ASSERT_RTNL(); + write_lock_bh(&idev->lock); for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) { if (ipv6_addr_equal(&ma->mca_addr, addr)) { -- cgit v1.1 From eed4d839b0cdf9d84b0a9bc63de90fd5e1e886fb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 3 Sep 2014 14:12:55 +0200 Subject: l2tp: fix race while getting PMTU on PPP pseudo-wire Use dst_entry held by sk_dst_get() to retrieve tunnel's PMTU. The dst_mtu(__sk_dst_get(tunnel->sock)) call was racy. __sk_dst_get() could return NULL if tunnel->sock->sk_dst_cache was reset just before the call, thus making dst_mtu() dereference a NULL pointer: [ 1937.661598] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 [ 1937.664005] IP: [] pppol2tp_connect+0x33d/0x41e [l2tp_ppp] [ 1937.664005] PGD daf0c067 PUD d9f93067 PMD 0 [ 1937.664005] Oops: 0000 [#1] SMP [ 1937.664005] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6table_filter ip6_tables iptable_filter ip_tables ebtable_nat ebtables x_tables udp_tunnel pppoe pppox ppp_generic slhc deflate ctr twofish_generic twofish_x86_64_3way xts lrw gf128mul glue_helper twofish_x86_64 twofish_common blowfish_generic blowfish_x86_64 blowfish_common des_generic cbc xcbc rmd160 sha512_generic hmac crypto_null af_key xfrm_algo 8021q garp bridge stp llc tun atmtcp clip atm ext3 mbcache jbd iTCO_wdt coretemp kvm_intel iTCO_vendor_support kvm pcspkr evdev ehci_pci lpc_ich mfd_core i5400_edac edac_core i5k_amb shpchp button processor thermal_sys xfs crc32c_generic libcrc32c dm_mod usbhid sg hid sr_mod sd_mod cdrom crc_t10dif crct10dif_common ata_generic ahci ata_piix tg3 libahci libata uhci_hcd ptp ehci_hcd pps_core usbcore scsi_mod libphy usb_common [last unloaded: l2tp_core] [ 1937.664005] CPU: 0 PID: 10022 Comm: l2tpstress Tainted: G O 3.17.0-rc1 #1 [ 1937.664005] Hardware name: HP ProLiant DL160 G5, BIOS O12 08/22/2008 [ 1937.664005] task: ffff8800d8fda790 ti: ffff8800c43c4000 task.ti: ffff8800c43c4000 [ 1937.664005] RIP: 0010:[] [] pppol2tp_connect+0x33d/0x41e [l2tp_ppp] [ 1937.664005] RSP: 0018:ffff8800c43c7de8 EFLAGS: 00010282 [ 1937.664005] RAX: ffff8800da8a7240 RBX: ffff8800d8c64600 RCX: 000001c325a137b5 [ 1937.664005] RDX: 8c6318c6318c6320 RSI: 000000000000010c RDI: 0000000000000000 [ 1937.664005] RBP: ffff8800c43c7ea8 R08: 0000000000000000 R09: 0000000000000000 [ 1937.664005] R10: ffffffffa048e2c0 R11: ffff8800d8c64600 R12: ffff8800ca7a5000 [ 1937.664005] R13: ffff8800c439bf40 R14: 000000000000000c R15: 0000000000000009 [ 1937.664005] FS: 00007fd7f610f700(0000) GS:ffff88011a600000(0000) knlGS:0000000000000000 [ 1937.664005] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 1937.664005] CR2: 0000000000000020 CR3: 00000000d9d75000 CR4: 00000000000027e0 [ 1937.664005] Stack: [ 1937.664005] ffffffffa049da80 ffff8800d8fda790 000000000000005b ffff880000000009 [ 1937.664005] ffff8800daf3f200 0000000000000003 ffff8800c43c7e48 ffffffff81109b57 [ 1937.664005] ffffffff81109b0e ffffffff8114c566 0000000000000000 0000000000000000 [ 1937.664005] Call Trace: [ 1937.664005] [] ? pppol2tp_connect+0x235/0x41e [l2tp_ppp] [ 1937.664005] [] ? might_fault+0x9e/0xa5 [ 1937.664005] [] ? might_fault+0x55/0xa5 [ 1937.664005] [] ? rcu_read_unlock+0x1c/0x26 [ 1937.664005] [] SYSC_connect+0x87/0xb1 [ 1937.664005] [] ? sysret_check+0x1b/0x56 [ 1937.664005] [] ? trace_hardirqs_on_caller+0x145/0x1a1 [ 1937.664005] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 1937.664005] [] ? spin_lock+0x9/0xb [ 1937.664005] [] SyS_connect+0x9/0xb [ 1937.664005] [] system_call_fastpath+0x16/0x1b [ 1937.664005] Code: 10 2a 84 81 e8 65 76 bd e0 65 ff 0c 25 10 bb 00 00 4d 85 ed 74 37 48 8b 85 60 ff ff ff 48 8b 80 88 01 00 00 48 8b b8 10 02 00 00 <48> 8b 47 20 ff 50 20 85 c0 74 0f 83 e8 28 89 83 10 01 00 00 89 [ 1937.664005] RIP [] pppol2tp_connect+0x33d/0x41e [l2tp_ppp] [ 1937.664005] RSP [ 1937.664005] CR2: 0000000000000020 [ 1939.559375] ---[ end trace 82d44500f28f8708 ]--- Fixes: f34c4a35d879 ("l2tp: take PMTU from tunnel UDP socket") Signed-off-by: Guillaume Nault Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 13752d9..b704a93 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -755,7 +755,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, /* If PMTU discovery was enabled, use the MTU that was discovered */ dst = sk_dst_get(tunnel->sock); if (dst != NULL) { - u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock)); + u32 pmtu = dst_mtu(dst); + if (pmtu != 0) session->mtu = session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD; -- cgit v1.1 From c199105d154e029cd8c94cccd35bd073e64acc45 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 3 Sep 2014 12:01:18 -0400 Subject: net-timestamp: only report sw timestamp if reporting bit is set The timestamping API has separate bits for generating and reporting timestamps. A software timestamp should only be reported for a packet when the packet has the relevant generation flag (SKBTX_..) set and the socket has reporting bit SOF_TIMESTAMPING_SOFTWARE set. The second check was accidentally removed. Reinstitute the original behavior. Tested: Without this patch, Documentation/networking/txtimestamp reports timestamps regardless of whether SOF_TIMESTAMPING_SOFTWARE is set. After the patch, it only reports them when the flag is set. Fixes: f24b9be5957b ("net-timestamp: extend SCM_TIMESTAMPING ancillary data struct") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 95ee7d8..4eb09b3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -734,8 +734,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, } memset(&tss, 0, sizeof(tss)); - if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE || - skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP) && + if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && -- cgit v1.1 From f24062b07dda89b0e24fa48e7bc3865a725f5ee6 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 3 Sep 2014 23:59:21 +0200 Subject: ipv6: fix a refcnt leak with peer addr There is no reason to take a refcnt before deleting the peer address route. It's done some lines below for the local prefix route because inet6_ifa_finish_destroy() will release it at the end. For the peer address route, we want to free it right now. This bug has been introduced by commit caeaba79009c ("ipv6: add support of peer address"). Signed-off-by: Nicolas Dichtel Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index aa0e135..ce761c7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4772,11 +4772,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL, dev->ifindex, 1); - if (rt) { - dst_hold(&rt->dst); - if (ip6_del_rt(rt)) - dst_free(&rt->dst); - } + if (rt && ip6_del_rt(rt)) + dst_free(&rt->dst); } dst_hold(&ifp->rt->dst); -- cgit v1.1 From e7478dfc4656f4a739ed1b07cfd59c12f8eb112e Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 3 Sep 2014 23:59:22 +0200 Subject: ipv6: use addrconf_get_prefix_route() to remove peer addr addrconf_get_prefix_route() ensures to get the right route in the right table. Signed-off-by: Nicolas Dichtel Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ce761c7..fc1fac2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4768,10 +4768,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_solict(ifp->idev, &ifp->addr); if (!ipv6_addr_any(&ifp->peer_addr)) { struct rt6_info *rt; - struct net_device *dev = ifp->idev->dev; - rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL, - dev->ifindex, 1); + rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, + ifp->idev->dev, 0, 0); if (rt && ip6_del_rt(rt)) dst_free(&rt->dst); } -- cgit v1.1 From 84a59ca55f699d1d1fbfffd75445bcfe0c3daf06 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Sep 2014 09:47:21 +0200 Subject: netfilter: add explicit Kconfig for NETFILTER_XT_NAT Paul Bolle reports that 'select NETFILTER_XT_NAT' from the IPV4 and IPV6 NAT tables becomes noop since there is no Kconfig switch for it. Add the Kconfig switch to resolve this problem. Fixes: 8993cf8 netfilter: move NAT Kconfig switches out of the iptables scope Reported-by: Paul Bolle Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/Kconfig | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4bef6eb..b5c1d3a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -766,6 +766,14 @@ config NETFILTER_XT_TARGET_MARK (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). +config NETFILTER_XT_NAT + tristate '"SNAT and DNAT" targets support' + depends on NF_NAT + ---help--- + This option enables the SNAT and DNAT targets. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_NETMAP tristate '"NETMAP" target support' depends on NF_NAT -- cgit v1.1 From e793c0f70e9bdf4a2e71c151a1a3cf85c4db92ad Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 4 Sep 2014 23:44:36 +0900 Subject: net: treewide: Fix typo found in DocBook/networking.xml This patch fix spelling typo found in DocBook/networking.xml. It is because the neworking.xml is generated from comments in the source, I have to fix typo in comments within the source. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- net/core/gen_estimator.c | 2 +- net/core/gen_stats.c | 2 +- net/core/skbuff.c | 4 ++-- net/core/sock.c | 4 ++-- net/socket.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 488dd1a..fdbc9a8 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -775,7 +775,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) EXPORT_SYMBOL(__skb_checksum_complete); /** - * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec. + * skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec. * @skb: skbuff * @hlen: hardware length * @iov: io vector diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 6b5b6e7..9d33dff 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -197,7 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * as destination. A new timer with the interval specified in the * configuration TLV is created. Upon each interval, the latest statistics * will be read from &bstats and the estimated rate will be stored in - * &rate_est with the statistics lock grabed during this period. + * &rate_est with the statistics lock grabbed during this period. * * Returns 0 on success or a negative error code. * diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 9d3d9e7..2ddbce4 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -206,7 +206,7 @@ EXPORT_SYMBOL(gnet_stats_copy_queue); * @st: application specific statistics data * @len: length of data * - * Appends the application sepecific statistics to the top level TLV created by + * Appends the application specific statistics to the top level TLV created by * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping * handle is in backward compatibility mode. * diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 163b673..da1378a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2647,7 +2647,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read); * skb_seq_read() will return the remaining part of the block. * * Note 1: The size of each block of data returned can be arbitrary, - * this limitation is the cost for zerocopy seqeuental + * this limitation is the cost for zerocopy sequential * reads of potentially non linear data. * * Note 2: Fragment lists within fragments are not implemented @@ -2781,7 +2781,7 @@ EXPORT_SYMBOL(skb_find_text); /** * skb_append_datato_frags - append the user data to a skb * @sk: sock structure - * @skb: skb structure to be appened with user data. + * @skb: skb structure to be appended with user data. * @getfrag: call back function to be used for getting the user data * @from: pointer to user message iov * @length: length of the iov message diff --git a/net/core/sock.c b/net/core/sock.c index 2987057..d372b4b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -166,7 +166,7 @@ EXPORT_SYMBOL(sk_ns_capable); /** * sk_capable - Socket global capability test * @sk: Socket to use a capability on or through - * @cap: The global capbility to use + * @cap: The global capability to use * * Test to see if the opener of the socket had when the socket was * created and the current process has the capability @cap in all user @@ -183,7 +183,7 @@ EXPORT_SYMBOL(sk_capable); * @sk: Socket to use a capability on or through * @cap: The capability to use * - * Test to see if the opener of the socket had when the socke was created + * Test to see if the opener of the socket had when the socket was created * and the current process has the capability @cap over the network namespace * the socket is a member of. */ diff --git a/net/socket.c b/net/socket.c index 4eb09b3..2e2586e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2601,7 +2601,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) * * This function is called by a protocol handler that wants to * advertise its address family, and have it linked into the - * socket interface. The value ops->family coresponds to the + * socket interface. The value ops->family corresponds to the * socket system call protocol family. */ int sock_register(const struct net_proto_family *ops) -- cgit v1.1 From de185ab46cb02df9738b0d898b0c3a89181c5526 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 5 Sep 2014 14:33:00 -0700 Subject: ipv6: restore the behavior of ipv6_sock_ac_drop() It is possible that the interface is already gone after joining the list of anycast on this interface as we don't hold a refcount for the device, in this case we are safe to ignore the error. What's more important, for API compatibility we should not change this behavior for applications even if it were correct. Fixes: commit a9ed4a2986e13011 ("ipv6: fix rtnl locking in setsockopt for anycast and multicast") Cc: Sabrina Dubroca Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/anycast.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 45b9d81..ff2de7d 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -182,8 +182,6 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) rtnl_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); - if (!dev) - return -ENODEV; return 0; } -- cgit v1.1 From 82d5e2b8b466d5bfc7c6278a7c04a53b9b287673 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Sep 2014 04:00:00 -0700 Subject: net: fix skb_page_frag_refill() kerneldoc In commit d9b2938aabf7 ("net: attempt a single high order allocation) I forgot to update kerneldoc, as @prio parameter was renamed to @gfp Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index d372b4b..9c3f823 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1866,7 +1866,7 @@ EXPORT_SYMBOL(sock_alloc_send_skb); * skb_page_frag_refill - check that a page_frag contains enough room * @sz: minimum size of the fragment we want to get * @pfrag: pointer to page_frag - * @prio: priority for memory allocation + * @gfp: priority for memory allocation * * Note: While this allocator tries to use high order pages, there is * no guarantee that allocations succeed. Therefore, @sz MUST be -- cgit v1.1 From 6a2a2b3ae0759843b22c929881cc184b00cc63ff Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Mon, 8 Sep 2014 14:49:59 -0700 Subject: net:socket: set msg_namelen to 0 if msg_name is passed as NULL in msghdr struct from userland. Linux manpage for recvmsg and sendmsg calls does not explicitly mention setting msg_namelen to 0 when msg_name passed set as NULL. When developers don't set msg_namelen member in msghdr, it might contain garbage value which will fail the validation check and sendmsg and recvmsg calls from kernel will return EINVAL. This will break old binaries and any code for which there is no access to source code. To fix this, we set msg_namelen to 0 when msg_name is passed as NULL from userland. Signed-off-by: Ani Sinha Signed-off-by: David S. Miller --- net/socket.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 2e2586e..4cdbc10 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1996,6 +1996,9 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; + if (kmsg->msg_name == NULL) + kmsg->msg_namelen = 0; + if (kmsg->msg_namelen < 0) return -EINVAL; -- cgit v1.1 From ed3bfdfdced76aa7edb0b05c0d739ee3a2a6e619 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Sep 2014 15:19:44 +0100 Subject: RxRPC: Fix missing __user annotation Fix a missing __user annotation in a cast of a user space pointer (found by checker). Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/ar-key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index b45d080..1b24191 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -1143,7 +1143,7 @@ static long rxrpc_read(const struct key *key, if (copy_to_user(xdr, (s), _l) != 0) \ goto fault; \ if (_l & 3 && \ - copy_to_user((u8 *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \ + copy_to_user((u8 __user *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \ goto fault; \ xdr += (_l + 3) >> 2; \ } while(0) -- cgit v1.1 From 73c3d4812b4c755efeca0140f606f83772a39ce4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 4 Aug 2014 07:01:54 -0700 Subject: libceph: gracefully handle large reply messages from the mon We preallocate a few of the message types we get back from the mon. If we get a larger message than we are expecting, fall back to trying to allocate a new one instead of blindly using the one we have. CC: stable@vger.kernel.org Signed-off-by: Sage Weil Reviewed-by: Ilya Dryomov --- net/ceph/mon_client.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 067d3af..61fcfc3 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1181,7 +1181,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, if (!m) { pr_info("alloc_msg unknown type %d\n", type); *skip = 1; + } else if (front_len > m->front_alloc_len) { + pr_warning("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n", + front_len, m->front_alloc_len, + (unsigned int)con->peer_name.type, + le64_to_cpu(con->peer_name.num)); + ceph_msg_put(m); + m = ceph_msg_new(type, front_len, GFP_NOFS, false); } + return m; } -- cgit v1.1 From 597cda357716a3cf8d994cb11927af917c8d71fa Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 8 Sep 2014 17:25:34 +0400 Subject: libceph: add process_one_ticket() helper Add a helper for processing individual cephx auth tickets. Needed for the next commit, which deals with allocating ticket buffers. (Most of the diff here is whitespace - view with git diff -b). Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/auth_x.c | 228 +++++++++++++++++++++++++++++------------------------- 1 file changed, 124 insertions(+), 104 deletions(-) (limited to 'net') diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 96238ba..0eb146d 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -129,17 +129,131 @@ static void remove_ticket_handler(struct ceph_auth_client *ac, kfree(th); } +static int process_one_ticket(struct ceph_auth_client *ac, + struct ceph_crypto_key *secret, + void **p, void *end, + void *dbuf, void *ticket_buf) +{ + struct ceph_x_info *xi = ac->private; + int type; + u8 tkt_struct_v, blob_struct_v; + struct ceph_x_ticket_handler *th; + void *dp, *dend; + int dlen; + char is_enc; + struct timespec validity; + struct ceph_crypto_key old_key; + void *tp, *tpend; + struct ceph_timespec new_validity; + struct ceph_crypto_key new_session_key; + struct ceph_buffer *new_ticket_blob; + unsigned long new_expires, new_renew_after; + u64 new_secret_id; + int ret; + + ceph_decode_need(p, end, sizeof(u32) + 1, bad); + + type = ceph_decode_32(p); + dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); + + tkt_struct_v = ceph_decode_8(p); + if (tkt_struct_v != 1) + goto bad; + + th = get_ticket_handler(ac, type); + if (IS_ERR(th)) { + ret = PTR_ERR(th); + goto out; + } + + /* blob for me */ + dlen = ceph_x_decrypt(secret, p, end, dbuf, + TEMP_TICKET_BUF_LEN); + if (dlen <= 0) { + ret = dlen; + goto out; + } + dout(" decrypted %d bytes\n", dlen); + dp = dbuf; + dend = dp + dlen; + + tkt_struct_v = ceph_decode_8(&dp); + if (tkt_struct_v != 1) + goto bad; + + memcpy(&old_key, &th->session_key, sizeof(old_key)); + ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); + if (ret) + goto out; + + ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); + ceph_decode_timespec(&validity, &new_validity); + new_expires = get_seconds() + validity.tv_sec; + new_renew_after = new_expires - (validity.tv_sec / 4); + dout(" expires=%lu renew_after=%lu\n", new_expires, + new_renew_after); + + /* ticket blob for service */ + ceph_decode_8_safe(p, end, is_enc, bad); + tp = ticket_buf; + if (is_enc) { + /* encrypted */ + dout(" encrypted ticket\n"); + dlen = ceph_x_decrypt(&old_key, p, end, ticket_buf, + TEMP_TICKET_BUF_LEN); + if (dlen < 0) { + ret = dlen; + goto out; + } + dlen = ceph_decode_32(&tp); + } else { + /* unencrypted */ + ceph_decode_32_safe(p, end, dlen, bad); + ceph_decode_need(p, end, dlen, bad); + ceph_decode_copy(p, ticket_buf, dlen); + } + tpend = tp + dlen; + dout(" ticket blob is %d bytes\n", dlen); + ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); + blob_struct_v = ceph_decode_8(&tp); + new_secret_id = ceph_decode_64(&tp); + ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); + if (ret) + goto out; + + /* all is well, update our ticket */ + ceph_crypto_key_destroy(&th->session_key); + if (th->ticket_blob) + ceph_buffer_put(th->ticket_blob); + th->session_key = new_session_key; + th->ticket_blob = new_ticket_blob; + th->validity = new_validity; + th->secret_id = new_secret_id; + th->expires = new_expires; + th->renew_after = new_renew_after; + dout(" got ticket service %d (%s) secret_id %lld len %d\n", + type, ceph_entity_type_name(type), th->secret_id, + (int)th->ticket_blob->vec.iov_len); + xi->have_keys |= th->service; + +out: + return ret; + +bad: + ret = -EINVAL; + goto out; +} + static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, struct ceph_crypto_key *secret, void *buf, void *end) { - struct ceph_x_info *xi = ac->private; - int num; void *p = buf; - int ret; char *dbuf; char *ticket_buf; u8 reply_struct_v; + u32 num; + int ret; dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); if (!dbuf) @@ -150,112 +264,18 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, if (!ticket_buf) goto out_dbuf; - ceph_decode_need(&p, end, 1 + sizeof(u32), bad); - reply_struct_v = ceph_decode_8(&p); + ceph_decode_8_safe(&p, end, reply_struct_v, bad); if (reply_struct_v != 1) - goto bad; - num = ceph_decode_32(&p); - dout("%d tickets\n", num); - while (num--) { - int type; - u8 tkt_struct_v, blob_struct_v; - struct ceph_x_ticket_handler *th; - void *dp, *dend; - int dlen; - char is_enc; - struct timespec validity; - struct ceph_crypto_key old_key; - void *tp, *tpend; - struct ceph_timespec new_validity; - struct ceph_crypto_key new_session_key; - struct ceph_buffer *new_ticket_blob; - unsigned long new_expires, new_renew_after; - u64 new_secret_id; - - ceph_decode_need(&p, end, sizeof(u32) + 1, bad); - - type = ceph_decode_32(&p); - dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); - - tkt_struct_v = ceph_decode_8(&p); - if (tkt_struct_v != 1) - goto bad; - - th = get_ticket_handler(ac, type); - if (IS_ERR(th)) { - ret = PTR_ERR(th); - goto out; - } - - /* blob for me */ - dlen = ceph_x_decrypt(secret, &p, end, dbuf, - TEMP_TICKET_BUF_LEN); - if (dlen <= 0) { - ret = dlen; - goto out; - } - dout(" decrypted %d bytes\n", dlen); - dend = dbuf + dlen; - dp = dbuf; - - tkt_struct_v = ceph_decode_8(&dp); - if (tkt_struct_v != 1) - goto bad; + return -EINVAL; - memcpy(&old_key, &th->session_key, sizeof(old_key)); - ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); - if (ret) - goto out; + ceph_decode_32_safe(&p, end, num, bad); + dout("%d tickets\n", num); - ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); - ceph_decode_timespec(&validity, &new_validity); - new_expires = get_seconds() + validity.tv_sec; - new_renew_after = new_expires - (validity.tv_sec / 4); - dout(" expires=%lu renew_after=%lu\n", new_expires, - new_renew_after); - - /* ticket blob for service */ - ceph_decode_8_safe(&p, end, is_enc, bad); - tp = ticket_buf; - if (is_enc) { - /* encrypted */ - dout(" encrypted ticket\n"); - dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf, - TEMP_TICKET_BUF_LEN); - if (dlen < 0) { - ret = dlen; - goto out; - } - dlen = ceph_decode_32(&tp); - } else { - /* unencrypted */ - ceph_decode_32_safe(&p, end, dlen, bad); - ceph_decode_need(&p, end, dlen, bad); - ceph_decode_copy(&p, ticket_buf, dlen); - } - tpend = tp + dlen; - dout(" ticket blob is %d bytes\n", dlen); - ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); - blob_struct_v = ceph_decode_8(&tp); - new_secret_id = ceph_decode_64(&tp); - ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); + while (num--) { + ret = process_one_ticket(ac, secret, &p, end, + dbuf, ticket_buf); if (ret) goto out; - - /* all is well, update our ticket */ - ceph_crypto_key_destroy(&th->session_key); - if (th->ticket_blob) - ceph_buffer_put(th->ticket_blob); - th->session_key = new_session_key; - th->ticket_blob = new_ticket_blob; - th->validity = new_validity; - th->secret_id = new_secret_id; - th->expires = new_expires; - th->renew_after = new_renew_after; - dout(" got ticket service %d (%s) secret_id %lld len %d\n", - type, ceph_entity_type_name(type), th->secret_id, - (int)th->ticket_blob->vec.iov_len); - xi->have_keys |= th->service; } ret = 0; -- cgit v1.1 From c27a3e4d667fdcad3db7b104f75659478e0c68d8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 9 Sep 2014 19:39:15 +0400 Subject: libceph: do not hard code max auth ticket len We hard code cephx auth ticket buffer size to 256 bytes. This isn't enough for any moderate setups and, in case tickets themselves are not encrypted, leads to buffer overflows (ceph_x_decrypt() errors out, but ceph_decode_copy() doesn't - it's just a memcpy() wrapper). Since the buffer is allocated dynamically anyway, allocated it a bit later, at the point where we know how much is going to be needed. Fixes: http://tracker.ceph.com/issues/8979 Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/auth_x.c | 64 +++++++++++++++++++++++++------------------------------ 1 file changed, 29 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 0eb146d..de6662b 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -13,8 +13,6 @@ #include "auth_x.h" #include "auth_x_protocol.h" -#define TEMP_TICKET_BUF_LEN 256 - static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); static int ceph_x_is_authenticated(struct ceph_auth_client *ac) @@ -64,7 +62,7 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, } static int ceph_x_decrypt(struct ceph_crypto_key *secret, - void **p, void *end, void *obuf, size_t olen) + void **p, void *end, void **obuf, size_t olen) { struct ceph_x_encrypt_header head; size_t head_len = sizeof(head); @@ -75,8 +73,14 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret, return -EINVAL; dout("ceph_x_decrypt len %d\n", len); - ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen, - *p, len); + if (*obuf == NULL) { + *obuf = kmalloc(len, GFP_NOFS); + if (!*obuf) + return -ENOMEM; + olen = len; + } + + ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len); if (ret) return ret; if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) @@ -131,18 +135,19 @@ static void remove_ticket_handler(struct ceph_auth_client *ac, static int process_one_ticket(struct ceph_auth_client *ac, struct ceph_crypto_key *secret, - void **p, void *end, - void *dbuf, void *ticket_buf) + void **p, void *end) { struct ceph_x_info *xi = ac->private; int type; u8 tkt_struct_v, blob_struct_v; struct ceph_x_ticket_handler *th; + void *dbuf = NULL; void *dp, *dend; int dlen; char is_enc; struct timespec validity; struct ceph_crypto_key old_key; + void *ticket_buf = NULL; void *tp, *tpend; struct ceph_timespec new_validity; struct ceph_crypto_key new_session_key; @@ -167,8 +172,7 @@ static int process_one_ticket(struct ceph_auth_client *ac, } /* blob for me */ - dlen = ceph_x_decrypt(secret, p, end, dbuf, - TEMP_TICKET_BUF_LEN); + dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0); if (dlen <= 0) { ret = dlen; goto out; @@ -195,20 +199,25 @@ static int process_one_ticket(struct ceph_auth_client *ac, /* ticket blob for service */ ceph_decode_8_safe(p, end, is_enc, bad); - tp = ticket_buf; if (is_enc) { /* encrypted */ dout(" encrypted ticket\n"); - dlen = ceph_x_decrypt(&old_key, p, end, ticket_buf, - TEMP_TICKET_BUF_LEN); + dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0); if (dlen < 0) { ret = dlen; goto out; } + tp = ticket_buf; dlen = ceph_decode_32(&tp); } else { /* unencrypted */ ceph_decode_32_safe(p, end, dlen, bad); + ticket_buf = kmalloc(dlen, GFP_NOFS); + if (!ticket_buf) { + ret = -ENOMEM; + goto out; + } + tp = ticket_buf; ceph_decode_need(p, end, dlen, bad); ceph_decode_copy(p, ticket_buf, dlen); } @@ -237,6 +246,8 @@ static int process_one_ticket(struct ceph_auth_client *ac, xi->have_keys |= th->service; out: + kfree(ticket_buf); + kfree(dbuf); return ret; bad: @@ -249,21 +260,10 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, void *buf, void *end) { void *p = buf; - char *dbuf; - char *ticket_buf; u8 reply_struct_v; u32 num; int ret; - dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); - if (!dbuf) - return -ENOMEM; - - ret = -ENOMEM; - ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); - if (!ticket_buf) - goto out_dbuf; - ceph_decode_8_safe(&p, end, reply_struct_v, bad); if (reply_struct_v != 1) return -EINVAL; @@ -272,22 +272,15 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, dout("%d tickets\n", num); while (num--) { - ret = process_one_ticket(ac, secret, &p, end, - dbuf, ticket_buf); + ret = process_one_ticket(ac, secret, &p, end); if (ret) - goto out; + return ret; } - ret = 0; -out: - kfree(ticket_buf); -out_dbuf: - kfree(dbuf); - return ret; + return 0; bad: - ret = -EINVAL; - goto out; + return -EINVAL; } static int ceph_x_build_authorizer(struct ceph_auth_client *ac, @@ -603,13 +596,14 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_x_ticket_handler *th; int ret = 0; struct ceph_x_authorize_reply reply; + void *preply = &reply; void *p = au->reply_buf; void *end = p + sizeof(au->reply_buf); th = get_ticket_handler(ac, au->service); if (IS_ERR(th)) return PTR_ERR(th); - ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); + ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply)); if (ret < 0) return ret; if (ret != sizeof(reply)) -- cgit v1.1 From 381f4dca48d23e155b936b86ccd3ff12f073cf0f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 10 Sep 2014 23:23:02 +0200 Subject: ipv6: clean up anycast when an interface is destroyed If we try to rmmod the driver for an interface while sockets with setsockopt(JOIN_ANYCAST) are alive, some refcounts aren't cleaned up and we get stuck on: unregister_netdevice: waiting for ens3 to become free. Usage count = 1 If we LEAVE_ANYCAST/close everything before rmmod'ing, there is no problem. We need to perform a cleanup similar to the one for multicast in addrconf_ifdown(how == 1). Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 8 +++++--- net/ipv6/anycast.c | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fc1fac2..3342ee6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3094,11 +3094,13 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_unlock_bh(&idev->lock); - /* Step 5: Discard multicast list */ - if (how) + /* Step 5: Discard anycast and multicast list */ + if (how) { + ipv6_ac_destroy_dev(idev); ipv6_mc_destroy_dev(idev); - else + } else { ipv6_mc_down(idev); + } idev->tstamp = jiffies; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index ff2de7d..9a38684 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -351,6 +351,27 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) return __ipv6_dev_ac_dec(idev, addr); } +void ipv6_ac_destroy_dev(struct inet6_dev *idev) +{ + struct ifacaddr6 *aca; + + write_lock_bh(&idev->lock); + while ((aca = idev->ac_list) != NULL) { + idev->ac_list = aca->aca_next; + write_unlock_bh(&idev->lock); + + addrconf_leave_solict(idev, &aca->aca_addr); + + dst_hold(&aca->aca_rt->dst); + ip6_del_rt(aca->aca_rt); + + aca_put(aca); + + write_lock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); +} + /* * check if the interface has this anycast address * called with rcu_read_lock() -- cgit v1.1 From 20adfa1a81af00bf2027644507ad4fa9cd2849cf Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 12 Sep 2014 16:26:16 -0400 Subject: bridge: Check if vlan filtering is enabled only once. The bridge code checks if vlan filtering is enabled on both ingress and egress. When the state flip happens, it is possible for the bridge to currently be forwarding packets and forwarding behavior becomes non-deterministic. Bridge may drop packets on some interfaces, but not others. This patch solves this by caching the filtered state of the packet into skb_cb on ingress. The skb_cb is guaranteed to not be over-written between the time packet entres bridge forwarding path and the time it leaves it. On egress, we can then check the cached state to see if we need to apply filtering information. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/bridge/br_private.h | 3 +++ net/bridge/br_vlan.c | 14 ++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 62a7fa2..b6c04cb 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -309,6 +309,9 @@ struct br_input_skb_cb { int igmp; int mrouters_only; #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + bool vlan_filtered; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index e1bcd65..f645197 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -125,7 +125,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, { u16 vid; - if (!br->vlan_enabled) + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) goto out; /* Vlan filter table must be configured at this point. The @@ -164,8 +165,10 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* If VLAN filtering is disabled on the bridge, all packets are * permitted. */ - if (!br->vlan_enabled) + if (!br->vlan_enabled) { + BR_INPUT_SKB_CB(skb)->vlan_filtered = false; return true; + } /* If there are no vlan in the permitted list, all packets are * rejected. @@ -173,6 +176,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) goto drop; + BR_INPUT_SKB_CB(skb)->vlan_filtered = true; proto = br->vlan_proto; /* If vlan tx offload is disabled on bridge device and frame was @@ -251,7 +255,8 @@ bool br_allowed_egress(struct net_bridge *br, { u16 vid; - if (!br->vlan_enabled) + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) return true; if (!v) @@ -270,7 +275,8 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) struct net_bridge *br = p->br; struct net_port_vlans *v; - if (!br->vlan_enabled) + /* If filtering was disabled at input, let it pass. */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) return true; v = rcu_dereference(p->vlan_info); -- cgit v1.1 From 635126b7ca13a01d6322fbf7bdc5d1c738d26807 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 12 Sep 2014 16:26:17 -0400 Subject: bridge: Allow clearing of pvid and untagged bitmap Currently, it is possible to modify the vlan filter configuration to add pvid or untagged support. For example: bridge vlan add vid 10 dev eth0 bridge vlan add vid 10 dev eth0 untagged pvid The second statement will modify vlan 10 to include untagged and pvid configuration. However, it is currently impossible to go backwards bridge vlan add vid 10 dev eth0 untagged pvid bridge vlan add vid 10 dev eth0 Here nothing happens. This patch correct this so that any modifiers not supplied are removed from the configuration. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index f645197..4b86738 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -27,9 +27,13 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) { if (flags & BRIDGE_VLAN_INFO_PVID) __vlan_add_pvid(v, vid); + else + __vlan_delete_pvid(v, vid); if (flags & BRIDGE_VLAN_INFO_UNTAGGED) set_bit(vid, v->untagged_bitmap); + else + clear_bit(vid, v->untagged_bitmap); } static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) -- cgit v1.1 From 7ce64c79c4decdeb1afe0bf2f6ef834b382871d1 Mon Sep 17 00:00:00 2001 From: "Alexander Y. Fomichev" Date: Mon, 15 Sep 2014 14:22:35 +0400 Subject: net: fix creation adjacent device symlinks __netdev_adjacent_dev_insert may add adjust device of different net namespace, without proper check it leads to emergence of broken sysfs links from/to devices in another namespace. Fix: rewrite netdev_adjacent_is_neigh_list macro as a function, move net_eq check into netdev_adjacent_is_neigh_list. (thanks David) related to: 4c75431ac3520631f1d9e74aa88407e6374dbbc4 Signed-off-by: Alexander Fomichev Signed-off-by: David S. Miller --- net/core/dev.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ab9a165..cf8a95f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4809,9 +4809,14 @@ static void netdev_adjacent_sysfs_del(struct net_device *dev, sysfs_remove_link(&(dev->dev.kobj), linkname); } -#define netdev_adjacent_is_neigh_list(dev, dev_list) \ - (dev_list == &dev->adj_list.upper || \ - dev_list == &dev->adj_list.lower) +static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) +{ + return (dev_list == &dev->adj_list.upper || + dev_list == &dev->adj_list.lower) && + net_eq(dev_net(dev), dev_net(adj_dev)); +} static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, @@ -4841,7 +4846,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, pr_debug("dev_hold for %s, because of link added from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - if (netdev_adjacent_is_neigh_list(dev, dev_list)) { + if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) { ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); if (ret) goto free_adj; @@ -4862,7 +4867,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, return 0; remove_symlinks: - if (netdev_adjacent_is_neigh_list(dev, dev_list)) + if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: kfree(adj); @@ -4895,8 +4900,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); - if (netdev_adjacent_is_neigh_list(dev, dev_list) && - net_eq(dev_net(dev),dev_net(adj_dev))) + if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); -- cgit v1.1 From c095f248e63ada504dd90c90baae673ae10ee3fe Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 15 Sep 2014 15:24:26 -0400 Subject: bridge: Fix br_should_learn to check vlan_enabled As Toshiaki Makita pointed out, the BRIDGE_INPUT_SKB_CB will not be initialized in br_should_learn() as that function is called only from br_handle_local_finish(). That is an input handler for link-local ethernet traffic so it perfectly correct to check br->vlan_enabled here. Reported-by: Toshiaki Makita Fixes: 20adfa1 bridge: Check if vlan filtering is enabled only once. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 4b86738..3ba57fc 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -280,7 +280,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) struct net_port_vlans *v; /* If filtering was disabled at input, let it pass. */ - if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) + if (!br->vlan_enabled) return true; v = rcu_dereference(p->vlan_info); -- cgit v1.1 From f92ee61982d6da15a9e49664ecd6405a15a2ee56 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 16 Sep 2014 10:08:40 +0200 Subject: xfrm: Generate blackhole routes only from route lookup functions Currently we genarate a blackhole route route whenever we have matching policies but can not resolve the states. Here we assume that dst_output() is called to kill the balckholed packets. Unfortunately this assumption is not true in all cases, so it is possible that these packets leave the system unwanted. We fix this by generating blackhole routes only from the route lookup functions, here we can guarantee a call to dst_output() afterwards. Fixes: 2774c131b1d ("xfrm: Handle blackhole route creation via afinfo.") Reported-by: Konstantinos Kolelis Signed-off-by: Steffen Klassert --- net/ipv4/route.c | 6 +++--- net/ipv6/ip6_output.c | 4 ++-- net/xfrm/xfrm_policy.c | 18 +++++++++++++++++- 3 files changed, 22 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index eaa4b00..173e7ea 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2265,9 +2265,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, return rt; if (flp4->flowi4_proto) - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, - flowi4_to_flowi(flp4), - sk, 0); + rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, + flowi4_to_flowi(flp4), + sk, 0); return rt; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 315a55d..0a3448b2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1009,7 +1009,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; - return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); @@ -1041,7 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; - return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index beeed60..7505674 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2138,7 +2138,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - return make_blackhole(net, family, dst_orig); + return ERR_PTR(-EREMOTE); } err = -EAGAIN; @@ -2195,6 +2195,22 @@ dropdst: } EXPORT_SYMBOL(xfrm_lookup); +/* Callers of xfrm_lookup_route() must ensure a call to dst_output(). + * Otherwise we may send out blackholed packets. + */ +struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, int flags) +{ + struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags); + + if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) + return make_blackhole(net, dst_orig->ops->family, dst_orig); + + return dst; +} +EXPORT_SYMBOL(xfrm_lookup_route); + static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { -- cgit v1.1 From b8c203b2d2fc961bafd53b41d5396bbcdec55998 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 16 Sep 2014 10:08:49 +0200 Subject: xfrm: Generate queueing routes only from route lookup functions Currently we genarate a queueing route if we have matching policies but can not resolve the states and the sysctl xfrm_larval_drop is disabled. Here we assume that dst_output() is called to kill the queued packets. Unfortunately this assumption is not true in all cases, so it is possible that these packets leave the system unwanted. We fix this by generating queueing routes only from the route lookup functions, here we can guarantee a call to dst_output() afterwards. Fixes: a0073fe18e71 ("xfrm: Add a state resolution packet queue") Reported-by: Konstantinos Kolelis Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7505674..fdde51f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,6 +39,11 @@ #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) #define XFRM_MAX_QUEUE_LEN 100 +struct xfrm_flo { + struct dst_entry *dst_orig; + u8 flags; +}; + static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] __read_mostly; @@ -1877,13 +1882,14 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) } static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, - struct dst_entry *dst, + struct xfrm_flo *xflo, const struct flowi *fl, int num_xfrms, u16 family) { int err; struct net_device *dev; + struct dst_entry *dst; struct dst_entry *dst1; struct xfrm_dst *xdst; @@ -1891,9 +1897,12 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, if (IS_ERR(xdst)) return xdst; - if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) + if (!(xflo->flags & XFRM_LOOKUP_QUEUE) || + net->xfrm.sysctl_larval_drop || + num_xfrms <= 0) return xdst; + dst = xflo->dst_orig; dst1 = &xdst->u.dst; dst_hold(dst); xdst->route = dst; @@ -1935,7 +1944,7 @@ static struct flow_cache_object * xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) { - struct dst_entry *dst_orig = (struct dst_entry *)ctx; + struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst *xdst, *new_xdst; int num_pols = 0, num_xfrms = 0, i, err, pol_dead; @@ -1976,7 +1985,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, goto make_dummy_bundle; } - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); + new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xflo->dst_orig); if (IS_ERR(new_xdst)) { err = PTR_ERR(new_xdst); if (err != -EAGAIN) @@ -2010,7 +2020,7 @@ make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ - xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); + xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); @@ -2104,13 +2114,18 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, } if (xdst == NULL) { + struct xfrm_flo xflo; + + xflo.dst_orig = dst_orig; + xflo.flags = flags; + /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, dst_orig); + xfrm_bundle_lookup, &xflo); if (flo == NULL) goto nopol; if (IS_ERR(flo)) { @@ -2202,7 +2217,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, struct sock *sk, int flags) { - struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags); + struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, + flags | XFRM_LOOKUP_QUEUE); if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); @@ -2476,7 +2492,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) skb_dst_force(skb); - dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); + dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { res = 0; dst = NULL; -- cgit v1.1 From dda3b191eb6c5a56d443723dcb71ade60d97c04f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 12 Sep 2014 21:49:28 +0200 Subject: net: rfkill: gpio: Enable module auto-loading for ACPI based switches For the ACPI based switches the MODULE_DEVICE_TABLE is missing to export the entries for module auto-loading. Signed-off-by: Marcel Holtmann Signed-off-by: John W. Linville --- net/rfkill/rfkill-gpio.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 02a86a2..5fa54dd 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -163,6 +163,7 @@ static const struct acpi_device_id rfkill_acpi_match[] = { { "LNV4752", RFKILL_TYPE_GPS }, { }, }; +MODULE_DEVICE_TABLE(acpi, rfkill_acpi_match); #endif static struct platform_driver rfkill_gpio_driver = { -- cgit v1.1 From 9b67aa4a82492f128adfccc63e61ab57c1ce1dfd Mon Sep 17 00:00:00 2001 From: Samuel Gauthier Date: Thu, 18 Sep 2014 10:31:04 +0200 Subject: openvswitch: restore OVS_FLOW_CMD_NEW notifications Since commit fb5d1e9e127a ("openvswitch: Build flow cmd netlink reply only if needed."), the new flows are not notified to the listeners of OVS_FLOW_MCGROUP. This commit fixes the problem by using the genl function, ie genl_has_listerners() instead of netlink_has_listeners(). Signed-off-by: Samuel Gauthier Signed-off-by: Nicolas Dichtel Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 91d66b7..64dc864 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -78,11 +78,12 @@ static const struct genl_multicast_group ovs_dp_vport_multicast_group = { /* Check if need to build a reply message. * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ -static bool ovs_must_notify(struct genl_info *info, - const struct genl_multicast_group *grp) +static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, + unsigned int group) { return info->nlhdr->nlmsg_flags & NLM_F_ECHO || - netlink_has_listeners(genl_info_net(info)->genl_sock, 0); + genl_has_listeners(family, genl_info_net(info)->genl_sock, + group); } static void ovs_notify(struct genl_family *family, @@ -763,7 +764,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act { struct sk_buff *skb; - if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) + if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) return NULL; skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); -- cgit v1.1 From 257117862634d89de33fec74858b1a0ba5ab444b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Sep 2014 08:02:05 -0700 Subject: net: sched: shrink struct qdisc_skb_cb to 28 bytes We cannot make struct qdisc_skb_cb bigger without impacting IPoIB, or increasing skb->cb[] size. Commit e0f31d849867 ("flow_keys: Record IP layer protocol in skb_flow_dissect()") broke IPoIB. Only current offender is sch_choke, and this one do not need an absolutely precise flow key. If we store 17 bytes of flow key, its more than enough. (Its the actual size of flow_keys if it was a packed structure, but we might add new fields at the end of it later) Signed-off-by: Eric Dumazet Fixes: e0f31d849867 ("flow_keys: Record IP layer protocol in skb_flow_dissect()") Signed-off-by: David S. Miller --- net/sched/sch_choke.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index ed30e43..fb666d1 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -133,10 +133,16 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) --sch->q.qlen; } +/* private part of skb->cb[] that a qdisc is allowed to use + * is limited to QDISC_CB_PRIV_LEN bytes. + * As a flow key might be too large, we store a part of it only. + */ +#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3) + struct choke_skb_cb { u16 classid; u8 keys_valid; - struct flow_keys keys; + u8 keys[QDISC_CB_PRIV_LEN - 3]; }; static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) @@ -163,22 +169,26 @@ static u16 choke_get_classid(const struct sk_buff *skb) static bool choke_match_flow(struct sk_buff *skb1, struct sk_buff *skb2) { + struct flow_keys temp; + if (skb1->protocol != skb2->protocol) return false; if (!choke_skb_cb(skb1)->keys_valid) { choke_skb_cb(skb1)->keys_valid = 1; - skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys); + skb_flow_dissect(skb1, &temp); + memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN); } if (!choke_skb_cb(skb2)->keys_valid) { choke_skb_cb(skb2)->keys_valid = 1; - skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys); + skb_flow_dissect(skb2, &temp); + memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN); } return !memcmp(&choke_skb_cb(skb1)->keys, &choke_skb_cb(skb2)->keys, - sizeof(struct flow_keys)); + CHOKE_K_LEN); } /* -- cgit v1.1 From a35165ca101695aa2cc5a6300ef69ae60be39a49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Sep 2014 10:38:16 -0700 Subject: ipv4: do not use this_cpu_ptr() in preemptible context this_cpu_ptr() in preemptible context is generally bad Sep 22 05:05:55 br kernel: [ 94.608310] BUG: using smp_processor_id() in preemptible [00000000] code: ip/2261 Sep 22 05:05:55 br kernel: [ 94.608316] caller is tunnel_dst_set.isra.28+0x20/0x60 [ip_tunnel] Sep 22 05:05:55 br kernel: [ 94.608319] CPU: 3 PID: 2261 Comm: ip Not tainted 3.17.0-rc5 #82 We can simply use raw_cpu_ptr(), as preemption is safe in these contexts. Should fix https://bugzilla.kernel.org/show_bug.cgi?id=84991 Signed-off-by: Eric Dumazet Reported-by: Joe Fixes: 9a4aa9af447f ("ipv4: Use percpu Cache route in IP tunnels") Acked-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index afed1aa..bd41dd1 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -79,10 +79,10 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst, idst->saddr = saddr; } -static void tunnel_dst_set(struct ip_tunnel *t, +static noinline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst, __be32 saddr) { - __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst, saddr); + __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr); } static void tunnel_dst_reset(struct ip_tunnel *t) @@ -106,7 +106,7 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, struct dst_entry *dst; rcu_read_lock(); - idst = this_cpu_ptr(t->dst_cache); + idst = raw_cpu_ptr(t->dst_cache); dst = rcu_dereference(idst->dst); if (dst && !atomic_inc_not_zero(&dst->__refcnt)) dst = NULL; -- cgit v1.1