From a839e463e84a02c0ea65ff61504b56a83e193078 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 13 Apr 2015 18:27:35 +0200 Subject: mac80211: Fix mac80211.h docbook comments A couple of enums in mac80211.h became structures recently, but the comments didn't follow suit, leading to errors like: Error(.//include/net/mac80211.h:367): Cannot parse enum! Documentation/DocBook/Makefile:93: recipe for target 'Documentation/DocBook/80211.xml' failed make[1]: *** [Documentation/DocBook/80211.xml] Error 1 Makefile:1361: recipe for target 'mandocs' failed make: *** [mandocs] Error 2 Fix the comments accordingly. Added a couple of other small comment fixes while I was there to silence other recently-added docbook warnings. Reported-by: Jim Davis Signed-off-by: Jonathan Corbet Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b4bef11..38a5fd7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -354,7 +354,7 @@ enum ieee80211_rssi_event_data { }; /** - * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT + * struct ieee80211_rssi_event - data attached to an %RSSI_EVENT * @data: See &enum ieee80211_rssi_event_data */ struct ieee80211_rssi_event { @@ -388,7 +388,7 @@ enum ieee80211_mlme_event_status { }; /** - * enum ieee80211_mlme_event - data attached to an %MLME_EVENT + * struct ieee80211_mlme_event - data attached to an %MLME_EVENT * @data: See &enum ieee80211_mlme_event_data * @status: See &enum ieee80211_mlme_event_status * @reason: the reason code if applicable @@ -401,9 +401,10 @@ struct ieee80211_mlme_event { /** * struct ieee80211_event - event to be sent to the driver - * @type The event itself. See &enum ieee80211_event_type. + * @type: The event itself. See &enum ieee80211_event_type.
* @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT + * @u: union holding the above two fields */ struct ieee80211_event { enum ieee80211_event_type type; -- cgit v1.1 From 17c18bf880b2464851e5a2bca86521affc46c97e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 21 Mar 2015 15:25:43 +0100 Subject: mac80211: add TX fastpath In order to speed up mac80211's TX path, add the "fast-xmit" cache that will cache the data frame 802.11 header and other data to be able to build the frame more quickly. This cache is rebuilt when external triggers imply changes, but a lot of the checks done per packet today are simplified away to the check for the cache. There's also a more detailed description in the code. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 38a5fd7..9001bd6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1796,6 +1796,10 @@ struct ieee80211_txq { * the driver returns 1. This also forces the driver to advertise its * supported cipher suites. * + * @IEEE80211_HW_SUPPORT_FAST_XMIT: The driver/hardware supports fast-xmit, + * this currently requires only the ability to calculate the duration + * for frames. + * * @IEEE80211_HW_QUEUE_CONTROL: The driver wants to control per-interface * queue mapping in order to use different queues (not just one per AC) * for different virtual interfaces. 
See the doc section on HW queue @@ -1844,7 +1848,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, IEEE80211_HW_NO_AUTO_VIF = 1<<15, IEEE80211_HW_SW_CRYPTO_CONTROL = 1<<16, - /* free slots */ + IEEE80211_HW_SUPPORT_FAST_XMIT = 1<<17, IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_QUEUE_CONTROL = 1<<20, -- cgit v1.1 From 680a0daba74fed0bf30530c9b3e7e706cf29855f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 Apr 2015 16:58:25 +0200 Subject: mac80211: allow drivers to support S/G If drivers want to support S/G (really just gather DMA on TX) then we can now easily support this on the fast-xmit path since it just needs to write to the ethernet header (and already has a check for that being possible.) However, disallow this on the regular TX path (which has to handle fragmentation, software crypto, etc.) by calling skb_linearize(). Also allow the related HIGHDMA since that's not interesting to the code in mac80211 at all anyway. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9001bd6..0af7464 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1942,8 +1942,8 @@ enum ieee80211_hw_flags { * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. * * @netdev_features: netdev features to be set in each netdev created - * from this HW. Note only HW checksum features are currently - * compatible with mac80211. Other feature bits will be rejected. + * from this HW. Note that not all features are usable with mac80211, + * other features will be rejected during HW registration. 
* * @uapsd_queues: This bitmap is included in (re)association frame to indicate * for each access category if it is uAPSD trigger-enabled and delivery- -- cgit v1.1 From df1404650ccbfeb76a84f301f22316be0d00a864 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 22 Apr 2015 14:40:58 +0200 Subject: mac80211: remove support for IFF_PROMISC This support is essentially useless as typically networks are encrypted, frames will be filtered by hardware, and rate scaling will be done with the intended recipient in mind. For real monitoring of the network, the monitor mode support should be used instead. Removing it removes a lot of corner cases. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0af7464..4feb74da 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2507,10 +2507,6 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * stack. It is always safe to pass more frames than requested, * but this has negative impact on power consumption. * - * @FIF_PROMISC_IN_BSS: promiscuous mode within your BSS, - * think of the BSS as your network segment and then this corresponds - * to the regular ethernet device promiscuous mode. - * * @FIF_ALLMULTI: pass all multicast frames, this is used if requested * by the user or if the hardware is not capable of filtering by * multicast address. @@ -2527,8 +2523,8 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * mac80211 needs to do and the amount of CPU wakeups, so you should * honour this flag if possible. * - * @FIF_CONTROL: pass control frames (except for PS Poll), if PROMISC_IN_BSS - * is not set then only those addressed to this station. 
+ * @FIF_CONTROL: pass control frames (except for PS Poll) addressed to this + * station * * @FIF_OTHER_BSS: pass frames destined to other BSSes * @@ -2538,7 +2534,6 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * @FIF_PROBE_REQ: pass probe request frames */ enum ieee80211_filter_flags { - FIF_PROMISC_IN_BSS = 1<<0, FIF_ALLMULTI = 1<<1, FIF_FCSFAIL = 1<<2, FIF_PLCPFAIL = 1<<3, -- cgit v1.1 From 6382246e895fa0ae5162de7c1e5566b9719bdd26 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Apr 2015 22:53:37 +0300 Subject: mac80211: notify the driver upon BAR Rx When we receive a BAR, this typically means that our peer doesn't hear our Block-Acks or that we can't hear its frames. Either way, it is a good indication that the link is in a bad condition. This is why it can serve as a probe to the driver. Use the event_callback callback for this. Since more events with the same data will be added in the future, the structure that describes the data attached to the event is given a generic name: ieee80211_ba_event. This also means that from now on, the event_callback can't sleep. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4feb74da..0c3983b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -337,10 +337,12 @@ enum ieee80211_bss_change { * enum ieee80211_event_type - event to be notified to the low level driver * @RSSI_EVENT: AP's rssi crossed the a threshold set by the driver.
* @MLME_EVENT: event related to MLME + * @BAR_RX_EVENT: a BAR was received */ enum ieee80211_event_type { RSSI_EVENT, MLME_EVENT, + BAR_RX_EVENT, }; /** @@ -400,17 +402,31 @@ struct ieee80211_mlme_event { }; /** + * struct ieee80211_ba_event - data attached for BlockAck related events + * @sta: pointer to the &ieee80211_sta to which this event relates + * @tid: the tid + * @ssn: the starting sequence number + */ +struct ieee80211_ba_event { + struct ieee80211_sta *sta; + u16 tid; + u16 ssn; +}; + +/** * struct ieee80211_event - event to be sent to the driver * @type: The event itself. See &enum ieee80211_event_type. * @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT - * @u: union holding the above two fields + * @ba: relevant if &type is %BAR_RX_EVENT + * @u:union holding the fields above */ struct ieee80211_event { enum ieee80211_event_type type; union { struct ieee80211_rssi_event rssi; struct ieee80211_mlme_event mlme; + struct ieee80211_ba_event ba; } u; }; @@ -3001,7 +3017,7 @@ enum ieee80211_reconfig_type { * The callback can sleep. * @event_callback: Notify driver about any event in mac80211. See * &enum ieee80211_event_type for the different types. - * The callback can sleep. + * The callback must be atomic. * * @release_buffered_frames: Release buffered frames according to the given * parameters. In the case where the driver buffers some frames for -- cgit v1.1 From b497de63ad5dcdae999c14444c4e7f53fd60119c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Apr 2015 22:53:38 +0300 Subject: mac80211: notify the driver on reordering buffer timeout When frames time out in the reordering buffer, it is a good indication that something went wrong and the driver may want to know about that to take action or trigger debug flows. It is pointless to notify the driver about each frame that is released. Notify each time the timer fires. 
Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0c3983b..3314298 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -338,11 +338,15 @@ enum ieee80211_bss_change { * @RSSI_EVENT: AP's rssi crossed the a threshold set by the driver. * @MLME_EVENT: event related to MLME * @BAR_RX_EVENT: a BAR was received + * @BA_FRAME_TIMEOUT: Frames were released from the reordering buffer because + * they timed out. This won't be called for each frame released, but only + * once each time the timeout triggers. */ enum ieee80211_event_type { RSSI_EVENT, MLME_EVENT, BAR_RX_EVENT, + BA_FRAME_TIMEOUT, }; /** @@ -405,7 +409,7 @@ struct ieee80211_mlme_event { * struct ieee80211_ba_event - data attached for BlockAck related events * @sta: pointer to the &ieee80211_sta to which this event relates * @tid: the tid - * @ssn: the starting sequence number + * @ssn: the starting sequence number (for %BAR_RX_EVENT) */ struct ieee80211_ba_event { struct ieee80211_sta *sta; @@ -418,7 +422,7 @@ struct ieee80211_ba_event { * @type: The event itself. See &enum ieee80211_event_type. * @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT - * @ba: relevant if &type is %BAR_RX_EVENT + * @ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT * @u:union holding the fields above */ struct ieee80211_event { -- cgit v1.1 From 4b32b5ad31a68a661f761c76dfd0d076636d3ae9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 28 Apr 2015 13:03:06 -0700 Subject: ipv6: Stop rt6_info from using inet_peer's metrics inet_peer is indexed by the dst address alone. However, the fib6 tree could have multiple routing entries (rt6_info) for the same dst. For example, 1. A /128 dst via multiple gateways. 2. A RTF_CACHE route cloned from a /128 route. 
In the above cases, all of them will share the same metrics and step on each other. This patch will steer away from inet_peer's metrics and use dst_cow_metrics_generic() for everything. Change Highlights: 1. Remove rt6_cow_metrics() which currently acquires metrics from inet_peer for DST_HOST route (i.e. /128 route). 2. Add rt6i_pmtu to take care of the pmtu update to avoid creating a full size metrics just to override the RTAX_MTU. 3. After (2), the RTF_CACHE route can also share the metrics with its dst.from route, by: dst_init_metrics(&cache_rt->dst, dst_metrics_ptr(cache_rt->dst.from), true); 4. Stop creating RTF_CACHE route by cloning another RTF_CACHE route. Instead, directly clone from rt->dst. [ Currently, cloning from another RTF_CACHE is only possible during rt6_do_redirect(). Also, the old clone is removed from the tree immediately after the new clone is added. ] In case of cloning from an older redirect RTF_CACHE, it should work as before. In case of cloning from an older pmtu RTF_CACHE, this patch will forget the pmtu and re-learn it (if there is any) from the redirected route. The _rt6i_peer and DST_METRICS_FORCE_OVERWRITE will be removed in the next cleanup patch. Signed-off-by: Martin KaFai Lau Reviewed-by: Hannes Frederic Sowa Cc: Steffen Klassert Signed-off-by: David S. 
Miller --- include/net/ip6_fib.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 20e80fa..7383a8c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -124,6 +124,7 @@ struct rt6_info { unsigned long _rt6i_peer; u32 rt6i_metric; + u32 rt6i_pmtu; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; u8 rt6i_protocol; @@ -189,15 +190,6 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } -static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ - struct dst_entry *new = (struct dst_entry *) from; - - rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(new); - rt->dst.from = new; -} - static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. -- cgit v1.1 From afc4eef80c92b199357db3570d3c9c7631d699ff Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 28 Apr 2015 13:03:07 -0700 Subject: ipv6: Remove DST_METRICS_FORCE_OVERWRITE and _rt6i_peer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _rt6i_peer is no longer needed after the last patch, 'ipv6: Stop rt6_info from using inet_peer's metrics'. DST_METRICS_FORCE_OVERWRITE is added by commit e5fd387ad5b3 ("ipv6: do not overwrite inetpeer metrics prematurely"). Since inetpeer is no longer used for metrics, this bit is also not needed. Signed-off-by: Martin KaFai Lau Reviewed-by: Hannes Frederic Sowa Cc: Michal Kubeček Cc: Steffen Klassert Signed-off-by: David S. 
Miller --- include/net/dst.h | 6 ------ include/net/ip6_fib.h | 31 ------------------------------- 2 files changed, 37 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 0fb99a2..22aa93f 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -109,7 +109,6 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL -#define DST_METRICS_FORCE_OVERWRITE 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) @@ -120,11 +119,6 @@ static inline bool dst_metrics_read_only(const struct dst_entry *dst) return dst->_metrics & DST_METRICS_READ_ONLY; } -static inline void dst_metrics_set_force_overwrite(struct dst_entry *dst) -{ - dst->_metrics |= DST_METRICS_FORCE_OVERWRITE; -} - void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); static inline void dst_destroy_metrics_generic(struct dst_entry *dst) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7383a8c..e000180 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -121,7 +121,6 @@ struct rt6_info { struct rt6key rt6i_prefsrc; struct inet6_dev *rt6i_idev; - unsigned long _rt6i_peer; u32 rt6i_metric; u32 rt6i_pmtu; @@ -130,36 +129,6 @@ struct rt6_info { u8 rt6i_protocol; }; -static inline struct inet_peer *rt6_peer_ptr(struct rt6_info *rt) -{ - return inetpeer_ptr(rt->_rt6i_peer); -} - -static inline bool rt6_has_peer(struct rt6_info *rt) -{ - return inetpeer_ptr_is_peer(rt->_rt6i_peer); -} - -static inline void __rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) -{ - __inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); -} - -static inline bool rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) -{ - return inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); -} - -static inline void rt6_init_peer(struct rt6_info *rt, struct inet_peer_base *base) -{ - 
inetpeer_init_ptr(&rt->_rt6i_peer, base); -} - -static inline void rt6_transfer_peer(struct rt6_info *rt, struct rt6_info *ort) -{ - inetpeer_transfer_peer(&rt->_rt6i_peer, &ort->_rt6i_peer); -} - static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; -- cgit v1.1 From 4749c3ef854e3a5d3dd3cc0ccd2dcb7e05d583bd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Apr 2015 12:12:00 +0200 Subject: net: sched: remove TC_MUNGED bits Not used. pedit sets TC_MUNGED when packet content was altered, but all the core does is unset MUNGED again and then set OK2MUNGE. And the latter isn't tested anywhere. So let's remove both TC_MUNGED and TC_OK2MUNGE. Signed-off-by: Florian Westphal Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6d778ef..994b5a0 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -755,8 +755,6 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, if (n) { n->tc_verd = SET_TC_VERD(n->tc_verd, 0); - n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); - n->tc_verd = CLR_TC_MUNGED(n->tc_verd); } return n; } -- cgit v1.1 From 82a584b7cd366511a22e37675b029cf2fb58e291 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 29 Apr 2015 15:33:21 -0700 Subject: ipv6: Flow label state ranges This patch divides the IPv6 flow label space into two ranges: 0-7ffff is reserved for flow label manager, 80000-fffff will be used for creating auto flow labels (per RFC6438). This only affects how labels are set on transmit, it does not affect receive. This range split can be disabled by sysctl. Background: IPv6 flow labels have been an unmitigated disappointment thus far in the lifetime of IPv6.
Support in HW devices to use them for ECMP is lacking, and OSes don't turn them on by default. If we had these we could get much better hashing in IPv6 networks without resorting to DPI, possibly eliminating some of the motivations to define new encaps in UDP just for getting ECMP. Unfortunately, the initial specifications of IPv6 did not clarify how they are to be used. There has always been a vague concept that these can be used for ECMP, flow hashing, etc. and we do now have a good standard for how to do this in RFC6438. The problem is that flow labels can be either stateful or stateless (as in RFC6438), and we are presented with the possibility that a stateless label may collide with a stateful one. Attempts to split the flow label space were rejected in IETF. When we added support in Linux for RFC6438, we could not turn on flow labels by default due to this conflict. This patch splits the flow label space and should give us a path to enabling auto flow labels by default for all IPv6 packets. This is an API change so we need to consider compatibility with existing deployment. The stateful range is chosen to be the lower values in hopes that most uses would have chosen small numbers. Once we resolve the stateless/stateful issue, we can proceed to look at enabling RFC6438 flow labels by default (starting with scaled testing). Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- include/net/ipv6.h | 9 +++++++-- include/net/netns/ipv6.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eec8ad3..53d25ef 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -239,8 +239,10 @@ struct ip6_flowlabel { struct net *fl_net; }; -#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) -#define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) +#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) +#define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) +#define IPV6_FLOWLABEL_STATELESS_FLAG cpu_to_be32(0x00080000) + #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 @@ -719,6 +721,9 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, hash ^= hash >> 12; flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; + + if (net->ipv6.sysctl.flowlabel_state_ranges) + flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; } return flowlabel; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index d2527bf..8d93544 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -34,6 +34,7 @@ struct netns_sysctl_ipv6 { int fwmark_reflect; int idgen_retries; int idgen_delay; + int flowlabel_state_ranges; }; struct netns_ipv6 { -- cgit v1.1 From 2f59e1ebaa7f762c8825871b5486b5f5b4fa952f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 1 May 2015 11:30:17 -0700 Subject: net: Add flow_keys digest Some users of flow keys (well just sch_choke now) need to pass flow_keys in skbuff cb, and use them for exact comparisons of flows so that skb->hash is not sufficient. In order to increase size of the flow_keys structure, we introduce another structure for the purpose of passing flow keys in skbuff cb. We limit this structure to sixteen bytes, and we will technically treat this as a digest of flow_keys struct hence its name flow_keys_digest. 
In the first incarnation we just copy the flow_keys structure up to 16 bytes-- this is the same information previously passed in the cb. In the future, we'll adapt this for larger flow_keys and could use something like SHA-1 over the whole flow_keys to improve the quality of the digest. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_keys.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index dc8fd81..6d6ef62 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -42,4 +42,20 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 u32 flow_hash_from_keys(struct flow_keys *keys); unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, __be16 protocol); + +/* struct flow_keys_digest: + * + * This structure is used to hold a digest of the full flow keys. This is a + * larger "hash" of a flow to allow definitively matching specific flows where + * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so + * that it can by used in CB of skb (see sch_choke for an example). + */ +#define FLOW_KEYS_DIGEST_LEN 16 +struct flow_keys_digest { + u8 data[FLOW_KEYS_DIGEST_LEN]; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow); + #endif -- cgit v1.1 From 9afd85c9e4552b276e2f4cfefd622bdeeffbbf26 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Sat, 2 May 2015 14:01:07 +0200 Subject: net: Export IGMP/MLD message validation code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this patch, the IGMP and MLD message validation functions are moved from the bridge code to IPv4/IPv6 multicast files. Some small refactoring was done to enhance readability and to iron out some differences in behaviour between the IGMP and MLD parsing code (e.g.
the skb-cloning of MLD messages is now only done if necessary, just like the IGMP part always did). Finally, these IGMP and MLD message validation functions are exported so that not only the bridge can use it but batman-adv later, too. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 80456f7..def59d3 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -142,6 +142,7 @@ void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); void addrconf_dad_failure(struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, -- cgit v1.1 From f5c4ae07992ca64d8628a11439c184baf5595e4b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2015 12:09:01 +0200 Subject: mac80211: make LED trigger names const This is just a code cleanup, make the LED trigger names const as they're not expected to be modified by drivers. 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3314298..8a3a7d7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3486,14 +3486,15 @@ enum ieee80211_tpt_led_trigger_flags { }; #ifdef CONFIG_MAC80211_LEDS -char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); -char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, - unsigned int flags, - const struct ieee80211_tpt_blink *blink_table, - unsigned int blink_table_len); +const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); +const char * +__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, + unsigned int flags, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len); #endif /** * ieee80211_get_tx_led_name - get name of TX LED @@ -3507,7 +3508,7 @@ char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_tx_led_name(hw); @@ -3528,7 +3529,7 @@ static inline char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. 
*/ -static inline char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_rx_led_name(hw); @@ -3549,7 +3550,7 @@ static inline char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_assoc_led_name(hw); @@ -3570,7 +3571,7 @@ static inline char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_radio_led_name(hw); @@ -3591,7 +3592,7 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) * * Note: This function must be called before ieee80211_register_hw(). */ -static inline char * +static inline const char * ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, unsigned int flags, const struct ieee80211_tpt_blink *blink_table, unsigned int blink_table_len) -- cgit v1.1 From cd8ae85299d54155702a56811b2e035e63064d3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 May 2015 21:34:46 -0700 Subject: tcp: provide SYN headers for passive connections This patch allows a server application to get the TCP SYN headers for its passive connections. This is useful if the server is doing fingerprinting of clients based on SYN packet contents. Two socket options are added: TCP_SAVE_SYN and TCP_SAVED_SYN. The first is used on a socket to enable saving the SYN headers for child connections. This can be set before or after the listen() call. 
The latter is used to retrieve the SYN headers for passive connections, if the parent listener has enabled TCP_SAVE_SYN. TCP_SAVED_SYN is read once, it frees the saved SYN headers. The data returned in TCP_SAVED_SYN are network (IPv4/IPv6) and TCP headers. Original patch was written by Tom Herbert, I changed it to not hold a full skb (and associated dst and conntracking reference). We have used such patch for about 3 years at Google. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/request_sock.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9f4265c..87935ca 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -64,6 +64,7 @@ struct request_sock { struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; + u32 *saved_syn; u32 secid; u32 peer_secid; }; @@ -77,7 +78,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) req->rsk_ops = ops; sock_hold(sk_listener); req->rsk_listener = sk_listener; - + req->saved_syn = NULL; /* Following is temporary. It is coupled with debugging * helpers in reqsk_put() & reqsk_free() */ @@ -104,6 +105,7 @@ static inline void reqsk_free(struct request_sock *req) req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); + kfree(req->saved_syn); kmem_cache_free(req->rsk_ops->slab, req); } -- cgit v1.1 From 9352c19f639354f093cb5457315c01bcb94aa82a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Apr 2015 18:12:41 +0200 Subject: mac80211: extend get_tkip_seq to all keys Extend the function to read the TKIP IV32/IV16 to read the IV/PN for all ciphers in order to allow drivers with full hardware crypto to properly support this. 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 79 +++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8a3a7d7..47b39c2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1502,6 +1502,40 @@ struct ieee80211_key_conf { }; /** + * struct ieee80211_key_seq - key sequence counter + * + * @tkip: TKIP data, containing IV32 and IV16 in host byte order + * @ccmp: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @aes_cmac: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @aes_gmac: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @gcmp: PN data, most significant byte first (big endian, + * reverse order than in packet) + */ +struct ieee80211_key_seq { + union { + struct { + u32 iv32; + u16 iv16; + } tkip; + struct { + u8 pn[6]; + } ccmp; + struct { + u8 pn[6]; + } aes_cmac; + struct { + u8 pn[6]; + } aes_gmac; + struct { + u8 pn[6]; + } gcmp; + }; +}; + +/** * struct ieee80211_cipher_scheme - cipher scheme * * This structure contains a cipher scheme information defining @@ -2836,9 +2870,9 @@ enum ieee80211_reconfig_type { * Returns zero if statistics are available. * The callback can sleep. * - * @get_tkip_seq: If your device implements TKIP encryption in hardware this - * callback should be provided to read the TKIP transmit IVs (both IV32 - * and IV16) for the given key from hardware. + * @get_key_seq: If your device implements encryption in hardware and does + * IV/PN assignment then this callback should be provided to read the + * IV/PN for the given key from hardware. * The callback must be atomic. * * @set_frag_threshold: Configuration of fragmentation threshold. 
Assign this @@ -3237,8 +3271,9 @@ struct ieee80211_ops { struct ieee80211_vif *vif); int (*get_stats)(struct ieee80211_hw *hw, struct ieee80211_low_level_stats *stats); - void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx, - u32 *iv32, u16 *iv16); + void (*get_key_seq)(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq); int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); int (*sta_add)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -4273,40 +4308,6 @@ void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, u8 *k1, u8 *k2); /** - * struct ieee80211_key_seq - key sequence counter - * - * @tkip: TKIP data, containing IV32 and IV16 in host byte order - * @ccmp: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @aes_cmac: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @aes_gmac: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @gcmp: PN data, most significant byte first (big endian, - * reverse order than in packet) - */ -struct ieee80211_key_seq { - union { - struct { - u32 iv32; - u16 iv16; - } tkip; - struct { - u8 pn[6]; - } ccmp; - struct { - u8 pn[6]; - } aes_cmac; - struct { - u8 pn[6]; - } aes_gmac; - struct { - u8 pn[6]; - } gcmp; - }; -}; - -/** * ieee80211_get_key_tx_seq - get key TX sequence counter * * @keyconf: the parameter passed with the set key -- cgit v1.1 From a31cf1c69e89e0c2d5515b04aca313f1014a714d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Apr 2015 18:21:58 +0200 Subject: mac80211: extend get_key() to return PN for all ciphers For ciphers not supported by mac80211, the function currently doesn't return any PN data. Fix this by extending the driver's get_key_seq() a little more to allow moving arbitrary PN data. 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 47b39c2..67e0df1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1501,6 +1501,8 @@ struct ieee80211_key_conf { u8 key[0]; }; +#define IEEE80211_MAX_PN_LEN 16 + /** * struct ieee80211_key_seq - key sequence counter * @@ -1513,6 +1515,7 @@ struct ieee80211_key_conf { * reverse order than in packet) * @gcmp: PN data, most significant byte first (big endian, * reverse order than in packet) + * @hw: data for HW-only (e.g. cipher scheme) keys */ struct ieee80211_key_seq { union { @@ -1532,6 +1535,10 @@ struct ieee80211_key_seq { struct { u8 pn[6]; } gcmp; + struct { + u8 seq[IEEE80211_MAX_PN_LEN]; + u8 seq_len; + } hw; }; }; -- cgit v1.1 From 06f207fc541862ba8902ceda0ddeade6ea6bce72 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 6 May 2015 16:28:31 +0300 Subject: cfg80211: change GO_CONCURRENT to IR_CONCURRENT for STA The GO_CONCURRENT regulatory definition can be extended to station interfaces requesting to IR as part of TDLS off-channel operations. Rename the GO_CONCURRENT flag to IR_CONCURRENT and allow the added use-case. Change internal users of GO_CONCURRENT to use the new definition. Signed-off-by: Arik Nemtsov Reviewed-by: Johannes Berg Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f8d6813..d63ecec 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -111,7 +111,7 @@ enum ieee80211_band { * This may be due to the driver or due to regulatory bandwidth * restrictions. 
* @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY - * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @IEEE80211_CHAN_IR_CONCURRENT: see %NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted @@ -129,7 +129,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_80MHZ = 1<<7, IEEE80211_CHAN_NO_160MHZ = 1<<8, IEEE80211_CHAN_INDOOR_ONLY = 1<<9, - IEEE80211_CHAN_GO_CONCURRENT = 1<<10, + IEEE80211_CHAN_IR_CONCURRENT = 1<<10, IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, }; -- cgit v1.1 From 21c8fe9915276d923f8c1e43434fd6d37a3b9aef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 May 2015 14:26:24 -0700 Subject: tcp: adjust window probe timers to safer values With the advent of small rto timers in datacenter TCP, (ip route ... rto_min x), the following can happen : 1) Qdisc is full, transmit fails. TCP sets a timer based on icsk_rto to retry the transmit, without exponential backoff. With low icsk_rto, and lot of sockets, all cpus are servicing timer interrupts like crazy. Intent of the code was to retry with a timer between 200 (TCP_RTO_MIN) and 500ms (TCP_RESOURCE_PROBE_INTERVAL) 2) Receivers can send zero windows if they don't drain their receive queue. TCP sends zero window probes, based on icsk_rto current value, with exponential backoff. With /proc/sys/net/ipv4/tcp_retries2 being 15 (or even smaller in some cases), sender can abort in less than one or two minutes ! If receiver stops the sender, it obviously doesn't care of very tight rto. Probability of dropping the ACK reopening the window is not worth the risk. Lets change the base timer to be at least 200ms (TCP_RTO_MIN) for these events (but not normal RTO based retransmits) A followup patch adds a new SNMP counter, as it would have helped a lot diagnosing this issue. 
Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 6d204f3..7a2248a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1043,14 +1043,31 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) return tp->is_cwnd_limited; } -static inline void tcp_check_probe_timer(struct sock *sk) +/* Something is really bad, we could not queue an additional packet, + * because qdisc is full or receiver sent a 0 window. + * We do not want to add fuel to the fire, or abort too early, + * so make sure the timer we arm now is at least 200ms in the future, + * regardless of current icsk_rto value (as it could be ~2ms) + */ +static inline unsigned long tcp_probe0_base(const struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); + return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN); +} - if (!tp->packets_out && !icsk->icsk_pending) +/* Variant of inet_csk_rto_backoff() used for zero window probes */ +static inline unsigned long tcp_probe0_when(const struct sock *sk, + unsigned long max_when) +{ + u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff; + + return (unsigned long)min_t(u64, when, max_when); +} + +static inline void tcp_check_probe_timer(struct sock *sk) +{ + if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - icsk->icsk_rto, TCP_RTO_MAX); + tcp_probe0_base(sk), TCP_RTO_MAX); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) -- cgit v1.1 From e520af48c7e5acae5f17f82a79ba7ab7cf156f3b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 May 2015 14:26:25 -0700 Subject: tcp: add TCPWinProbe and TCPKeepAlive SNMP counters Diagnosing problems related to 
Window Probes has been hard because we lack a counter. TCPWinProbe counts the number of ACK packets a sender has to send at regular intervals to make sure a reverse ACK packet opening back a window had not been lost. TCPKeepAlive counts the number of ACK packets sent to keep TCP flows alive (SO_KEEPALIVE) Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Nandita Dukkipati Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7a2248a..b8ea128 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -527,7 +527,7 @@ int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); void tcp_send_probe0(struct sock *); void tcp_send_partial(struct sock *); -int tcp_write_wakeup(struct sock *); +int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); -- cgit v1.1 From 7a0877d4b438886b72be61632eaa774d13262f70 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 7 May 2015 11:02:49 +0200 Subject: netns: rename peernet2id() to peernet2id_alloc() In a following commit, a new function will be introduced to only lookup for a nsid (no allocation if the nsid doesn't exist). To avoid confusion, the existing function is renamed. Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/net/net_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f733656..6d1e2ea 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -271,7 +271,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #define __net_initconst __initconst #endif -int peernet2id(struct net *net, struct net *peer); +int peernet2id_alloc(struct net *net, struct net *peer); struct net *get_net_ns_by_id(struct net *net, int id); struct pernet_operations { -- cgit v1.1 From 59324cf35aba5336b611074028777838a963d03b Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 7 May 2015 11:02:53 +0200 Subject: netlink: allow to listen "all" netns More accurately, listen all netns that have a nsid assigned into the netns where the netlink socket is opened. For this purpose, a netlink socket option is added: NETLINK_LISTEN_ALL_NSID. When this option is set on a netlink socket, this socket will receive netlink notifications from all netns that have a nsid assigned into the netns where the socket has been opened. The nsid is sent to userland via ancillary data. With this patch, a daemon needs only one socket to listen to many netns. This is useful when the number of netns is high. Because 0 is a valid value for a nsid, the field nsid_is_set indicates if the field nsid is valid or not. skb->cb is initialized to 0 on skb allocation, thus we are sure that we will never send a nsid 0 by error to the userland. Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S.
Miller --- include/net/net_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6d1e2ea..3f850ac 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -272,6 +272,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif int peernet2id_alloc(struct net *net, struct net *peer); +int peernet2id(struct net *net, struct net *peer); +bool peernet_has_id(struct net *net, struct net *peer); struct net *get_net_ns_by_id(struct net *net, int id); struct pernet_operations { -- cgit v1.1 From 80ba92fa1a92dea128283f69f55b02242e213650 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 May 2015 15:05:12 -0700 Subject: codel: add ce_threshold attribute For DCTCP or similar ECN based deployments on fabrics with shallow buffers, hosts are responsible for a good part of the buffering. This patch adds an optional ce_threshold to codel & fq_codel qdiscs, so that DCTCP can have feedback from queuing in the host. A DCTCP enabled egress port simply has a queue occupancy threshold above which ECT packets get CE mark. In codel language this translates to a sojourn time, so that one doesn't have to worry about bytes or bandwidth but delays. This makes the host an active participant in the health of the whole network. This also helps experimenting with DCTCP in a setup without DCTCP compliant fabric. In the following example, ce_threshold is set to 1ms, and we can see from 'ldelay xxx us' that TCP is not trying to go around the 5ms codel target. Queue has more capacity to absorb inelastic bursts (say from UDP traffic), as queues are maintained to an optimal level.
lpaa23:~# ./tc -s -d qd sh dev eth1 qdisc mq 1: dev eth1 root Sent 87910654696 bytes 58065331 pkt (dropped 0, overlimits 0 requeues 42961) backlog 3108242b 364p requeues 42961 qdisc codel 8063: dev eth1 parent 1:1 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms Sent 7363778701 bytes 4863809 pkt (dropped 0, overlimits 0 requeues 5503) rate 2348Mbit 193919pps backlog 255866b 46p requeues 5503 count 0 lastcount 0 ldelay 1.0ms drop_next 0us maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 72384 qdisc codel 8064: dev eth1 parent 1:2 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms Sent 7636486190 bytes 5043942 pkt (dropped 0, overlimits 0 requeues 5186) rate 2319Mbit 191538pps backlog 207418b 64p requeues 5186 count 0 lastcount 0 ldelay 694us drop_next 0us maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 69873 qdisc codel 8065: dev eth1 parent 1:3 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms Sent 11569360142 bytes 7641602 pkt (dropped 0, overlimits 0 requeues 5554) rate 3041Mbit 251096pps backlog 210446b 59p requeues 5554 count 0 lastcount 0 ldelay 889us drop_next 0us maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 37780 ... Signed-off-by: Eric Dumazet Cc: Florian Westphal Cc: Daniel Borkmann Cc: Glenn Judd Cc: Nandita Dukkipati Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/codel.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/codel.h b/include/net/codel.h index aeee280..8c0f78f 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -7,7 +7,7 @@ * Copyright (C) 2011-2012 Kathleen Nichols * Copyright (C) 2011-2012 Van Jacobson * Copyright (C) 2012 Michael D. 
Taht - * Copyright (C) 2012 Eric Dumazet + * Copyright (C) 2012,2015 Eric Dumazet * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -119,11 +119,13 @@ static inline u32 codel_time_to_us(codel_time_t val) /** * struct codel_params - contains codel parameters * @target: target queue size (in time units) + * @ce_threshold: threshold for marking packets with ECN CE * @interval: width of moving time window * @ecn: is Explicit Congestion Notification enabled */ struct codel_params { codel_time_t target; + codel_time_t ce_threshold; codel_time_t interval; bool ecn; }; @@ -159,17 +161,22 @@ struct codel_vars { * @maxpacket: largest packet we've seen so far * @drop_count: temp count of dropped packets in dequeue() * ecn_mark: number of packets we ECN marked instead of dropping + * ce_mark: number of packets CE marked because sojourn time was above ce_threshold */ struct codel_stats { u32 maxpacket; u32 drop_count; u32 ecn_mark; + u32 ce_mark; }; +#define CODEL_DISABLED_THRESHOLD INT_MAX + static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); params->target = MS2TIME(5); + params->ce_threshold = CODEL_DISABLED_THRESHOLD; params->ecn = false; } @@ -350,6 +357,9 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, vars->rec_inv_sqrt); } end: + if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && + INET_ECN_set_ce(skb)) + stats->ce_mark++; return skb; } #endif -- cgit v1.1 From 11aa9c28b4209242a9de0a661a7b3405adb568a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:09:13 -0500 Subject: net: Pass kern from net_proto_family.create to sk_alloc In preparation for changing how struct net is refcounted on kernel sockets pass the knowledge that we are creating a kernel socket from sock_create_kern through to sk_alloc. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller --- include/net/af_vsock.h | 2 +- include/net/llc_conn.h | 2 +- include/net/sock.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 172632d..db639a4 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -74,7 +74,7 @@ void vsock_pending_work(struct work_struct *work); struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, - gfp_t priority, unsigned short type); + gfp_t priority, unsigned short type, int kern); /**** TRANSPORT ****/ diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 0134681..fe994d2 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -96,7 +96,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) } struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index 3a4898e..d8dcf91 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1514,7 +1514,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void sk_free(struct sock *sk); void sk_release_kernel(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); -- cgit v1.1 From 26abe14379f8e2fa3fd1bcf97c9a7ad9364886fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:10:31 -0500 Subject: net: Modify sk_alloc to not reference count the netns of kernel sockets. Now that sk_alloc knows when a kernel socket is being allocated modify it to not reference count the network namespace of kernel sockets. Keep track of if a socket needs reference counting by adding a flag to struct sock called sk_net_refcnt. 
Update all of the callers of sock_create_kern to stop using sk_change_net and sk_release_kernel as those hacks are no longer needed, to avoid reference counting a kernel socket. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/inet_common.h | 2 +- include/net/sock.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 4a92423..279f835 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -41,7 +41,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sk_release_kernel(sk); + sock_release(sk->sk_socket); } #endif diff --git a/include/net/sock.h b/include/net/sock.h index d8dcf91..9e6b2c0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -184,6 +184,7 @@ struct sock_common { unsigned char skc_reuse:4; unsigned char skc_reuseport:1; unsigned char skc_ipv6only:1; + unsigned char skc_net_refcnt:1; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; @@ -323,6 +324,7 @@ struct sock { #define sk_reuse __sk_common.skc_reuse #define sk_reuseport __sk_common.skc_reuseport #define sk_ipv6only __sk_common.skc_ipv6only +#define sk_net_refcnt __sk_common.skc_net_refcnt #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot -- cgit v1.1 From affb9792f1d99e1e4d64411e147b648d65f2576e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:12:13 -0500 Subject: net: kill sk_change_net and sk_release_kernel These functions are no longer needed and no longer used, so kill them. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S.
Miller --- include/net/sock.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 9e6b2c0..d882f4c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1518,7 +1518,6 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); -void sk_release_kernel(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, @@ -2194,22 +2193,6 @@ void sock_net_set(struct sock *sk, struct net *net) write_pnet(&sk->sk_net, net); } -/* - * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace. - * They should not hold a reference to a namespace in order to allow - * to stop it. - * Sockets after sk_change_net should be released using sk_release_kernel - */ -static inline void sk_change_net(struct sock *sk, struct net *net) -{ - struct net *current_net = sock_net(sk); - - if (!net_eq(current_net, net)) { - put_net(current_net); - sock_net_set(sk, net); - } -} - static inline struct sock *skb_steal_sock(struct sk_buff *skb) { if (skb->sk) { -- cgit v1.1 From 6791e4661c4bd3e9f193a84247f2c389578a4336 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sat, 9 May 2015 00:01:55 -0700 Subject: bonding: Allow userspace to set actors' system_priority in AD system This patch allows user to randomize the system-priority in an ad-system. The allowed range is 1 - 0xFFFF while default value is 0xFFFF. If user does not specify this value, the system defaults to 0xFFFF, which is what it was before this patch. Following example code could set the value - # modprobe bonding mode=4 # sys_prio=$(( 1 + RANDOM + RANDOM )) # echo $sys_prio > /sys/class/net/bond0/bonding/ad_actor_sys_prio # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... 
# ip link set bond0 up Signed-off-by: Mahesh Bandewar Reviewed-by: Nikolay Aleksandrov [jt: * fixed up style issues reported by checkpatch * changed how the default value is set in bond_check_params(), this makes the default consistent between what gets set for a new bond and what the default is claimed to be in the bonding options.] Signed-off-by: Jonathan Toppins Signed-off-by: David S. Miller --- include/net/bond_options.h | 1 + include/net/bonding.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/bond_options.h b/include/net/bond_options.h index ea6546d..894002a 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -63,6 +63,7 @@ enum { BOND_OPT_LP_INTERVAL, BOND_OPT_SLAVES, BOND_OPT_TLB_DYNAMIC_LB, + BOND_OPT_AD_ACTOR_SYS_PRIO, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 78ed135..405cf87 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -136,6 +136,7 @@ struct bond_params { int packets_per_slave; int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; + u16 ad_actor_sys_prio; }; struct bond_parm_tbl { -- cgit v1.1 From 74514957552edd4661a4608618121f3c71d4e891 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sat, 9 May 2015 00:01:56 -0700 Subject: bonding: Allow userspace to set actors' macaddr in an AD-system. In an AD system, the communication between actor and partner is the business between these two entities. In the current setup anyone on the same L2 can "guess" the LACPDU contents and then possibly send the spoofed LACPDUs and trick the partner causing connectivity issues for the AD system. This patch allows to use a random mac-address obscuring its identity, making it harder for someone in the L2 to do the same thing. This patch allows user-space to choose the mac-address for the AD-system. This mac-address can not be NULL or a Multicast.
If the mac-address is set from user-space; kernel will honor it and will not overwrite it. In the absence (value from user space); the logic will default to using the masters' mac as the mac-address for the AD-system. It can be set using example code below - # modprobe bonding mode=4 # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ $(( (RANDOM & 0xFE) | 0x02 )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF ))) # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... # ip link set bond0 up Signed-off-by: Mahesh Bandewar Reviewed-by: Nikolay Aleksandrov [jt: fixed up style issues reported by checkpatch] Signed-off-by: Jonathan Toppins Signed-off-by: David S. Miller --- include/net/bond_options.h | 1 + include/net/bonding.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 894002a..eeeefa1 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -64,6 +64,7 @@ enum { BOND_OPT_SLAVES, BOND_OPT_TLB_DYNAMIC_LB, BOND_OPT_AD_ACTOR_SYS_PRIO, + BOND_OPT_AD_ACTOR_SYSTEM, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 405cf87..650f386 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -137,6 +137,7 @@ struct bond_params { int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; + u8 ad_actor_system[ETH_ALEN]; }; struct bond_parm_tbl { -- cgit v1.1 From d22a5fc0c32edcf5c3bb973ee8c9a2606ba500a8 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sat, 9 May 2015 00:01:57 -0700 Subject: bonding: Implement user key part of port_key in an AD system. The port key has three components - user-key, speed-part, and duplex-part. The LSBit is for the duplex-part, next 5 bits are for the speed while the remaining 10 bits are the user defined key bits. 
Get these 10 bits from the user-space (through the SysFs interface) and use it to form the admin port-key. Allowed range for the user-key is 0 - 1023 (10 bits). If it is not provided then use zero for the user-key-bits (default). It can be set using the following example code - # modprobe bonding mode=4 # usr_port_key=$(( RANDOM & 0x3FF )) # echo $usr_port_key > /sys/class/net/bond0/bonding/ad_user_port_key # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... # ip link set bond0 up Signed-off-by: Mahesh Bandewar Reviewed-by: Nikolay Aleksandrov [jt: * fixed up style issues reported by checkpatch * fixed up context from change in ad_actor_sys_prio patch] Signed-off-by: Jonathan Toppins Signed-off-by: David S. Miller --- include/net/bond_options.h | 1 + include/net/bonding.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/bond_options.h b/include/net/bond_options.h index eeeefa1..c28aca2 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -65,6 +65,7 @@ enum { BOND_OPT_TLB_DYNAMIC_LB, BOND_OPT_AD_ACTOR_SYS_PRIO, BOND_OPT_AD_ACTOR_SYSTEM, + BOND_OPT_AD_USER_PORT_KEY, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 650f386..20defc0 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -137,6 +137,7 @@ struct bond_params { int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; + u16 ad_user_port_key; u8 ad_actor_system[ETH_ALEN]; }; -- cgit v1.1 From b396cca6fafccf16206a5d041d59c9e6b65b6f5a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 May 2015 09:06:56 -0700 Subject: net: sched: deprecate enqueue_root() The only remaining enqueue_root() user is netem, and it does not look necessary: qdisc_skb_cb(skb)->pkt_len is preserved after one skb_clone() Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/sch_generic.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 994b5a0..1b0a2e8 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -501,12 +501,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) return sch->enqueue(skb, sch); } -static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) -{ - qdisc_skb_cb(skb)->pkt_len = skb->len; - return qdisc_enqueue(skb, sch) & NET_XMIT_MASK; -} - static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) { return q->flags & TCQ_F_CPUSTATS; -- cgit v1.1 From ebb9a03a590e2325f747be43c8db450e92509501 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 10 May 2015 09:47:46 -0700 Subject: switchdev: s/netdev_switch_/switchdev_/ and s/NETDEV_SWITCH_/SWITCHDEV_/ Turned out that "switchdev" sticks. So just unify all related terms to use this prefix. Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Acked-by: Roopa Prabhu Acked-by: Andy Gospodarek Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 111 ++++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 55 deletions(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d2e69ee..cd921fa 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -43,124 +43,125 @@ struct swdev_ops { u8 tos, u8 type, u32 tb_id); }; -enum netdev_switch_notifier_type { - NETDEV_SWITCH_FDB_ADD = 1, - NETDEV_SWITCH_FDB_DEL, +enum switchdev_notifier_type { + SWITCHDEV_FDB_ADD = 1, + SWITCHDEV_FDB_DEL, }; -struct netdev_switch_notifier_info { +struct switchdev_notifier_info { struct net_device *dev; }; -struct netdev_switch_notifier_fdb_info { - struct netdev_switch_notifier_info info; /* must be first */ +struct switchdev_notifier_fdb_info { + struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; }; static inline struct net_device * -netdev_switch_notifier_info_to_dev(const struct netdev_switch_notifier_info *info) +switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) { return info->dev; } #ifdef CONFIG_NET_SWITCHDEV -int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid); -int netdev_switch_port_stp_update(struct net_device *dev, u8 state); -int register_netdev_switch_notifier(struct notifier_block *nb); -int unregister_netdev_switch_notifier(struct notifier_block *nb); -int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info); -int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int 
netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id); -int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); -void netdev_switch_fib_ipv4_abort(struct fib_info *fi); +int switchdev_parent_id_get(struct net_device *dev, + struct netdev_phys_item_id *psid); +int switchdev_port_stp_update(struct net_device *dev, u8 state); +int register_switchdev_notifier(struct notifier_block *nb); +int unregister_switchdev_notifier(struct notifier_block *nb); +int call_switchdev_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info); +int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, u32 tb_id); +int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); +void switchdev_fib_ipv4_abort(struct fib_info *fi); #else -static inline int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static inline int switchdev_parent_id_get(struct net_device *dev, + struct netdev_phys_item_id *psid) { return -EOPNOTSUPP; } -static inline int netdev_switch_port_stp_update(struct net_device *dev, - u8 state) +static inline int switchdev_port_stp_update(struct net_device *dev, + u8 state) { return -EOPNOTSUPP; } -static inline int register_netdev_switch_notifier(struct notifier_block *nb) +static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; } -static inline int unregister_netdev_switch_notifier(struct 
notifier_block *nb) +static inline int unregister_switchdev_notifier(struct notifier_block *nb) { return 0; } -static inline int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info) +static inline int call_switchdev_notifiers(unsigned long val, + struct net_device *dev, + struct switchdev_notifier_info *info) { return NOTIFY_DONE; } -static inline int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { return -EOPNOTSUPP; } -static inline int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { return -EOPNOTSUPP; } -static inline int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { return 0; } -static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { return 0; } -static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) +static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 nlflags, u32 tb_id) { return 0; } -static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) { return 0; } -static inline void netdev_switch_fib_ipv4_abort(struct 
fib_info *fi) +static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) { } -- cgit v1.1 From 9d47c0a2d958e06322c88245749278633d333cca Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 10 May 2015 09:47:47 -0700 Subject: switchdev: s/swdev_/switchdev_/ Turned out that "switchdev" sticks. So just unify all related terms to use this prefix. Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Acked-by: Roopa Prabhu Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- include/net/switchdev.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index cd921fa..97b556d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -19,28 +19,28 @@ struct fib_info; /** * struct switchdev_ops - switchdev operations * - * @swdev_parent_id_get: Called to get an ID of the switch chip this port + * @switchdev_parent_id_get: Called to get an ID of the switch chip this port * is part of. If driver implements this, it indicates that it * represents a port of a switch chip. * - * @swdev_port_stp_update: Called to notify switch device port of bridge + * @switchdev_port_stp_update: Called to notify switch device port of bridge * port STP state change. * - * @swdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. + * @switchdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. * - * @swdev_fib_ipv4_del: Called to delete IPv4 route from switch device. + * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. 
*/ -struct swdev_ops { - int (*swdev_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); - int (*swdev_port_stp_update)(struct net_device *dev, u8 state); - int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, - u32 tb_id); - int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); +struct switchdev_ops { + int (*switchdev_parent_id_get)(struct net_device *dev, + struct netdev_phys_item_id *psid); + int (*switchdev_port_stp_update)(struct net_device *dev, u8 state); + int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, + u32 tb_id); + int (*switchdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); }; enum switchdev_notifier_type { -- cgit v1.1 From 3094333d9089d43e8b8f0418676fa6ae06c27b51 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:48 -0700 Subject: switchdev: introduce get/set attrs ops Add two new swdev ops for get/set switch port attributes. Most swdev interactions on a port are gets or sets on port attributes, so rather than adding ops for each attribute, let's define clean get/set ops for all attributes, and then we can have clear, consistent rules on how attributes propagate on stacked devs. Add the basic algorithms for get/set attr ops. Use the same recursive algo to walk lower devs we've used for STP updates, for example. For get, compare attr value for each lower dev and only return success if attr values match across all lower devs. For sets, set the same attr value for all lower devs. We'll use a two-phase prepare-commit transaction model for sets. In the first phase, the driver(s) are asked if attr set is OK. If all OK, the commit attr set in second phase.
A driver would NACK the prepare phase if it can't set the attr due to lack of resources or support, within its control. RTNL lock must be held across both phases because we'll recurse all lower devs first in prepare phase, and then recurse all lower devs again in commit phase. If any lower dev fails the prepare phase, we need to abort the transaction for all lower devs. If lower dev recursion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to indicate get/set only work on port (lowest) device. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 97b556d..2820438 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -14,6 +14,25 @@ #include #include +#define SWITCHDEV_F_NO_RECURSE BIT(0) + +enum switchdev_trans { + SWITCHDEV_TRANS_NONE, + SWITCHDEV_TRANS_PREPARE, + SWITCHDEV_TRANS_ABORT, + SWITCHDEV_TRANS_COMMIT, +}; + +enum switchdev_attr_id { + SWITCHDEV_ATTR_UNDEFINED, +}; + +struct switchdev_attr { + enum switchdev_attr_id id; + enum switchdev_trans trans; + u32 flags; +}; + struct fib_info; /** @@ -23,6 +42,10 @@ struct fib_info; * is part of. If driver implements this, it indicates that it * represents a port of a switch chip. * + * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr). + * + * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). + * * @switchdev_port_stp_update: Called to notify switch device port of bridge * port STP state change.
* @@ -33,6 +56,10 @@ struct fib_info; struct switchdev_ops { int (*switchdev_parent_id_get)(struct net_device *dev, struct netdev_phys_item_id *psid); + int (*switchdev_port_attr_get)(struct net_device *dev, + struct switchdev_attr *attr); + int (*switchdev_port_attr_set)(struct net_device *dev, + struct switchdev_attr *attr); int (*switchdev_port_stp_update)(struct net_device *dev, u8 state); int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, int dst_len, struct fib_info *fi, @@ -68,6 +95,10 @@ switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) int switchdev_parent_id_get(struct net_device *dev, struct netdev_phys_item_id *psid); +int switchdev_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr); +int switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr); int switchdev_port_stp_update(struct net_device *dev, u8 state); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); @@ -95,6 +126,18 @@ static inline int switchdev_parent_id_get(struct net_device *dev, return -EOPNOTSUPP; } +static inline int switchdev_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) +{ + return -EOPNOTSUPP; +} + static inline int switchdev_port_stp_update(struct net_device *dev, u8 state) { -- cgit v1.1 From f8e20a9f87d33865cc1d67f13da0db8d457fc3c9 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:49 -0700 Subject: switchdev: convert parent_id_get to switchdev attr get Switch ID is just a gettable port attribute. Convert switchdev op switchdev_parent_id_get to a switchdev attr. Note: for sysfs and netlink interfaces, SWITCHDEV_ATTR_PORT_PARENT_ID is called with SWITCHDEV_F_NO_RECURSE to limit switch ID user-visibility to only port netdevs.
So when a port is stacked under bond/bridge, the user can only query switch id via the switch ports, but not via the upper devices Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 2820438..93316e7 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -25,12 +25,16 @@ enum switchdev_trans { enum switchdev_attr_id { SWITCHDEV_ATTR_UNDEFINED, + SWITCHDEV_ATTR_PORT_PARENT_ID, }; struct switchdev_attr { enum switchdev_attr_id id; enum switchdev_trans trans; u32 flags; + union { + struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ + }; }; struct fib_info; @@ -38,10 +42,6 @@ struct fib_info; /** * struct switchdev_ops - switchdev operations * - * @switchdev_parent_id_get: Called to get an ID of the switch chip this port - * is part of. If driver implements this, it indicates that it - * represents a port of a switch chip. - * * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr). * * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). @@ -54,8 +54,6 @@ struct fib_info; * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. 
*/ struct switchdev_ops { - int (*switchdev_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); int (*switchdev_port_attr_get)(struct net_device *dev, struct switchdev_attr *attr); int (*switchdev_port_attr_set)(struct net_device *dev, @@ -93,8 +91,6 @@ switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) #ifdef CONFIG_NET_SWITCHDEV -int switchdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid); int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, @@ -120,12 +116,6 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi); #else -static inline int switchdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) -{ - return -EOPNOTSUPP; -} - static inline int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { -- cgit v1.1 From 3563606258cf3b8f02eabddb1cb45a94c44d9611 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:51 -0700 Subject: switchdev: convert STP update to switchdev attr set STP update is just a settable port attribute, so convert switchdev_port_stp_update to an attr set. For DSA, the prepare phase is skipped and STP updates are only done in the commit phase. This is because currently the DSA drivers don't need to allocate any memory for STP updates and the STP update will not fail to HW (unless something horrible goes wrong on the MDIO bus, in which case the prepare phase wouldn't have been able to predict anyway). Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 93316e7..aec5e49 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -26,6 +26,7 @@ enum switchdev_trans { enum switchdev_attr_id { SWITCHDEV_ATTR_UNDEFINED, SWITCHDEV_ATTR_PORT_PARENT_ID, + SWITCHDEV_ATTR_PORT_STP_STATE, }; struct switchdev_attr { @@ -34,6 +35,7 @@ struct switchdev_attr { u32 flags; union { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ + u8 stp_state; /* PORT_STP_STATE */ }; }; @@ -46,9 +48,6 @@ struct fib_info; * * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). * - * @switchdev_port_stp_update: Called to notify switch device port of bridge - * port STP state change. - * * @switchdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. * * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. @@ -58,7 +57,6 @@ struct switchdev_ops { struct switchdev_attr *attr); int (*switchdev_port_attr_set)(struct net_device *dev, struct switchdev_attr *attr); - int (*switchdev_port_stp_update)(struct net_device *dev, u8 state); int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, @@ -95,7 +93,6 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr); -int switchdev_port_stp_update(struct net_device *dev, u8 state); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, @@ -128,12 +125,6 @@ static inline int switchdev_port_attr_set(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_port_stp_update(struct net_device *dev, - u8 state) -{ - return 
-EOPNOTSUPP; -} - static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; -- cgit v1.1 From 491d0f1533ac750260406dbf84cdad44fd3d8a29 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:52 -0700 Subject: switchdev: introduce switchdev add/del obj ops Like switchdev attr get/set, add new switchdev obj add/del. switchdev objs will be things like VLANs or FIB entries, so add/del fits better for objects than get/set used for attributes. Use same two-phase prepare-commit transaction model as in attr set. Signed-off-by: Scott Feldman Acked-by: Sridhar Samudrala Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index aec5e49..4f43300 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -41,6 +41,15 @@ struct switchdev_attr { struct fib_info; +enum switchdev_obj_id { + SWITCHDEV_OBJ_UNDEFINED, +}; + +struct switchdev_obj { + enum switchdev_obj_id id; + enum switchdev_trans trans; +}; + /** * struct switchdev_ops - switchdev operations * @@ -48,6 +57,10 @@ struct fib_info; * * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). * + * @switchdev_port_obj_add: Add an object to port (see switchdev_obj). + * + * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj). + * * @switchdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. * * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. 
@@ -57,6 +70,10 @@ struct switchdev_ops { struct switchdev_attr *attr); int (*switchdev_port_attr_set)(struct net_device *dev, struct switchdev_attr *attr); + int (*switchdev_port_obj_add)(struct net_device *dev, + struct switchdev_obj *obj); + int (*switchdev_port_obj_del)(struct net_device *dev, + struct switchdev_obj *obj); int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, @@ -93,6 +110,8 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr); +int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj); +int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, @@ -125,6 +144,18 @@ static inline int switchdev_port_attr_set(struct net_device *dev, return -EOPNOTSUPP; } +static inline int switchdev_port_obj_add(struct net_device *dev, + struct switchdev_obj *obj) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_obj_del(struct net_device *dev, + struct switchdev_obj *obj) +{ + return -EOPNOTSUPP; +} + static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; -- cgit v1.1 From 6fc3016da7c1587aa59e71f8c4dbc4cf1343eab2 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:53 -0700 Subject: switchdev: add port vlan obj VLAN obj has flags (PVID and untagged) as well as start and end vid ranges. The switchdev driver can optimize programming the device using the ranges. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S.
Miller --- include/net/switchdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 4f43300..e598c2d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -43,11 +43,19 @@ struct fib_info; enum switchdev_obj_id { SWITCHDEV_OBJ_UNDEFINED, + SWITCHDEV_OBJ_PORT_VLAN, }; struct switchdev_obj { enum switchdev_obj_id id; enum switchdev_trans trans; + union { + struct switchdev_obj_vlan { /* PORT_VLAN */ + u16 flags; + u16 vid_start; + u16 vid_end; + } vlan; + }; }; /** -- cgit v1.1 From 6004c86718998aee1337efd3b087d6e17284632d Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:55 -0700 Subject: switchdev: add bridge port flags attr rocker: use switchdev get/set attr for bridge port flags Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e598c2d..6cf6de1 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -27,6 +27,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_UNDEFINED, SWITCHDEV_ATTR_PORT_PARENT_ID, SWITCHDEV_ATTR_PORT_STP_STATE, + SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, }; struct switchdev_attr { @@ -36,6 +37,7 @@ struct switchdev_attr { union { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ + unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ }; }; -- cgit v1.1 From e71f220b342d78cfb8ee9f1b60f1351f7183f2a5 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:58 -0700 Subject: switchdev: remove old switchdev_port_bridge_setlink New attr-based bridge_setlink can recurse lower devs and recover on err, so remove old wrapper (including ndo_dflt_switchdev_port_bridge_setlink). Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 6cf6de1..ce5ceb2 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -132,8 +132,6 @@ int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, u32 tb_id); int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, @@ -204,13 +202,6 @@ static inline int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, return 0; } -static inline int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return 0; -} - static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, -- cgit v1.1 From 5c34e0221423aeabc0b085adc5fccda3f91e2c49 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:00 -0700 Subject: switchdev: add new switchdev_port_bridge_dellink Same change as setlink. Provide the wrapper op for SELF ndo_bridge_dellink and call into the switchdev driver to delete afspec VLANs. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index ce5ceb2..8ffadca 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -164,6 +164,12 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } +static inline int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + return -EOPNOTSUPP; +} + static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; -- cgit v1.1 From 87a5dae59e7abaad911ab719caa5548dd6df5557 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:02 -0700 Subject: switchdev: remove unused switchdev_port_bridge_dellink Now we can remove old wrappers for dellink. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8ffadca..397b1e6 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -130,8 +130,6 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, u32 tb_id); int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, @@ -164,12 +162,6 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) -{ - return -EOPNOTSUPP; -} - static inline int register_switchdev_notifier(struct notifier_block *nb) { return 
0; @@ -201,13 +193,6 @@ static inline int switchdev_port_bridge_dellink(struct net_device *dev, return -EOPNOTSUPP; } -static inline int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return 0; -} - static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, -- cgit v1.1 From 8793d0a664a8a2c5e18e929c1f995c784c105705 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:04 -0700 Subject: switchdev: add new switchdev_port_bridge_getlink Like bridge_setlink, add switchdev wrapper to handle bridge_getlink and call into port driver to get port attrs. For now, only BR_LEARNING and BR_LEARNING_SYNC are returned. To add more, we'll probably want to break away from ndo_dflt_bridge_getlink() and build the netlink skb directly in the switchdev code. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 397b1e6..e081d67 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -126,6 +126,9 @@ int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info); +int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags); int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int switchdev_port_bridge_dellink(struct net_device *dev, @@ -179,6 +182,13 @@ static inline int call_switchdev_notifiers(unsigned long val, return NOTIFY_DONE; } +static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, + u32 seq, struct net_device *dev, + u32 filter_mask, int nlflags) +{ + 
return -EOPNOTSUPP; +} + static inline int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) -- cgit v1.1 From 58c2cb16b116d7feace621bd6b647bbabacfa225 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:06 -0700 Subject: switchdev: convert fib_ipv4_add/del over to switchdev_port_obj_add/del The IPv4 FIB ops convert nicely to the switchdev objs and we're left with only four switchdev ops: port get/set and port add/del. Other objs will follow, such as FDB. So go ahead and convert IPv4 FIB over to switchdev obj for consistency, anticipating more objs to come. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e081d67..3b217b4 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -46,6 +46,7 @@ struct fib_info; enum switchdev_obj_id { SWITCHDEV_OBJ_UNDEFINED, SWITCHDEV_OBJ_PORT_VLAN, + SWITCHDEV_OBJ_IPV4_FIB, }; struct switchdev_obj { @@ -57,6 +58,15 @@ struct switchdev_obj { u16 vid_start; u16 vid_end; } vlan; + struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ + u32 dst; + int dst_len; + struct fib_info *fi; + u8 tos; + u8 type; + u32 nlflags; + u32 tb_id; + } ipv4_fib; }; }; @@ -70,10 +80,6 @@ struct switchdev_obj { * @switchdev_port_obj_add: Add an object to port (see switchdev_obj). * * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj). - * - * @switchdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. - * - * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. 
*/ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, @@ -84,13 +90,6 @@ struct switchdev_ops { struct switchdev_obj *obj); int (*switchdev_port_obj_del)(struct net_device *dev, struct switchdev_obj *obj); - int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, - u32 tb_id); - int (*switchdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); }; enum switchdev_notifier_type { -- cgit v1.1 From 9449c3cd90472141cf081af88181a56163ff7132 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 12 May 2015 18:29:44 +0800 Subject: net: make skb_dst_pop routine static As xfrm_output_one() is the only caller of skb_dst_pop(), we should make skb_dst_pop() localized. Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- include/net/dst.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 22aa93f..2bc73f8a 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -349,18 +349,6 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, __skb_tunnel_rx(skb, dev, net); } -/* Children define the path of the packet through the - * Linux networking. Thus, destinations are stackable. - */ - -static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) -{ - struct dst_entry *child = dst_clone(skb_dst(skb)->child); - - skb_dst_drop(skb); - return child; -} - int dst_discard_sk(struct sock *sk, struct sk_buff *skb); static inline int dst_discard(struct sk_buff *skb) { -- cgit v1.1 From 5eb764edee52e837638b8d55ceace2c68e248cd2 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Tue, 12 May 2015 23:03:53 -0700 Subject: switchdev: align comment with other comments in block Signed-off-by: Scott Feldman Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 3b217b4..9f9a7cc5 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -53,7 +53,7 @@ struct switchdev_obj { enum switchdev_obj_id id; enum switchdev_trans trans; union { - struct switchdev_obj_vlan { /* PORT_VLAN */ + struct switchdev_obj_vlan { /* PORT_VLAN */ u16 flags; u16 vid_start; u16 vid_end; -- cgit v1.1 From 42275bd8fcb351f951781d8882f359d25976824b Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 13 May 2015 11:16:50 -0700 Subject: switchdev: don't use anonymous union on switchdev attr/obj structs Older gcc versions (e.g. gcc version 4.4.6) don't like anonymous unions which was causing build issues on the newly added switchdev attr/obj structs. Fix this by using named union on structs. Signed-off-by: Scott Feldman Reported-by: Or Gerlitz Signed-off-by: David S. Miller --- include/net/switchdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 9f9a7cc5..ea5b1c2 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -38,7 +38,7 @@ struct switchdev_attr { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ - }; + } u; }; struct fib_info; @@ -67,7 +67,7 @@ struct switchdev_obj { u32 nlflags; u32 tb_id; } ipv4_fib; - }; + } u; }; /** -- cgit v1.1 From e578d9c02587d57bfa7b560767c698a668a468c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 May 2015 19:50:41 +0200 Subject: net: sched: use counter to break reclassify loops Seems all we want here is to avoid endless 'goto reclassify' loop. 
tc_classify_compat even resets this counter when something other than TC_ACT_RECLASSIFY is returned, so this skb-counter doesn't break hypothetical loops induced by something other than perpetual TC_ACT_RECLASSIFY return values. skb_act_clone is now identical to skb_clone, so just use that. Tested with following (bogus) filter: tc filter add dev eth0 parent ffff: \ protocol ip u32 match u32 0 0 police rate 10Kbit burst \ 64000 mtu 1500 action reclassify Acked-by: Daniel Borkmann Signed-off-by: Florian Westphal Acked-by: Alexei Starovoitov Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1b0a2e8..2738f6f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -739,21 +739,6 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) return rtab->data[slot]; } -#ifdef CONFIG_NET_CLS_ACT -static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, - int action) -{ - struct sk_buff *n; - - n = skb_clone(skb, gfp_mask); - - if (n) { - n->tc_verd = SET_TC_VERD(n->tc_verd, 0); - } - return n; -} -#endif - struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; -- cgit v1.1 From 1bd758eb1cab2fa5b71a23f9e5d3c8076f4ed650 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:07 +0200 Subject: net: change name of flow_dissector header to match the .c file name add couple of empty lines on the way. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/flow_dissector.h | 68 ++++++++++++++++++++++++++++++++++++++++++++ include/net/flow_keys.h | 61 --------------------------------------- include/net/ip.h | 2 +- include/net/ipv6.h | 2 +- 4 files changed, 70 insertions(+), 63 deletions(-) create mode 100644 include/net/flow_dissector.h delete mode 100644 include/net/flow_keys.h (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h new file mode 100644 index 0000000..5e99a7b --- /dev/null +++ b/include/net/flow_dissector.h @@ -0,0 +1,68 @@ +#ifndef _NET_FLOW_DISSECTOR_H +#define _NET_FLOW_DISSECTOR_H + +/* struct flow_keys: + * @src: source ip address in case of IPv4 + * For IPv6 it contains 32bit hash of src address + * @dst: destination ip address in case of IPv4 + * For IPv6 it contains 32bit hash of dst address + * @ports: port numbers of Transport header + * port16[0]: src port number + * port16[1]: dst port number + * @thoff: Transport header offset + * @n_proto: Network header protocol (eg. IPv4/IPv6) + * @ip_proto: Transport header protocol (eg. TCP/UDP) + * All the members, except thoff, are in network byte order. 
+ */ +struct flow_keys { + /* (src,dst) must be grouped, in the same way than in IP header */ + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + u16 thoff; + __be16 n_proto; + u8 ip_proto; +}; + +bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + void *data, __be16 proto, int nhoff, int hlen); + +static inline bool skb_flow_dissect(const struct sk_buff *skb, + struct flow_keys *flow) +{ + return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); +} + +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen_proto); + +static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, + int thoff, u8 ip_proto) +{ + return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); +} + +u32 flow_hash_from_keys(struct flow_keys *keys); + +unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, + __be16 protocol); + +/* struct flow_keys_digest: + * + * This structure is used to hold a digest of the full flow keys. This is a + * larger "hash" of a flow to allow definitively matching specific flows where + * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so + * that it can by used in CB of skb (see sch_choke for an example). 
+ */ +#define FLOW_KEYS_DIGEST_LEN 16 +struct flow_keys_digest { + u8 data[FLOW_KEYS_DIGEST_LEN]; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow); + +#endif diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h deleted file mode 100644 index 6d6ef62..0000000 --- a/include/net/flow_keys.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _NET_FLOW_KEYS_H -#define _NET_FLOW_KEYS_H - -/* struct flow_keys: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address - * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number - * @thoff: Transport header offset - * @n_proto: Network header protocol (eg. IPv4/IPv6) - * @ip_proto: Transport header protocol (eg. TCP/UDP) - * All the members, except thoff, are in network byte order. - */ -struct flow_keys { - /* (src,dst) must be grouped, in the same way than in IP header */ - __be32 src; - __be32 dst; - union { - __be32 ports; - __be16 port16[2]; - }; - u16 thoff; - __be16 n_proto; - u8 ip_proto; -}; - -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, - void *data, __be16 proto, int nhoff, int hlen); -static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) -{ - return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); -} -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); -static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) -{ - return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); -} -u32 flow_hash_from_keys(struct flow_keys *keys); -unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, - __be16 protocol); - -/* struct flow_keys_digest: - * - * This structure is used to hold a digest of the full flow keys. 
This is a - * larger "hash" of a flow to allow definitively matching specific flows where - * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so - * that it can by used in CB of skb (see sch_choke for an example). - */ -#define FLOW_KEYS_DIGEST_LEN 16 -struct flow_keys_digest { - u8 data[FLOW_KEYS_DIGEST_LEN]; -}; - -void make_flow_keys_digest(struct flow_keys_digest *digest, - const struct flow_keys *flow); - -#endif diff --git a/include/net/ip.h b/include/net/ip.h index d14af7e..562eb65 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -31,7 +31,7 @@ #include #include #include -#include +#include struct sock; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 53d25ef..9932b86 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define SIN6_LEN_RFC2133 24 -- cgit v1.1 From b0a31431b4d81fb1ccc36ce64ce3fe6a0aca4031 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:08 +0200 Subject: flow_dissector: remove unused function flow_get_hlen declaration commit 56193d1bce ("net: Add function for parsing the header length out of linear ethernet frames") added this function declaration but it is defined nowhere. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 5e99a7b..118ae69 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -47,9 +47,6 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, u32 flow_hash_from_keys(struct flow_keys *keys); -unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, - __be16 protocol); - /* struct flow_keys_digest: * * This structure is used to hold a digest of the full flow keys. 
This is a -- cgit v1.1 From 10b89ee43e849544eddfe34e535341fc077464ec Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:09 +0200 Subject: net: move *skb_get_poff declarations into correct header Since these functions are defined in flow_dissector.c, move header declarations from skbuff.h into flow_dissector.h Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 118ae69..4570cca 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -46,6 +46,9 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, } u32 flow_hash_from_keys(struct flow_keys *keys); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); /* struct flow_keys_digest: * -- cgit v1.1 From 9c684b5083bc191c4b7b189c73d75587e167a474 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:11 +0200 Subject: net: move __skb_get_hash function declaration to flow_dissector.h Since the definition of the function is in flow_dissector.c, it makes sense to have the declaration in flow_dissector.h Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/flow_dissector.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 4570cca..e4ee761 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -46,6 +46,7 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, } u32 flow_hash_from_keys(struct flow_keys *keys); +void __skb_get_hash(struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); -- cgit v1.1 From fbff949e3bc7f3f7d9e8b3ef4855ec7138276a25 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:15 +0200 Subject: flow_dissector: introduce programmable flow_dissector Introduce dissector infrastructure which allows the user to specify which parts of the skb they want to dissect. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 61 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index e4ee761..20239e8 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -1,6 +1,64 @@ #ifndef _NET_FLOW_DISSECTOR_H #define _NET_FLOW_DISSECTOR_H +/** + * struct flow_dissector_key_basic: + * @thoff: Transport header offset + * @n_proto: Network header protocol (eg. IPv4/IPv6) + * @ip_proto: Transport header protocol (eg.
TCP/UDP) + */ +struct flow_dissector_key_basic { + u16 thoff; + __be16 n_proto; + u8 ip_proto; +}; + +/** + * struct flow_dissector_key_addrs: + * @src: source ip address in case of IPv4 + * For IPv6 it contains 32bit hash of src address + * @dst: destination ip address in case of IPv4 + * For IPv6 it contains 32bit hash of dst address + */ +struct flow_dissector_key_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + __be32 src; + __be32 dst; +}; + +/** + * flow_dissector_key_tp_ports: + * @ports: port numbers of Transport header + * port16[0]: src port number + * port16[1]: dst port number + */ +struct flow_dissector_key_ports { + union { + __be32 ports; + __be16 port16[2]; + }; +}; + +enum flow_dissector_key_id { + FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ + FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + + FLOW_DISSECTOR_KEY_MAX, +}; + +struct flow_dissector_key { + enum flow_dissector_key_id key_id; + size_t offset; /* offset of struct flow_dissector_key_* + in target the struct */ +}; + +struct flow_dissector { + unsigned int used_keys; /* each bit repesents presence of one key id */ + unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; +}; + /* struct flow_keys: * @src: source ip address in case of IPv4 * For IPv6 it contains 32bit hash of src address @@ -27,6 +85,9 @@ struct flow_keys { u8 ip_proto; }; +void skb_flow_dissector_init(struct flow_dissector *flow_dissector, + const struct flow_dissector_key *key, + unsigned int key_count); bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, void *data, __be16 proto, int nhoff, int hlen); -- cgit v1.1 From 06635a35d13d42b95422bba6633f175245cc644e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:16 +0200 Subject: flow_dissect: use programmable dissector in
skb_flow_dissect and friends Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 63 ++++++++++++++++++++++++-------------------- include/net/ip.h | 8 +++--- include/net/ipv6.h | 8 +++--- 3 files changed, 42 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 20239e8..0c8d406 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -59,42 +59,47 @@ struct flow_dissector { unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; }; -/* struct flow_keys: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address - * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number - * @thoff: Transport header offset - * @n_proto: Network header protocol (eg. IPv4/IPv6) - * @ip_proto: Transport header protocol (eg. TCP/UDP) - * All the members, except thoff, are in network byte order. 
- */ -struct flow_keys { - /* (src,dst) must be grouped, in the same way than in IP header */ - __be32 src; - __be32 dst; - union { - __be32 ports; - __be16 port16[2]; - }; - u16 thoff; - __be16 n_proto; - u8 ip_proto; -}; - void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, __be16 proto, int nhoff, int hlen); static inline bool skb_flow_dissect(const struct sk_buff *skb, - struct flow_keys *flow) + struct flow_dissector *flow_dissector, + void *target_container) +{ + return __skb_flow_dissect(skb, flow_dissector, target_container, + NULL, 0, 0, 0); +} + +struct flow_keys { + struct flow_dissector_key_addrs addrs; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_basic basic; +}; + +extern struct flow_dissector flow_keys_dissector; +extern struct flow_dissector flow_keys_buf_dissector; + +static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, + struct flow_keys *flow) +{ + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(skb, &flow_keys_dissector, flow, + NULL, 0, 0, 0); +} + +static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow, + void *data, __be16 proto, + int nhoff, int hlen) { - return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow, + data, proto, nhoff, hlen); } __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, diff --git a/include/net/ip.h b/include/net/ip.h index 562eb65..b0443d4 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -360,10 +360,10 @@ static inline void inet_set_txhash(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct flow_keys keys; - keys.src = 
inet->inet_saddr; - keys.dst = inet->inet_daddr; - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = inet->inet_saddr; + keys.addrs.dst = inet->inet_daddr; + keys.ports.port16[0] = inet->inet_sport; + keys.ports.port16[1] = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9932b86..9eed976 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -698,10 +698,10 @@ static inline void ip6_set_txhash(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct flow_keys keys; - keys.src = (__force __be32)ipv6_addr_hash(&np->saddr); - keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); + keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); + keys.ports.port16[0] = inet->inet_sport; + keys.ports.port16[1] = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } -- cgit v1.1 From c3f8eaeb6ea501bd0e2e424f5dfecf952b12a06f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:17 +0200 Subject: flow_dissector: add missing header includes Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/flow_dissector.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 0c8d406..1add891 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -1,6 +1,9 @@ #ifndef _NET_FLOW_DISSECTOR_H #define _NET_FLOW_DISSECTOR_H +#include +#include + /** * struct flow_dissector_key_basic: * @thoff: Transport header offset -- cgit v1.1 From b924933cbbfbdcaa2831a39780c116ec6e48c397 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:18 +0200 Subject: flow_dissector: introduce support for ipv6 addresses So far, only hashes made out of ipv6 addresses could be dissected. This patch introduces support for dissection of full ipv6 addresses. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 1add891..586b123 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -3,6 +3,7 @@ #include #include +#include /** * struct flow_dissector_key_basic: @@ -42,11 +43,23 @@ struct flow_dissector_key_ports { }; }; +/** + * struct flow_dissector_key_ipv6_addrs: + * @src: source ip address + * @dst: destination ip address + */ +struct flow_dissector_key_ipv6_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + struct in6_addr src; + struct in6_addr dst; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_MAX, }; -- cgit v1.1 From
67a900cc0436d74e7ff89042371760def087680d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:19 +0200 Subject: flow_dissector: introduce support for Ethernet addresses Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 586b123..5eac987 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -4,6 +4,7 @@ #include #include #include +#include /** * struct flow_dissector_key_basic: @@ -54,12 +55,24 @@ struct flow_dissector_key_ipv6_addrs { struct in6_addr dst; }; +/** + * struct flow_dissector_key_eth_addrs: + * @src: source Ethernet address + * @dst: destination Ethernet address + */ +struct flow_dissector_key_eth_addrs { + /* (dst,src) must be grouped, in the same way than in ETH header */ + unsigned char dst[ETH_ALEN]; + unsigned char src[ETH_ALEN]; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ + FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_MAX, }; -- cgit v1.1 From 59346afe7a5548ab3e9730aeff33993faa76abbe Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:20 +0200 Subject: flow_dissector: change port array into src, dst tuple Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/flow_dissector.h | 9 ++++++--- include/net/ip.h | 4 ++-- include/net/ipv6.h | 4 ++-- 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 5eac987..bac9c14 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -34,13 +34,16 @@ struct flow_dissector_key_addrs { /** * flow_dissector_key_tp_ports: * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number + * src: source port number + * dst: destination port number */ struct flow_dissector_key_ports { union { __be32 ports; - __be16 port16[2]; + struct { + __be16 src; + __be16 dst; + }; }; }; diff --git a/include/net/ip.h b/include/net/ip.h index b0443d4..0ed6d76 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -362,8 +362,8 @@ static inline void inet_set_txhash(struct sock *sk) keys.addrs.src = inet->inet_saddr; keys.addrs.dst = inet->inet_daddr; - keys.ports.port16[0] = inet->inet_sport; - keys.ports.port16[1] = inet->inet_dport; + keys.ports.src = inet->inet_sport; + keys.ports.dst = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9eed976..aab8190 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -700,8 +700,8 @@ static inline void ip6_set_txhash(struct sock *sk) keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); - keys.ports.port16[0] = inet->inet_sport; - keys.ports.port16[1] = inet->inet_dport; + keys.ports.src = inet->inet_sport; + keys.ports.dst = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } -- cgit v1.1 From 35d32e8fe4ab44180e46a0dd54abea6985398d00 Mon Sep 17 00:00:00 2001 From: "John W. 
Linville" Date: Wed, 13 May 2015 12:57:27 -0400 Subject: geneve: move definition of geneve_hdr() to geneve.h This is a static inline with identical definitions in multiple places... Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- include/net/geneve.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/geneve.h b/include/net/geneve.h index 14fb8d3..2a0543a 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,6 +62,11 @@ struct genevehdr { struct geneve_opt options[]; }; +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) +{ + return (struct genevehdr *)(udp_hdr(skb) + 1); +} + #ifdef CONFIG_INET struct geneve_sock; -- cgit v1.1 From 264ea103a7473f51aced838e68ed384ea2c759f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 May 2015 14:26:56 -0700 Subject: tcp: syncookies: extend validity range Now we allow storing more request socks per listener, we might hit syncookie mode less often and hit following bug in our stack : When we send a burst of syncookies, then exit this mode, tcp_synq_no_recent_overflow() can return false if the ACK packets coming from clients are coming three seconds after the end of syncookie episode. This is a way too strong requirement and conflicts with rest of syncookie code which allows ACK to be aged up to 2 minutes. Perfectly valid ACK packets are dropped just because clients might be in a crowded wifi environment or on another planet. So let's fix this, and also change tcp_synq_overflow() to not dirty a cache line for every syncookie we send, as we are under attack. Signed-off-by: Eric Dumazet Acked-by: Florian Westphal Acked-by: Yuchung Cheng Signed-off-by: David S. 
Miller --- include/net/tcp.h | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index b8ea128..7ace6ac 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -326,18 +326,6 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) bool tcp_check_oom(struct sock *sk, int shift); -/* syncookies: remember time of last synqueue overflow */ -static inline void tcp_synq_overflow(struct sock *sk) -{ - tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies; -} - -/* syncookies: no recent synqueue overflow on this listening socket? */ -static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) -{ - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; - return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK); -} extern struct proto tcp_prot; @@ -483,13 +471,35 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); * i.e. a sent cookie is valid only at most for 2*60 seconds (or less if * the counter advances immediately after a cookie is generated). */ -#define MAX_SYNCOOKIE_AGE 2 +#define MAX_SYNCOOKIE_AGE 2 +#define TCP_SYNCOOKIE_PERIOD (60 * HZ) +#define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD) + +/* syncookies: remember time of last synqueue overflow + * But do not dirty this field too often (once per second is enough) + */ +static inline void tcp_synq_overflow(struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long now = jiffies; + + if (time_after(now, last_overflow + HZ)) + tcp_sk(sk)->rx_opt.ts_recent_stamp = now; +} + +/* syncookies: no recent synqueue overflow on this listening socket? 
*/ +static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + + return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); +} static inline u32 tcp_cookie_time(void) { u64 val = get_jiffies_64(); - do_div(val, 60 * HZ); + do_div(val, TCP_SYNCOOKIE_PERIOD); return val; } -- cgit v1.1 From d53a2aa3a116609c7db8799da31541c4ba5999eb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 08:52:19 -0700 Subject: net: fix sparse error in csum_replace4() make C=2 CF=-D__CHECK_ENDIAN__ net/ipv4/netfilter/nf_nat_l3proto_ipv4.o CHECK net/ipv4/netfilter/nf_nat_l3proto_ipv4.c include/net/checksum.h:125:64: warning: incorrect type in argument 2 (different base types) include/net/checksum.h:125:64: expected restricted __wsum [usertype] addend include/net/checksum.h:125:64: got restricted __be32 [usertype] from include/net/checksum.h:125:71: warning: incorrect type in argument 2 (different base types) include/net/checksum.h:125:71: expected restricted __wsum [usertype] addend include/net/checksum.h:125:71: got restricted __be32 [usertype] to include/net/checksum.h:125:64: warning: incorrect type in argument 2 (different base types) include/net/checksum.h:125:64: expected restricted __wsum [usertype] addend include/net/checksum.h:125:64: got restricted __be32 [usertype] from include/net/checksum.h:125:71: warning: incorrect type in argument 2 (different base types) include/net/checksum.h:125:71: expected restricted __wsum [usertype] addend include/net/checksum.h:125:71: got restricted __be32 [usertype] to Fixes: 4565af0d406b ("net: optimise csum_replace4()") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/checksum.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/checksum.h b/include/net/checksum.h index 0a55ac7..2d1d73c 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -122,7 +122,9 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { - *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), from), to)); + __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); + + *sum = csum_fold(csum_add(tmp, (__force __wsum)to)); } /* Implements RFC 1624 (Incremental Internet Checksum) -- cgit v1.1 From 1a24e04e4b50939daa3041682b38b82c896ca438 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 12:39:25 -0700 Subject: net: fix sk_mem_reclaim_partial() sk_mem_reclaim_partial() goal is to ensure each socket has one SK_MEM_QUANTUM forward allocation. This is needed both for performance and better handling of memory pressure situations in follow up patches. SK_MEM_QUANTUM is currently a page, but might be reduced to 4096 bytes as some arches have 64KB pages. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index d882f4c..4581a60 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1368,7 +1368,7 @@ static inline struct inode *SOCK_INODE(struct socket *socket) * Functions for memory accounting */ int __sk_mem_schedule(struct sock *sk, int size, int kind); -void __sk_mem_reclaim(struct sock *sk); +void __sk_mem_reclaim(struct sock *sk, int amount); #define SK_MEM_QUANTUM ((int)PAGE_SIZE) #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) @@ -1409,7 +1409,7 @@ static inline void sk_mem_reclaim(struct sock *sk) if (!sk_has_account(sk)) return; if (sk->sk_forward_alloc >= SK_MEM_QUANTUM) - __sk_mem_reclaim(sk); + __sk_mem_reclaim(sk, sk->sk_forward_alloc); } static inline void sk_mem_reclaim_partial(struct sock *sk) @@ -1417,7 +1417,7 @@ static inline void sk_mem_reclaim_partial(struct sock *sk) if (!sk_has_account(sk)) return; if (sk->sk_forward_alloc > SK_MEM_QUANTUM) - __sk_mem_reclaim(sk); + __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1); } static inline void sk_mem_charge(struct sock *sk, int size) -- cgit v1.1 From a6c5ea4ccf0033591e6e476d7a273c0074c07aa7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 12:39:26 -0700 Subject: tcp: rename sk_forced_wmem_schedule() to sk_forced_mem_schedule() We plan to use sk_forced_wmem_schedule() in input path as well, so make it non static and rename it. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7ace6ac..841691a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -311,6 +311,8 @@ static inline bool tcp_out_of_memory(struct sock *sk) return false; } +void sk_forced_mem_schedule(struct sock *sk, int size); + static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { struct percpu_counter *ocp = sk->sk_prot->orphan_count; -- cgit v1.1 From b8da51ebb1aa93908350f95efae73aecbc2e266c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 12:39:27 -0700 Subject: tcp: introduce tcp_under_memory_pressure() Introduce an optimized version of sk_under_memory_pressure() for TCP. Our intent is to use it in fast paths. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 841691a..0d85223 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -286,6 +286,14 @@ extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; +/* optimized version of sk_under_memory_pressure() for TCP sockets */ +static inline bool tcp_under_memory_pressure(const struct sock *sk) +{ + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + return !!sk->sk_cgrp->memory_pressure; + + return tcp_memory_pressure; +} /* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). -- cgit v1.1 From 45d4122ca7cdb3a4b91f392605cd22cfa75f1d99 Mon Sep 17 00:00:00 2001 From: "Samudrala, Sridhar" Date: Wed, 13 May 2015 21:55:43 -0700 Subject: switchdev: add support for fdb add/del/dump via switchdev_port_obj ops. 
- introduce port fdb obj and generic switchdev_port_fdb_add/del/dump() - use switchdev_port_fdb_add/del/dump in rocker/team/bonding ndo ops. - add support for fdb obj in switchdev_port_obj_add/del/dump() - switch rocker to implement fdb ops via switchdev_ops v3: updated to sync with named union changes. Signed-off-by: Sridhar Samudrala Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/net/switchdev.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index ea5b1c2..437f8fe 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -47,11 +47,13 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_UNDEFINED, SWITCHDEV_OBJ_PORT_VLAN, SWITCHDEV_OBJ_IPV4_FIB, + SWITCHDEV_OBJ_PORT_FDB, }; struct switchdev_obj { enum switchdev_obj_id id; enum switchdev_trans trans; + int (*cb)(struct net_device *dev, struct switchdev_obj *obj); union { struct switchdev_obj_vlan { /* PORT_VLAN */ u16 flags; @@ -67,6 +69,10 @@ struct switchdev_obj { u32 nlflags; u32 tb_id; } ipv4_fib; + struct switchdev_obj_fdb { /* PORT_FDB */ + const unsigned char *addr; + u16 vid; + } fdb; } u; }; @@ -80,6 +86,8 @@ struct switchdev_obj { * @switchdev_port_obj_add: Add an object to port (see switchdev_obj). * * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj). + * + * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj). 
*/ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, @@ -90,6 +98,8 @@ struct switchdev_ops { struct switchdev_obj *obj); int (*switchdev_port_obj_del)(struct net_device *dev, struct switchdev_obj *obj); + int (*switchdev_port_obj_dump)(struct net_device *dev, + struct switchdev_obj *obj); }; enum switchdev_notifier_type { @@ -121,6 +131,7 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj); int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj); +int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, @@ -137,6 +148,15 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id); void switchdev_fib_ipv4_abort(struct fib_info *fi); +int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid, u16 nlm_flags); +int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid); +int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx); #else @@ -164,6 +184,12 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } +static inline int switchdev_port_obj_dump(struct net_device *dev, + struct switchdev_obj *obj) +{ + return -EOPNOTSUPP; +} + static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; @@ -221,6 +247,30 @@ static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) { } +static inline int 
switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 vid, u16 nlm_flags) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, + int idx) +{ + return -EOPNOTSUPP; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ -- cgit v1.1 From de133464c9e70808d3e5a861294bc55940988178 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 15 May 2015 14:47:32 -0700 Subject: netns: make nsid_lock per net The spinlock is used to protect netns_ids which is per net, so there is no need to use a global spinlock. Cc: Nicolas Dichtel Signed-off-by: Cong Wang Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/net_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3f850ac..72eb237 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -58,6 +58,7 @@ struct net { struct list_head exit_list; /* Use only net_mutex */ struct user_namespace *user_ns; /* Owning user namespace */ + spinlock_t nsid_lock; struct idr netns_ids; struct ns_common ns; -- cgit v1.1 From 5cf422808244ca8f1177c72fe6e1ce8322794b57 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 15 May 2015 14:15:35 -0700 Subject: ipv4: introduce frag_expire_skip_icmp() Improve readability of skip ICMP for de-fragmentation expiration logic. This change will also make the logic easier to maintain when the following patches in this series are applied. Signed-off-by: Andy Zhou Signed-off-by: David S. 
Miller --- include/net/ip.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 0ed6d76..43f6f39 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -478,6 +478,16 @@ enum ip_defrag_users { IP_DEFRAG_MACVLAN, }; +/* Return true if the value of 'user' is between 'lower_bond' + * and 'upper_bond' inclusively. + */ +static inline bool ip_defrag_user_in_between(u32 user, + enum ip_defrag_users lower_bond, + enum ip_defrag_users upper_bond) +{ + return user >= lower_bond && user <= upper_bond; +} + int ip_defrag(struct sk_buff *skb, u32 user); #ifdef CONFIG_INET struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user); -- cgit v1.1 From 49d16b23cd1e61c028ee088c5a64e9ac6a9c6147 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 15 May 2015 14:15:37 -0700 Subject: bridge_netfilter: No ICMP packet on IPv4 fragmentation error When bridge netfilter re-fragments an IP packet for output, all packets that can not be re-fragmented to their original input size should be silently discarded. However, current bridge netfilter output path generates an ICMP packet with 'size exceeded MTU' message for such packets, this is a bug. This patch refactors the ip_fragment() API to allow two separate use cases. The bridge netfilter user case will not send ICMP, the routing output will, as before. Signed-off-by: Andy Zhou Acked-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/ip.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 43f6f39..cd7a6a4 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb); int ip_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb); -int ip_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)); +int ip_do_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); int ip_do_nat(struct sk_buff *skb); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); -- cgit v1.1 From 1a19cb680be0d4b06ce9a9d6516b8f45f544d3e8 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:39 +0200 Subject: ieee802154: change transmit power to s32 This patch changes the transmit power from s8 to s32. This prepares to store an mBm value instead of a dBm value inside the transmit power variable. The old interface keeps the s8 dBm value, which should remain backward compatible when assigning s8 to s32.
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 2 +- include/net/mac802154.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 6ea16c8..47804cd 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -85,7 +85,7 @@ struct wpan_phy { u8 current_channel; u8 current_page; u32 channels_supported[IEEE802154_MAX_PAGE + 1]; - s8 transmit_power; + s32 transmit_power; struct wpan_phy_cca cca; __le64 perm_extended_addr; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 7df28a4..400e4e8 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -213,7 +213,7 @@ struct ieee802154_ops { int (*set_hw_addr_filt)(struct ieee802154_hw *hw, struct ieee802154_hw_addr_filt *filt, unsigned long changed); - int (*set_txpower)(struct ieee802154_hw *hw, s8 dbm); + int (*set_txpower)(struct ieee802154_hw *hw, s32 dbm); int (*set_lbt)(struct ieee802154_hw *hw, bool on); int (*set_cca_mode)(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca); -- cgit v1.1 From e2eb173aaacd1a1bcd255d3e74ffb719e47eeadb Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:40 +0200 Subject: ieee802154: change transmit power to mbm This patch change the handling of transmit power level from dbm to mbm. This prepares to handle floating point transmit power levels values. The old netlink 802.15.4 will convert the dbm value to mbm for handling backward compatibility. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 1 + include/net/mac802154.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 47804cd..b5b3f9f 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -85,6 +85,7 @@ struct wpan_phy { u8 current_channel; u8 current_page; u32 channels_supported[IEEE802154_MAX_PAGE + 1]; + /* current transmit_power in mBm */ s32 transmit_power; struct wpan_phy_cca cca; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 400e4e8..e863a85 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -171,7 +171,7 @@ struct ieee802154_hw { * Returns either zero, or negative errno. * * set_txpower: - * Set radio transmit power in dB. Called with pib_lock held. + * Set radio transmit power in mBm. Called with pib_lock held. * Returns either zero, or negative errno. * * set_lbt @@ -213,7 +213,7 @@ struct ieee802154_ops { int (*set_hw_addr_filt)(struct ieee802154_hw *hw, struct ieee802154_hw_addr_filt *filt, unsigned long changed); - int (*set_txpower)(struct ieee802154_hw *hw, s32 dbm); + int (*set_txpower)(struct ieee802154_hw *hw, s32 mbm); int (*set_lbt)(struct ieee802154_hw *hw, bool on); int (*set_cca_mode)(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca); -- cgit v1.1 From 32b23550ad64d9676f2218b3d5de46bacf98ef1d Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:41 +0200 Subject: ieee802154: change cca ed level to mbm This patch change the handling of cca energy detection level from dbm to mbm. This prepares to handle floating point cca energy detection levels values. The old netlink 802.15.4 will convert the dbm value to mbm for handling backward compatibility. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 1 + include/net/mac802154.h | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index b5b3f9f..9ced2c9 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -91,6 +91,7 @@ struct wpan_phy { __le64 perm_extended_addr; + /* current cca ed threshold in mBm */ s32 cca_ed_level; /* PHY depended MAC PIB values */ diff --git a/include/net/mac802154.h b/include/net/mac802154.h index e863a85..71e2456 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -184,7 +184,7 @@ struct ieee802154_hw { * Returns either zero, or negative errno. * * set_cca_ed_level - * Sets the CCA energy detection threshold in dBm. Called with pib_lock + * Sets the CCA energy detection threshold in mBm. Called with pib_lock * held. * Returns either zero, or negative errno. * @@ -217,8 +217,7 @@ struct ieee802154_ops { int (*set_lbt)(struct ieee802154_hw *hw, bool on); int (*set_cca_mode)(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca); - int (*set_cca_ed_level)(struct ieee802154_hw *hw, - s32 level); + int (*set_cca_ed_level)(struct ieee802154_hw *hw, s32 mbm); int (*set_csma_params)(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries); int (*set_frame_retries)(struct ieee802154_hw *hw, -- cgit v1.1 From 72f655e44db9c7e835ceba96dc03cbe979d3f80d Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:42 +0200 Subject: ieee802154: introduce wpan_phy_supported This patch introduces the wpan_phy_supported struct for wpan_phy. There is currently no way to check if a transceiver can handle IEEE 802.15.4 compliant values. With this struct we can check whether the transceiver supports these values before sending them to the driver layer.
Signed-off-by: Alexander Aring Suggested-by: Phoebe Buckheister Acked-by: Varka Bhadram Cc: Alan Ott Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 9ced2c9..1941d7a 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -61,6 +61,10 @@ struct cfg802154_ops { struct wpan_dev *wpan_dev, bool mode); }; +struct wpan_phy_supported { + u32 channels[IEEE802154_MAX_PAGE + 1]; +}; + struct wpan_phy_cca { enum nl802154_cca_modes mode; enum nl802154_cca_opts opt; @@ -84,7 +88,7 @@ struct wpan_phy { */ u8 current_channel; u8 current_page; - u32 channels_supported[IEEE802154_MAX_PAGE + 1]; + struct wpan_phy_supported supported; /* current transmit_power in mBm */ s32 transmit_power; struct wpan_phy_cca cca; -- cgit v1.1 From fea3318d20776a94afeea0460c6ee9904e60569e Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:43 +0200 Subject: ieee802154: add several phy supported handling This patch adds support for phy supported handling for all other already existing handling 802.15.4 functionality. We assume now a fully 802.15.4 compliant transceiver at phy allocation. If a transceiver can support 802.15.4 default values only, then the values should be overwritten by values the transceiver supports. If the transceiver doesn't set the according hardware flags, we assume the 802.15.4 defaults now which cannot be changed.
Signed-off-by: Alexander Aring Suggested-by: Phoebe Buckheister Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 26 +++++++++++++++++++++++++- include/net/nl802154.h | 22 ++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 1941d7a..23abd08 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -61,8 +61,32 @@ struct cfg802154_ops { struct wpan_dev *wpan_dev, bool mode); }; +static inline bool +wpan_phy_supported_bool(bool b, enum nl802154_supported_bool_states st) +{ + switch (st) { + case NL802154_SUPPORTED_BOOL_TRUE: + return b; + case NL802154_SUPPORTED_BOOL_FALSE: + return !b; + case NL802154_SUPPORTED_BOOL_BOTH: + return true; + default: + WARN_ON(1); + } + + return false; +} + struct wpan_phy_supported { - u32 channels[IEEE802154_MAX_PAGE + 1]; + u32 channels[IEEE802154_MAX_PAGE + 1], + cca_modes, cca_opts; + enum nl802154_supported_bool_states lbt; + u8 min_minbe, max_minbe, min_maxbe, max_maxbe, + min_csma_backoffs, max_csma_backoffs; + s8 min_frame_retries, max_frame_retries; + size_t tx_powers_size, cca_ed_levels_size; + const s32 *tx_powers, *cca_ed_levels; }; struct wpan_phy_cca { diff --git a/include/net/nl802154.h b/include/net/nl802154.h index f8b5bc9..0552771 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -162,4 +162,26 @@ enum nl802154_cca_opts { NL802154_CCA_OPT_ATTR_MAX = __NL802154_CCA_OPT_ATTR_AFTER_LAST - 1 }; +/** + * enum nl802154_supported_bool_states - bool states for bool capability entry + * + * @NL802154_SUPPORTED_BOOL_FALSE: indicates to set false + * @NL802154_SUPPORTED_BOOL_TRUE: indicates to set true + * @__NL802154_SUPPORTED_BOOL_INVALD: reserved + * @NL802154_SUPPORTED_BOOL_BOTH: indicates to set true and false + * @__NL802154_SUPPORTED_BOOL_AFTER_LAST: Internal + * @NL802154_SUPPORTED_BOOL_MAX: highest value for bool states + */ +enum nl802154_supported_bool_states { + 
NL802154_SUPPORTED_BOOL_FALSE, + NL802154_SUPPORTED_BOOL_TRUE, + /* to handle them in a mask */ + __NL802154_SUPPORTED_BOOL_INVALD, + NL802154_SUPPORTED_BOOL_BOTH, + + /* keep last */ + __NL802154_SUPPORTED_BOOL_AFTER_LAST, + NL802154_SUPPORTED_BOOL_MAX = __NL802154_SUPPORTED_BOOL_AFTER_LAST - 1 +}; + #endif /* __NL802154_H */ -- cgit v1.1 From 791021bf13ec9d0fc14bfd8c9c4b368ace568239 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:44 +0200 Subject: mac802154: check for really changes This patch adds a check if the value is really changed inside pib/mib. If a transceiver does support only one value for e.g. max_be then this will also handle that the driver layer doesn't need to care about handling to set one value only. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 23abd08..37abc16 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -94,6 +94,18 @@ struct wpan_phy_cca { enum nl802154_cca_opts opt; }; +static inline bool +wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) +{ + if (a->mode != b->mode) + return false; + + if (a->mode == NL802154_CCA_ENERGY_CARRIER) + return a->opt == b->opt; + + return true; +} + struct wpan_phy { struct mutex pib_lock; -- cgit v1.1 From edea8f7c75ec6c238130bd7e74d9f6f4c26e97b0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:46 +0200 Subject: cfg802154: introduce wpan phy flags This patch introduces a flag property for the wpan phy structure. The current flag settings in ieee802154_hw are accessible in mac802154 layer only which is okay for flags which indicate MAC handling which are done by phy. For real PHY layer settings like cca mode, transmit power, cca energy detection level.
The difference between these flags are that the MAC handling flags are only handled in mac802154/HardMac layer e.g. on an interface up. The phy settings are direct netlink calls from nl802154 into the driver layer and the nl802154 need to have a chance to check if the driver supports this handling before sending to the next layer. We also check now on PHY flags while dumping and setting pib attributes. In comparing with MIB attributes the 802.15.4 gives us an default value which we assume when a transceiver implement less functionality. In case of MIB settings the nl802154 layer doesn't need to check on the ieee802154_hw flags then. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 16 ++++++++++++++++ include/net/mac802154.h | 29 +++++++---------------------- 2 files changed, 23 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 37abc16..a12c6c52 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -106,6 +106,20 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) return true; } +/** + * @WPAN_PHY_FLAG_TRANSMIT_POWER: Indicates that transceiver will support + * transmit power setting. + * @WPAN_PHY_FLAG_CCA_ED_LEVEL: Indicates that transceiver will support cca ed + * level setting. + * @WPAN_PHY_FLAG_CCA_MODE: Indicates that transceiver will support cca mode + * setting. + */ +enum wpan_phy_flags { + WPAN_PHY_FLAG_TXPOWER = BIT(1), + WPAN_PHY_FLAG_CCA_ED_LEVEL = BIT(2), + WPAN_PHY_FLAG_CCA_MODE = BIT(3), +}; + struct wpan_phy { struct mutex pib_lock; @@ -117,6 +131,8 @@ struct wpan_phy { */ const void *privid; + u32 flags; + /* * This is a PIB according to 802.15.4-2011. 
* We do not provide timing-related variables, as they diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 71e2456..9605c7f 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -89,41 +89,26 @@ struct ieee802154_hw { #define IEEE802154_HW_TX_OMIT_CKSUM 0x00000001 /* Indicates that receiver will autorespond with ACK frames. */ #define IEEE802154_HW_AACK 0x00000002 -/* Indicates that transceiver will support transmit power setting. */ -#define IEEE802154_HW_TXPOWER 0x00000004 /* Indicates that transceiver will support listen before transmit. */ -#define IEEE802154_HW_LBT 0x00000008 -/* Indicates that transceiver will support cca mode setting. */ -#define IEEE802154_HW_CCA_MODE 0x00000010 -/* Indicates that transceiver will support cca ed level setting. */ -#define IEEE802154_HW_CCA_ED_LEVEL 0x00000020 +#define IEEE802154_HW_LBT 0x00000004 /* Indicates that transceiver will support csma (max_be, min_be, csma retries) * settings. */ -#define IEEE802154_HW_CSMA_PARAMS 0x00000040 +#define IEEE802154_HW_CSMA_PARAMS 0x00000008 /* Indicates that transceiver will support ARET frame retries setting. */ -#define IEEE802154_HW_FRAME_RETRIES 0x00000080 +#define IEEE802154_HW_FRAME_RETRIES 0x00000010 /* Indicates that transceiver will support hardware address filter setting. */ -#define IEEE802154_HW_AFILT 0x00000100 +#define IEEE802154_HW_AFILT 0x00000020 /* Indicates that transceiver will support promiscuous mode setting. */ -#define IEEE802154_HW_PROMISCUOUS 0x00000200 +#define IEEE802154_HW_PROMISCUOUS 0x00000040 /* Indicates that receiver omits FCS. */ -#define IEEE802154_HW_RX_OMIT_CKSUM 0x00000400 +#define IEEE802154_HW_RX_OMIT_CKSUM 0x00000080 /* Indicates that receiver will not filter frames with bad checksum. */ -#define IEEE802154_HW_RX_DROP_BAD_CKSUM 0x00000800 +#define IEEE802154_HW_RX_DROP_BAD_CKSUM 0x00000100 /* Indicates that receiver omits FCS and xmitter will add FCS on it's own. 
*/ #define IEEE802154_HW_OMIT_CKSUM (IEEE802154_HW_TX_OMIT_CKSUM | \ IEEE802154_HW_RX_OMIT_CKSUM) -/* This groups the most common CSMA support fields into one. */ -#define IEEE802154_HW_CSMA (IEEE802154_HW_CCA_MODE | \ - IEEE802154_HW_CCA_ED_LEVEL | \ - IEEE802154_HW_CSMA_PARAMS) - -/* This groups the most common ARET support fields into one. */ -#define IEEE802154_HW_ARET (IEEE802154_HW_CSMA | \ - IEEE802154_HW_FRAME_RETRIES) - /* struct ieee802154_ops - callbacks from mac802154 to the driver * * This structure contains various callbacks that the driver may -- cgit v1.1 From 65318680c97cca15e3678148b3a5acaa33e991ec Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:47 +0200 Subject: ieee802154: add iftypes capability This patch adds capability flags for supported interface types. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index a12c6c52..11bbf17 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -80,7 +80,7 @@ wpan_phy_supported_bool(bool b, enum nl802154_supported_bool_states st) struct wpan_phy_supported { u32 channels[IEEE802154_MAX_PAGE + 1], - cca_modes, cca_opts; + cca_modes, cca_opts, iftypes; enum nl802154_supported_bool_states lbt; u8 min_minbe, max_minbe, min_maxbe, max_maxbe, min_csma_backoffs, max_csma_backoffs; -- cgit v1.1 From 0e66545701014814360b08a7a43ca652f82b6e5a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:53 +0200 Subject: nl802154: add support for dump phy capabilities This patch add support to nl802154 to dump all phy capabilities which is inside the wpan_phy_supported struct. Also we introduce a new method to dumping supported channels. The new method will offer a easier interface and has lesser netlink traffic. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/nl802154.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'include/net') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 0552771..0badebd 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -100,6 +100,8 @@ enum nl802154_attrs { NL802154_ATTR_EXTENDED_ADDR, + NL802154_ATTR_WPAN_PHY_CAPS, + /* add attributes here, update the policy in nl802154.c */ __NL802154_ATTR_AFTER_LAST, @@ -120,6 +122,61 @@ enum nl802154_iftype { }; /** + * enum nl802154_wpan_phy_capability_attr - wpan phy capability attributes + * + * @__NL802154_CAP_ATTR_INVALID: attribute number 0 is reserved + * @NL802154_CAP_ATTR_CHANNELS: a nested attribute for nl802154_channel_attr + * @NL802154_CAP_ATTR_TX_POWERS: a nested attribute for + * nl802154_wpan_phy_tx_power + * @NL802154_CAP_ATTR_MIN_CCA_ED_LEVEL: minimum value for cca_ed_level + * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maxmimum value for cca_ed_level + * @NL802154_CAP_ATTR_CCA_MODES: nl802154_cca_modes flags + * @NL802154_CAP_ATTR_CCA_OPTS: nl802154_cca_opts flags + * @NL802154_CAP_ATTR_MIN_MINBE: minimum of minbe value + * @NL802154_CAP_ATTR_MAX_MINBE: maximum of minbe value + * @NL802154_CAP_ATTR_MIN_MAXBE: minimum of maxbe value + * @NL802154_CAP_ATTR_MAX_MINBE: maximum of maxbe value + * @NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS: minimum of csma backoff value + * @NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS: maximum of csma backoffs value + * @NL802154_CAP_ATTR_MIN_FRAME_RETRIES: minimum of frame retries value + * @NL802154_CAP_ATTR_MAX_FRAME_RETRIES: maximum of frame retries value + * @NL802154_CAP_ATTR_IFTYPES: nl802154_iftype flags + * @NL802154_CAP_ATTR_LBT: nl802154_supported_bool_states flags + * @NL802154_CAP_ATTR_MAX: highest cap attribute currently defined + * @__NL802154_CAP_ATTR_AFTER_LAST: internal use + */ +enum nl802154_wpan_phy_capability_attr { + 
__NL802154_CAP_ATTR_INVALID, + + NL802154_CAP_ATTR_IFTYPES, + + NL802154_CAP_ATTR_CHANNELS, + NL802154_CAP_ATTR_TX_POWERS, + + NL802154_CAP_ATTR_CCA_ED_LEVELS, + NL802154_CAP_ATTR_CCA_MODES, + NL802154_CAP_ATTR_CCA_OPTS, + + NL802154_CAP_ATTR_MIN_MINBE, + NL802154_CAP_ATTR_MAX_MINBE, + + NL802154_CAP_ATTR_MIN_MAXBE, + NL802154_CAP_ATTR_MAX_MAXBE, + + NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS, + NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS, + + NL802154_CAP_ATTR_MIN_FRAME_RETRIES, + NL802154_CAP_ATTR_MAX_FRAME_RETRIES, + + NL802154_CAP_ATTR_LBT, + + /* keep last */ + __NL802154_CAP_ATTR_AFTER_LAST, + NL802154_CAP_ATTR_MAX = __NL802154_CAP_ATTR_AFTER_LAST - 1 +}; + +/** * enum nl802154_cca_modes - cca modes * * @__NL802154_CCA_INVALID: cca mode number 0 is reserved -- cgit v1.1 From 492135557dc090a1abb2cfbe1a412757e3ed68ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 May 2015 21:04:22 +0200 Subject: tcp: add rfc3168, section 6.1.1.1. fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This work as a follow-up of commit f7b3bec6f516 ("net: allow setting ecn via routing table") and adds RFC3168 section 6.1.1.1. fallback for outgoing ECN connections. In other words, this work adds a retry with a non-ECN setup SYN packet, as suggested from the RFC on the first timeout: [...] A host that receives no reply to an ECN-setup SYN within the normal SYN retransmission timeout interval MAY resend the SYN and any subsequent SYN retransmissions with CWR and ECE cleared. [...] 
Schematic client-side view when assuming the server is in tcp_ecn=2 mode, that is, Linux default since 2009 via commit 255cac91c3c9 ("tcp: extend ECN sysctl to allow server-side only ECN"): 1) Normal ECN-capable path: SYN ECE CWR -----> <----- SYN ACK ECE ACK -----> 2) Path with broken middlebox, when client has fallback: SYN ECE CWR ----X crappy middlebox drops packet (timeout, rtx) SYN -----> <----- SYN ACK ACK -----> In case we would not have the fallback implemented, the middlebox drop point would basically end up as: SYN ECE CWR ----X crappy middlebox drops packet (timeout, rtx) SYN ECE CWR ----X crappy middlebox drops packet (timeout, rtx) SYN ECE CWR ----X crappy middlebox drops packet (timeout, rtx) In any case, it's rather a smaller percentage of sites where there would occur such additional setup latency: it was found in end of 2014 that ~56% of IPv4 and 65% of IPv6 servers of Alexa 1 million list would negotiate ECN (aka tcp_ecn=2 default), 0.42% of these webservers will fail to connect when trying to negotiate with ECN (tcp_ecn=1) due to timeouts, which the fallback would mitigate with a slight latency trade-off. Recent related paper on this topic: Brian Trammell, Mirja Kühlewind, Damiano Boppart, Iain Learmonth, Gorry Fairhurst, and Richard Scheffenegger: "Enabling Internet-Wide Deployment of Explicit Congestion Notification." Proc. PAM 2015, New York. http://ecn.ethz.ch/ecn-pam15.pdf Thus, when net.ipv4.tcp_ecn=1 is being set, the patch will perform RFC3168, section 6.1.1.1. fallback on timeout. For users explicitly not wanting this which can be in DC use case, we add a net.ipv4.tcp_ecn_fallback knob that allows for disabling the fallback. tp->ecn_flags are not being cleared in tcp_ecn_clear_syn() on output, but rather we let tcp_ecn_rcv_synack() take that over on input path in case a SYN ACK ECE was delayed. Thus a spurious SYN retransmission will not prevent ECN being negotiated eventually in that case. 
Reference: https://www.ietf.org/proceedings/92/slides/slides-92-iccrg-1.pdf Reference: https://www.ietf.org/proceedings/89/slides/slides-89-tsvarea-1.pdf Signed-off-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Mirja Kühlewind Signed-off-by: Brian Trammell Cc: Eric Dumazet Cc: Dave That Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 614a49b..6848b8b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -77,6 +77,8 @@ struct netns_ipv4 { struct local_ports ip_local_ports; int sysctl_tcp_ecn; + int sysctl_tcp_ecn_fallback; + int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; int sysctl_ip_nonlocal_bind; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0d85223..2bb2bad 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -712,6 +712,8 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) #define TCPHDR_ECE 0x40 #define TCPHDR_CWR 0x80 +#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) + /* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission code. * We also store the host-order sequence numbers in here too. -- cgit v1.1 From 06b2c61c92a9942769ee8da22d3ce8b8b935c038 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Tue, 19 May 2015 12:41:47 -0700 Subject: ip: remove unused function prototype ip_do_nat() function was removed prior to kernel 3.4. Remove the unnecessary function prototype as well. Reported-by: Florian Westphal Signed-off-by: Andy Zhou Signed-off-by: David S. 
Miller --- include/net/ip.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index cd7a6a4..7921a36 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -110,7 +110,6 @@ int ip_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb); int ip_do_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)); -int ip_do_nat(struct sk_buff *skb); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); int ip_local_out_sk(struct sock *sk, struct sk_buff *skb); -- cgit v1.1 From f8bdbb584749420da1a7fea8cc1df18e5c2c4d6c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 May 2015 15:04:53 +0200 Subject: mac80211: add missing drv_priv description for TXQ struct The kernel-doc description for the drv_priv member of struct ieee80211_txq was missing, leading to errors. Add a suitable description to fix that. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 67e0df1..887fe95 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1728,6 +1728,7 @@ struct ieee80211_tx_control { * @sta: station table entry, %NULL for per-vif queue * @tid: the TID for this queue (unused for per-vif queue) * @ac: the AC for this queue + * @drv_priv: driver private area, sized by hw->txq_data_size * * The driver can obtain packets from this queue by calling * ieee80211_tx_dequeue(). 
-- cgit v1.1 From eb9344781a2f8381ed60cd9e662d9ced2d168ecb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2015 13:26:55 -0700 Subject: tcp: add a force_schedule argument to sk_stream_alloc_skb() In commit 8e4d980ac215 ("tcp: fix behavior for epoll edge trigger") we fixed a possible hang of TCP sockets under memory pressure, by allowing sk_stream_alloc_skb() to use sk_forced_mem_schedule() if no packet is in socket write queue. It turns out there are other cases where we want to force memory schedule : tcp_fragment() & tso_fragment() need to split a big TSO packet into two smaller ones. If we block here because of TCP memory pressure, we can effectively block TCP socket from sending new data. If no further ACK is coming, this hang would be definitive, and socket has no chance to effectively reduce its memory usage. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 4581a60..26c1c31 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2025,7 +2025,8 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) } } -struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp); +struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, + bool force_schedule); /** * sk_page_frag - return an appropriate page_frag -- cgit v1.1 From f5af1f57a2914e290de40e2c93716da8885c4965 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 May 2015 10:59:01 -0700 Subject: inet_hashinfo: remove bsocket counter We no longer need bsocket atomic counter, as inet_csk_get_port() calls bind_conflict() regardless of its value, after commit 2b05ad33e1e624e ("tcp: bind() fix autoselection to share ports") This patch removes overhead of maintaining this counter and double inet_csk_get_port() calls under pressure. 
Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Flavio Leitner Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 73fe0f9..774d241 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -148,8 +148,6 @@ struct inet_hashinfo { */ struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] ____cacheline_aligned_in_smp; - - atomic_t bsockets; }; static inline struct inet_ehash_bucket *inet_ehash_bucket( -- cgit v1.1 From 4a3a8c0c3a613e481bea931f0d65dc4a7efaa9b9 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 22 May 2015 17:43:52 +0200 Subject: mac802154: remove pib lock This patch removes the pib lock which is now replaced by rtnl lock. The new interface already use the rtnl lock only. Nevertheless this patch will fix issues while using new and old interface at the same time. Signed-off-by: Alexander Aring Reviewed-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 11bbf17..c6aa1d2 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -121,8 +121,6 @@ enum wpan_phy_flags { }; struct wpan_phy { - struct mutex pib_lock; - /* If multiple wpan_phys are registered and you're handed e.g. * a regular netdev with assigned ieee802154_ptr, you won't * know whether it points to a wpan_phy your driver has registered -- cgit v1.1 From 344f8c119df742f2bf7098cf8fc326351f583249 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 22 May 2015 17:43:53 +0200 Subject: mac802154: use atomic ops for sequence incrementation This patch will use atomic operations for sequence number incrementation while MAC header generation. 
Upper layers like af_802154 or 6LoWPAN could call this function in a parallel context while generating 802.15.4 MAC header before queuing into the wpan interface's transmit queue. Signed-off-by: Alexander Aring Reviewed-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 4 ++-- include/net/ieee802154_netdev.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index c6aa1d2..4de59aa 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -177,9 +177,9 @@ struct wpan_dev { __le64 extended_addr; /* MAC BSN field */ - u8 bsn; + atomic_t bsn; /* MAC DSN field */ - u8 dsn; + atomic_t dsn; u8 min_be; u8 max_be; diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 94a2970..144fefb 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -431,7 +431,6 @@ struct ieee802154_mlme_ops { */ __le16 (*get_pan_id)(const struct net_device *dev); __le16 (*get_short_addr)(const struct net_device *dev); - u8 (*get_dsn)(const struct net_device *dev); }; static inline struct ieee802154_mlme_ops * -- cgit v1.1 From c947f7e1e31a708f5a4ea8c1a627bec578cd9223 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 22 May 2015 17:43:54 +0200 Subject: mac802154: remove mib lock This patch removes the mib lock. The new locking mechanism is to protect the mib values with the rtnl lock. Note that this isn't always necessary: if we have an interface up, most mib values are read-only (e.g. address settings). With this behaviour we can remove locking in hotpath like frame parsing completely. It depends on context if we need to hold the rtnl lock or not, this makes the callbacks of ieee802154_mlme_ops unnecessary because these callbacks always hold the locks.
Signed-off-by: Alexander Aring Reviewed-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- include/net/ieee802154_netdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 144fefb..84a72a1 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -422,15 +422,6 @@ struct ieee802154_mlme_ops { struct ieee802154_mac_params *params); struct ieee802154_llsec_ops *llsec; - - /* The fields below are required. */ - - /* - * FIXME: these should become the part of PIB/MIB interface. - * However we still don't have IB interface of any kind - */ - __le16 (*get_pan_id)(const struct net_device *dev); - __le16 (*get_short_addr)(const struct net_device *dev); }; static inline struct ieee802154_mlme_ops * -- cgit v1.1 From 286c2349f6665c3e67f464a5faa14a0e28be4842 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:55:56 -0700 Subject: ipv6: Clean up ipv6_select_ident() and ip6_fragment() This patch changes the ipv6_select_ident() signature to return a fragment id instead of taking a whole frag_hdr as a param to only set the frag_hdr->identification. It also cleans up ip6_fragment() to obtain the fragment id at the beginning instead of using multiple "if" later to check fragment id has been generated or not. Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index aab8190..8c4f881 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,8 +671,7 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, - struct rt6_info *rt); +u32 ipv6_select_ident(struct net *net, struct rt6_info *rt); void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); -- cgit v1.1 From fd0273d7939f2ce3247f6aac5f6b9a0135d4cd39 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:55:57 -0700 Subject: ipv6: Remove external dependency on rt6i_dst and rt6i_src This patch removes the assumptions that the returned rt is always a RTF_CACHE entry with the rt6i_dst and rt6i_src containing the destination and source address. The dst and src can be recovered from the calling site. We may consider to rename (rt6i_dst, rt6i_src) to (rt6i_key_dst, rt6i_key_src) later. Signed-off-by: Martin KaFai Lau Reviewed-by: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8c4f881..b950a20 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,7 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -u32 ipv6_select_ident(struct net *net, struct rt6_info *rt); +u32 ipv6_select_ident(struct net *net, + const struct in6_addr *daddr, + const struct in6_addr *saddr); void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); -- cgit v1.1 From 2647a9b07032c5a95ddee1fcb65d95bddbc6b7f9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:55:58 -0700 Subject: ipv6: Remove external dependency on rt6i_gateway and RTF_ANYCAST When creating a RTF_CACHE route, RTF_ANYCAST is set based on rt6i_dst. Also, rt6i_gateway is always set to the nexthop while the nexthop could be a gateway or the rt6i_dst.addr. After removing the rt6i_dst and rt6i_src dependency in the last patch, we also need to stop the caller from depending on rt6i_gateway and RTF_ANYCAST. Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. 
Miller --- include/net/ip6_route.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5e19206..4caf7d6 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -163,11 +163,14 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } -static inline bool ipv6_anycast_destination(const struct sk_buff *skb) +static inline bool ipv6_anycast_destination(const struct dst_entry *dst, + const struct in6_addr *daddr) { - struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); + struct rt6_info *rt = (struct rt6_info *)dst; - return rt->rt6i_flags & RTF_ANYCAST; + return rt->rt6i_flags & RTF_ANYCAST || + (rt->rt6i_dst.plen != 128 && + ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } int ip6_fragment(struct sock *sk, struct sk_buff *skb, @@ -194,9 +197,15 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk) inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; } -static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) +static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, + struct in6_addr *daddr) { - return &rt->rt6i_gateway; + if (rt->rt6i_flags & RTF_GATEWAY) + return &rt->rt6i_gateway; + else if (rt->rt6i_flags & RTF_CACHE) + return &rt->rt6i_dst.addr; + else + return daddr; } #endif -- cgit v1.1 From 45e4fd26683c9a5f88600d91b08a484f7f09226a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:00 -0700 Subject: ipv6: Only create RTF_CACHE routes after encountering pmtu exception This patch creates RTF_CACHE routes only after encountering a pmtu exception. After ip6_rt_update_pmtu() has inserted the RTF_CACHE route into the fib6 tree, the rt->rt6i_node->fn_sernum is bumped which will fail the ip6_dst_check() and trigger a relookup.
Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4caf7d6..784ee3d 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -202,7 +202,7 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, { if (rt->rt6i_flags & RTF_GATEWAY) return &rt->rt6i_gateway; - else if (rt->rt6i_flags & RTF_CACHE) + else if (unlikely(rt->rt6i_flags & RTF_CACHE)) return &rt->rt6i_dst.addr; else return daddr; -- cgit v1.1 From b197df4f0f3782782e9ea8996e91b65ae33e8dd9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:01 -0700 Subject: ipv6: Add rt6_get_cookie() function Instead of doing the rt6->rt6i_node check whenever we need to get the route's cookie, refactor it into rt6_get_cookie(). It is prep work to handle FLOWI_FLAG_KNOWN_NH and also percpu rt6_info later. Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ include/net/ip6_route.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e000180..a4bece6 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -159,6 +159,11 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } +static inline u32 rt6_get_cookie(const struct rt6_info *rt) +{ + return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; +} + static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter.
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 784ee3d..297629a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -145,7 +145,7 @@ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr; #endif - np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + np->dst_cookie = rt6_get_cookie(rt); } static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, -- cgit v1.1 From 3da59bd94583d1239e4fbdee452265a160b9cd71 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:03 -0700 Subject: ipv6: Create RTF_CACHE clone when FLOWI_FLAG_KNOWN_NH is set This patch always creates RTF_CACHE clone with DST_NOCACHE when FLOWI_FLAG_KNOWN_NH is set so that the rt6i_dst is set to the fl6->daddr. Signed-off-by: Martin KaFai Lau Acked-by: Julian Anastasov Tested-by: Julian Anastasov Cc: Hannes Frederic Sowa Cc: Steffen Klassert Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a4bece6..5556111 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -161,6 +161,9 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { + if (unlikely(rt->dst.flags & DST_NOCACHE)) + rt = (struct rt6_info *)(rt->dst.from); + return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; } -- cgit v1.1 From 8d0b94afdca84598912347e61defa846a0988d04 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:04 -0700 Subject: ipv6: Keep track of DST_NOCACHE routes in case of iface down/unregister This patch keeps track of the DST_NOCACHE routes in a list and replaces its dev with loopback during the iface down/unregister event. 
Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 5556111..cc8f03c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -120,6 +120,9 @@ struct rt6_info { struct rt6key rt6i_src; struct rt6key rt6i_prefsrc; + struct list_head rt6i_uncached; + struct uncached_list *rt6i_uncached_list; + struct inet6_dev *rt6i_idev; u32 rt6i_metric; -- cgit v1.1 From d52d3997f843ffefaa8d8462790ffcaca6c74192 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:06 -0700 Subject: ipv6: Create percpu rt6_info After the patch 'ipv6: Only create RTF_CACHE routes after encountering pmtu exception', we need to compensate the performance hit (bouncing dst->__refcnt). Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index cc8f03c..3b76849 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -124,6 +124,7 @@ struct rt6_info { struct uncached_list *rt6i_uncached_list; struct inet6_dev *rt6i_idev; + struct rt6_info * __percpu *rt6i_pcpu; u32 rt6i_metric; u32 rt6i_pmtu; @@ -164,7 +165,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { - if (unlikely(rt->dst.flags & DST_NOCACHE)) + if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE)) rt = (struct rt6_info *)(rt->dst.from); return rt->rt6i_node ? 
rt->rt6i_node->fn_sernum : 0; -- cgit v1.1 From 7f1598678d4c05e3e085bf780a5ab3119637ac3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 May 2015 16:02:21 -0700 Subject: ipv6: ipv6_select_ident() returns a __be32 ipv6_select_ident() returns a 32bit value in network order. Fixes: 286c2349f666 ("ipv6: Clean up ipv6_select_ident() and ip6_fragment()") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ipv6.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b950a20..35d485c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,9 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -u32 ipv6_select_ident(struct net *net, - const struct in6_addr *daddr, - const struct in6_addr *saddr); +__be32 ipv6_select_ident(struct net *net, + const struct in6_addr *daddr, + const struct in6_addr *saddr); void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); -- cgit v1.1 From 80279fb7ba5b71981a60988b0307afa43f78f6b1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 May 2015 16:22:20 +0200 Subject: cfg80211: properly send NL80211_ATTR_DISCONNECTED_BY_AP in disconnect When we disconnect from the AP, drivers call cfg80211_disconnect(). This doesn't know whether the disconnection was initiated locally or by the AP though, which can cause problems with the supplicant, for example with WPS. This issue obviously doesn't show up with any mac80211 based driver since mac80211 doesn't call this function. Fix this by requiring drivers to indicate whether the disconnect is locally generated or not. 
I've tried to update the drivers, but may not have gotten the values correct, and some drivers may currently not be able to report correct values. In case of doubt I left it at false, which is the current behaviour. For libertas, make adjustments as indicated by Dan Williams. Reported-by: Matthieu Mauger Tested-by: Matthieu Mauger Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d63ecec..a741678 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4575,13 +4575,15 @@ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss, * @ie: information elements of the deauth/disassoc frame (may be %NULL) * @ie_len: length of IEs * @reason: reason code for the disconnection, set it to 0 if unknown + * @locally_generated: disconnection was requested locally * @gfp: allocation flags * * After it calls this function, the driver should enter an idle state * and not try to connect to any AP any more. */ void cfg80211_disconnected(struct net_device *dev, u16 reason, - const u8 *ie, size_t ie_len, gfp_t gfp); + const u8 *ie, size_t ie_len, + bool locally_generated, gfp_t gfp); /** * cfg80211_ready_on_channel - notification of remain_on_channel start -- cgit v1.1 From ebddf1a8d78aa3436353fae75c4396e50cb2d6cf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 26 May 2015 18:41:20 +0200 Subject: netfilter: nf_tables: allow to bind table to net_device This patch adds the internal NFT_AF_NEEDS_DEV flag to indicate that you must attach this table to a net_device. This change is required by the follow up patch that introduces the new netdev table. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e6bcf55..3d6f48c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -819,6 +819,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @name: name of the table + * @dev: this table is bound to this device (if any) */ struct nft_table { struct list_head list; @@ -828,6 +829,11 @@ struct nft_table { u32 use; u16 flags; char name[NFT_TABLE_MAXNAMELEN]; + struct net_device *dev; +}; + +enum nft_af_flags { + NFT_AF_NEEDS_DEV = (1 << 0), }; /** @@ -838,6 +844,7 @@ struct nft_table { * @nhooks: number of hooks in this family * @owner: module owner * @tables: used internally + * @flags: family flags * @nops: number of hook ops in this family * @hook_ops_init: initialization function for chain hook ops * @hooks: hookfn overrides for packet validation @@ -848,6 +855,7 @@ struct nft_af_info { unsigned int nhooks; struct module *owner; struct list_head tables; + u32 flags; unsigned int nops; void (*hook_ops_init)(struct nf_hook_ops *, unsigned int); -- cgit v1.1 From ed6c4136f1571bd6ab362afc3410905a8a69ca42 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 26 May 2015 18:41:40 +0200 Subject: netfilter: nf_tables: add netdev table to filter from ingress This allows us to create netdev tables that contain ingress chains. Use skb_header_pointer() as we may see shared sk_buffs at this stage. This change provides access to the existing nf_tables features from the ingress hook. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netns/nftables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index eee608b..c807811 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -13,6 +13,7 @@ struct netns_nftables { struct nft_af_info *inet; struct nft_af_info *arp; struct nft_af_info *bridge; + struct nft_af_info *netdev; unsigned int base_seq; u8 gencursor; }; -- cgit v1.1 From d0997b44c9081071e39417b188f7db6d1ea37341 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 25 May 2015 15:38:33 +0300 Subject: ieee802154: Remove ieee802154_reduced_mlme_ops references. As there doesn't seem to be a definition of it or any users of it. Signed-off-by: Lennert Buytenhek Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/ieee802154_netdev.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 84a72a1..0a87975 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -430,10 +430,4 @@ ieee802154_mlme_ops(const struct net_device *dev) return dev->ml_priv; } -static inline struct ieee802154_reduced_mlme_ops * -ieee802154_reduced_mlme_ops(const struct net_device *dev) -{ - return dev->ml_priv; -} - #endif -- cgit v1.1 From 095dc8e0c3686d586a01a50abc3e1bb9ac633054 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 May 2015 07:55:34 -0700 Subject: tcp: fix/cleanup inet_ehash_locks_alloc() If tcp ehash table is constrained to a very small number of buckets (eg boot parameter thash_entries=128), then we can crash if spinlock array has more entries. While we are at it, un-inline inet_ehash_locks_alloc() and make following changes : - Budget 2 cache lines per cpu worth of 'spinlocks' - Try to kmalloc() the array to avoid extra TLB pressure. 
(Most servers at Google allocate 8192 bytes for this hash table) - Get rid of various #ifdef Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 47 +++---------------------------------------- 1 file changed, 3 insertions(+), 44 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 774d241..b73c88a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -164,52 +163,12 @@ static inline spinlock_t *inet_ehash_lockp( return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask]; } -static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) -{ - unsigned int i, size = 256; -#if defined(CONFIG_PROVE_LOCKING) - unsigned int nr_pcpus = 2; -#else - unsigned int nr_pcpus = num_possible_cpus(); -#endif - if (nr_pcpus >= 4) - size = 512; - if (nr_pcpus >= 8) - size = 1024; - if (nr_pcpus >= 16) - size = 2048; - if (nr_pcpus >= 32) - size = 4096; - if (sizeof(spinlock_t) != 0) { -#ifdef CONFIG_NUMA - if (size * sizeof(spinlock_t) > PAGE_SIZE) - hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); - else -#endif - hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t), - GFP_KERNEL); - if (!hashinfo->ehash_locks) - return ENOMEM; - for (i = 0; i < size; i++) - spin_lock_init(&hashinfo->ehash_locks[i]); - } - hashinfo->ehash_locks_mask = size - 1; - return 0; -} +int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo); static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) { - if (hashinfo->ehash_locks) { -#ifdef CONFIG_NUMA - unsigned int size = (hashinfo->ehash_locks_mask + 1) * - sizeof(spinlock_t); - if (size > PAGE_SIZE) - vfree(hashinfo->ehash_locks); - else -#endif - kfree(hashinfo->ehash_locks); - hashinfo->ehash_locks = NULL; - } + kvfree(hashinfo->ehash_locks); + hashinfo->ehash_locks = NULL; } struct 
inet_bind_bucket * -- cgit v1.1 From 0f999b09f5c1b135e840501840dbcd01fad66f79 Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Wed, 27 May 2015 09:10:54 +0530 Subject: ieee802154: add set transmit power support This patch adds transmission power setting support for IEEE-802.15.4 devices via nl802154. Signed-off-by: Varka Bhadram Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 4de59aa..2e3bb01 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -44,6 +44,7 @@ struct cfg802154_ops { int (*set_channel)(struct wpan_phy *wpan_phy, u8 page, u8 channel); int (*set_cca_mode)(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca); + int (*set_tx_power)(struct wpan_phy *wpan_phy, s32 power); int (*set_pan_id)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id); int (*set_short_addr)(struct wpan_phy *wpan_phy, -- cgit v1.1 From d6b915e29f4adea94bc02ba7675bb4f84e6a1abd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 22 May 2015 16:32:51 +0200 Subject: ip_fragment: don't forward defragmented DF packet We currently always send fragments without DF bit set. Thus, given following setup: mtu1500 - mtu1500:1400 - mtu1400:1280 - mtu1280 A R1 R2 B Where R1 and R2 run linux with netfilter defragmentation/conntrack enabled, then if Host A sent a fragmented packet _with_ DF set to B, R1 will respond with icmp too big error if one of these fragments exceeded 1400 bytes. However, if R1 receives fragment sizes 1200 and 100, it would forward the reassembled packet without refragmenting, i.e. R2 will send an icmp error in response to a packet that was never sent, citing mtu that the original sender never exceeded. The other minor issue is that a refragmentation on R1 will conceal the MTU of R2-B since refragmentation does not set DF bit on the fragments. 
This modifies ip_fragment so that we track largest fragment size seen both for DF and non-DF packets, and set frag_max_size to the largest value. If the DF fragment size is larger or equal to the non-df one, we will consider the packet a path mtu probe: We set DF bit on the reassembled skb and also tag it with a new IPCB flag to force refragmentation even if skb fits outdev mtu. We will also set DF bit on each fragment in this case. Joint work with Hannes Frederic Sowa. Reported-by: Jesse Gross Signed-off-by: Florian Westphal Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 +- include/net/ip.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 8d17655..e1300b3 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,7 +43,7 @@ enum { * @len: total length of the original datagram * @meat: length of received fragments so far * @flags: fragment queue flags - * @max_size: (ipv4 only) maximum received fragment size with IP_DF set + * @max_size: maximum received fragment size * @net: namespace that this frag belongs to */ struct inet_frag_queue { diff --git a/include/net/ip.h b/include/net/ip.h index 7921a36..9b976cf 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -45,6 +45,7 @@ struct inet_skb_parm { #define IPSKB_FRAG_COMPLETE BIT(3) #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) +#define IPSKB_FRAG_PMTU BIT(6) u16 frag_max_size; }; -- cgit v1.1 From b69644c1c72e179738dd5c7e52e99d8550189472 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 27 May 2015 13:42:10 +0200 Subject: nl802154: add support to set cca ed level This patch adds support for setting the current cca ed level value over nl802154. 
Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 2e3bb01..290a9a6 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -44,6 +44,7 @@ struct cfg802154_ops { int (*set_channel)(struct wpan_phy *wpan_phy, u8 page, u8 channel); int (*set_cca_mode)(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca); + int (*set_cca_ed_level)(struct wpan_phy *wpan_phy, s32 ed_level); int (*set_tx_power)(struct wpan_phy *wpan_phy, s32 power); int (*set_pan_id)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id); -- cgit v1.1 From ed2dfd900992aa7b6b3d0abd8ec9a7e9d2c7f827 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 May 2015 11:34:37 -0700 Subject: tcp/dccp: warn user for preferred ip_local_port_range After commit 07f4c90062f8f ("tcp/dccp: try to not exhaust ip_local_port_range in connect()") it is advised to have an even number of ports described in /proc/sys/net/ipv4/ip_local_port_range This means start/end values should have a different parity. Let's warn sysadmins of this, so that they can update their settings if they want to. Suggested-by: David S. Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6848b8b..c68926b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -19,6 +19,7 @@ struct sock; struct local_ports { seqlock_t lock; int range[2]; + bool warned; }; struct ping_group_range { -- cgit v1.1 From db388a567ff9600debc2433c1fddf79a8fc38b21 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 1 Jun 2015 15:36:51 +0200 Subject: mac80211: move TX PN to public part of key struct For drivers supporting TSO or similar features, but that still have PN assignment in software, there's a need to have some memory to store the current PN value. As mac80211 already stores this and it's somewhat complicated to add a per-driver area to the key struct (due to the dynamic sizing thereof) it makes sense to just move the TX PN to the keyconf, i.e. the public part of the key struct. As TKIP is more complicated and we won't be able to offload it in this way right now (fast-xmit is skipped for TKIP unless the HW does it all, and our hardware needs MMIC calculation in software) I've not moved that for now - it's possible but requires exposing a lot of the internal TKIP state. As a bonus side effect, we can remove a lot of code by assuming the keyseq struct has a certain layout - with BUILD_BUG_ON to verify it. This might also improve performance, since now TX and RX no longer share a cacheline. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 887fe95..39e864b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1479,6 +1479,9 @@ enum ieee80211_key_flags { * wants to be given when a frame is transmitted and needs to be * encrypted in hardware. * @cipher: The key's cipher suite selector.
+ * @tx_pn: PN used for TX on non-TKIP keys, may be used by the driver + * as well if it needs to do software PN assignment by itself + * (e.g. due to TSO) * @flags: key flags, see &enum ieee80211_key_flags. * @keyidx: the key index (0-3) * @keylen: key material length @@ -1491,6 +1494,7 @@ enum ieee80211_key_flags { * @iv_len: The IV length for this key type */ struct ieee80211_key_conf { + atomic64_t tx_pn; u32 cipher; u8 icv_len; u8 iv_len; -- cgit v1.1 From 3b79af973cf42de059d0e90e20fd145d7ed8c5c1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 1 Jun 2015 23:14:59 +0200 Subject: mac80211: stop using pointers as userspace cookies Even if the pointers are really only accessible to root and used pretty much only by wpa_supplicant, this is still not great; even for debugging it'd be easier to have something that's easier to read and guaranteed to never get reused. With the recent change to make mac80211 create an ack_skb for the mgmt-tx path this becomes possible, only the client probe method needs to also allocate an ack_skb, and we can store the cookie in that skb. 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 39e864b..7466c55 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -875,6 +875,9 @@ struct ieee80211_tx_info { /* 4 bytes free */ } control; struct { + u64 cookie; + } ack; + struct { struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES]; s32 ack_signal; u8 ampdu_ack_len; -- cgit v1.1 From ea1b2b45f513c6f9ee49b465b1a9281feb783532 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2015 20:15:49 +0200 Subject: mac80211: remove short slot/short preamble incapable flags There are no drivers setting IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE or IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE, so any code using the two flags is dead; it's also exceedingly unlikely that any new driver could ever need to set these flags. The wcn36xx code is almost certainly broken, but this preserves the previous behaviour. 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7466c55..68a3cc9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -446,12 +446,8 @@ struct ieee80211_event { * @ibss_creator: indicates if a new IBSS network is being created * @aid: association ID number, valid only when @assoc is true * @use_cts_prot: use CTS protection - * @use_short_preamble: use 802.11b short preamble; - * if the hardware cannot handle this it must set the - * IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE hardware flag - * @use_short_slot: use short slot time (only relevant for ERP); - * if the hardware cannot handle this it must set the - * IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE hardware flag + * @use_short_preamble: use 802.11b short preamble + * @use_short_slot: use short slot time (only relevant for ERP) * @dtim_period: num of beacons before the next DTIM, for beaconing, * valid in station mode only if after the driver was notified * with the %BSS_CHANGED_BEACON_INFO flag, will be non-zero then. @@ -1784,13 +1780,6 @@ struct ieee80211_txq { * multicast frames when there are power saving stations so that * the driver can fetch them with ieee80211_get_buffered_bc(). * - * @IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE: - * Hardware is not capable of short slot operation on the 2.4 GHz band. - * - * @IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE: - * Hardware is not capable of receiving frames with short preamble on - * the 2.4 GHz band. - * * @IEEE80211_HW_SIGNAL_UNSPEC: * Hardware can provide signal values but we don't know its units. We * expect values between 0 and @max_signal. 
@@ -1903,8 +1892,6 @@ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING = 1<<2, - IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE = 1<<3, - IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE = 1<<4, IEEE80211_HW_SIGNAL_UNSPEC = 1<<5, IEEE80211_HW_SIGNAL_DBM = 1<<6, IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC = 1<<7, -- cgit v1.1 From c526a467671960922b5cb5fc385a1813602526bc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2015 20:32:00 +0200 Subject: mac80211: rename single hw-scan flag to follow naming convention The naming convention is to always have the flags prefixed with IEEE80211_HW_ so they're 'namespaced', make this flag follow it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 68a3cc9..e09a32c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1885,7 +1885,7 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_CLONED_SKBS: The driver will never modify the payload * or tailroom of TX skbs without copying them first. * - * @IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands + * @IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands * in one command, mac80211 doesn't have to run separate scans per band. 
*/ enum ieee80211_hw_flags { @@ -1917,7 +1917,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_HT_CCK_RATES = 1<<27, IEEE80211_HW_CHANCTX_STA_CSA = 1<<28, IEEE80211_HW_SUPPORTS_CLONED_SKBS = 1<<29, - IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS = 1<<30, + IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS = 1<<30, }; /** -- cgit v1.1 From 133be0264f28e59d772c6a259349ba3ee2b183b3 Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Thu, 4 Jun 2015 13:07:36 +0530 Subject: nl802154: export supported commands This patch will export the supported commands by the devices to the userspace. This will be useful to check if HardMAC drivers can support a specific command or not. Signed-off-by: Varka Bhadram Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/nl802154.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 0badebd..6fc231e 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -102,6 +102,8 @@ enum nl802154_attrs { NL802154_ATTR_WPAN_PHY_CAPS, + NL802154_ATTR_SUPPORTED_COMMANDS, + /* add attributes here, update the policy in nl802154.c */ __NL802154_ATTR_AFTER_LAST, -- cgit v1.1 From 42aecaa9bb2bd57eb8d61b4565cee5d3640863fb Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:39 -0700 Subject: net: Get skb hash over flow_keys structure This patch changes flow hashing to use jhash2 over the flow_keys structure instead of just doing jhash_3words over src, dst, and ports. This method will allow us to take more input into the hashing function so that we can include full IPv6 addresses, VLAN, flow labels etc. without needing to resort to xor'ing which makes for a poor hash. Acked-by: Jiri Pirko Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- include/net/flow_dissector.h | 21 ++++++++++++++++++--- include/net/ip.h | 2 ++ include/net/ipv6.h | 2 ++ 3 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index bac9c14..cba6a10 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -7,15 +7,24 @@ #include /** + * struct flow_dissector_key_control: + * @thoff: Transport header offset + */ +struct flow_dissector_key_control { + u16 thoff; + u16 padding; +}; + +/** * struct flow_dissector_key_basic: * @thoff: Transport header offset * @n_proto: Network header protocol (eg. IPv4/IPv6) * @ip_proto: Transport header protocol (eg. TCP/UDP) */ struct flow_dissector_key_basic { - u16 thoff; __be16 n_proto; u8 ip_proto; + u8 padding; }; /** @@ -70,6 +79,7 @@ struct flow_dissector_key_eth_addrs { }; enum flow_dissector_key_id { + FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ @@ -109,11 +119,16 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, } struct flow_keys { - struct flow_dissector_key_addrs addrs; - struct flow_dissector_key_ports ports; + struct flow_dissector_key_control control; +#define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_addrs addrs; }; +#define FLOW_KEYS_HASH_OFFSET \ + offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD) + extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_buf_dissector; diff --git a/include/net/ip.h b/include/net/ip.h index 9b976cf..16cfc87 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -360,6 +360,8 @@ static inline void inet_set_txhash(struct sock *sk) struct 
inet_sock *inet = inet_sk(sk); struct flow_keys keys; + memset(&keys, 0, sizeof(keys)); + keys.addrs.src = inet->inet_saddr; keys.addrs.dst = inet->inet_daddr; keys.ports.src = inet->inet_sport; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 35d485c..474ca46 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -699,6 +699,8 @@ static inline void ip6_set_txhash(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct flow_keys keys; + memset(&keys, 0, sizeof(keys)); + keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); keys.ports.src = inet->inet_sport; -- cgit v1.1 From c3f8324188fa80178f20c8209b492ca6191177e8 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:40 -0700 Subject: net: Add full IPv6 addresses to flow_keys This patch adds full IPv6 addresses into flow_keys and uses them as input to the flow hash function. The implementation supports either IPv4 or IPv6 addresses in a union, and a selector is used to determine how many words to input to jhash2. We also add flow_get_u32_dst and flow_get_u32_src functions which are used to get a u32 representation of the source and destination addresses. For IPv6, ipv6_addr_hash is called. These functions retain getting the legacy values of src and dst in flow_keys. With this patch, Ethertype and IP protocol are now included in the flow hash input. Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- include/net/flow_dissector.h | 52 ++++++++++++++++++++++++++++---------------- include/net/ip.h | 19 ++++++++++++++-- include/net/ipv6.h | 21 ++++++++++++++++-- 3 files changed, 69 insertions(+), 23 deletions(-) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index cba6a10..306d461 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -12,7 +12,7 @@ */ struct flow_dissector_key_control { u16 thoff; - u16 padding; + u16 addr_type; }; /** @@ -28,19 +28,40 @@ struct flow_dissector_key_basic { }; /** - * struct flow_dissector_key_addrs: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address + * struct flow_dissector_key_ipv4_addrs: + * @src: source ip address + * @dst: destination ip address */ -struct flow_dissector_key_addrs { +struct flow_dissector_key_ipv4_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ __be32 src; __be32 dst; }; /** + * struct flow_dissector_key_ipv6_addrs: + * @src: source ip address + * @dst: destination ip address + */ +struct flow_dissector_key_ipv6_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + struct in6_addr src; + struct in6_addr dst; +}; + +/** + * struct flow_dissector_key_addrs: + * @v4addrs: IPv4 addresses + * @v6addrs: IPv6 addresses + */ +struct flow_dissector_key_addrs { + union { + struct flow_dissector_key_ipv4_addrs v4addrs; + struct flow_dissector_key_ipv6_addrs v6addrs; + }; +}; + +/** * flow_dissector_key_tp_ports: * @ports: port numbers of Transport header * src: source port number @@ -56,16 +77,6 @@ struct flow_dissector_key_ports { }; }; -/** - * struct flow_dissector_key_ipv6_addrs: - * @src: source ip address - * @dst: destination ip address - */ -struct flow_dissector_key_ipv6_addrs { - /* (src,dst) must be grouped, in the same way than 
in IP header */ - struct in6_addr src; - struct in6_addr dst; -}; /** * struct flow_dissector_key_eth_addrs: @@ -81,10 +92,10 @@ struct flow_dissector_key_eth_addrs { enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ - FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ + FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ - FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_MAX, @@ -129,6 +140,9 @@ struct flow_keys { #define FLOW_KEYS_HASH_OFFSET \ offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD) +__be32 flow_get_u32_src(const struct flow_keys *flow); +__be32 flow_get_u32_dst(const struct flow_keys *flow); + extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_buf_dissector; diff --git a/include/net/ip.h b/include/net/ip.h index 16cfc87..0750a18 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -355,6 +355,20 @@ static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) skb->len, proto, 0); } +/* copy IPv4 saddr & daddr to flow_keys, possibly using 64bit load/store + * Equivalent to : flow->v4addrs.src = iph->saddr; + * flow->v4addrs.dst = iph->daddr; + */ +static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, + const struct iphdr *iph) +{ + BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != + offsetof(typeof(flow->addrs), v4addrs.src) + + sizeof(flow->addrs.v4addrs.src)); + memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs)); + flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; +} + 
static inline void inet_set_txhash(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); @@ -362,8 +376,9 @@ static inline void inet_set_txhash(struct sock *sk) memset(&keys, 0, sizeof(keys)); - keys.addrs.src = inet->inet_saddr; - keys.addrs.dst = inet->inet_daddr; + keys.addrs.v4addrs.src = inet->inet_saddr; + keys.addrs.v4addrs.dst = inet->inet_daddr; + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; keys.ports.src = inet->inet_sport; keys.ports.dst = inet->inet_dport; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 474ca46..82dbdb0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -692,6 +692,20 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, return hlimit; } +/* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store + * Equivalent to : flow->v6addrs.src = iph->saddr; + * flow->v6addrs.dst = iph->daddr; + */ +static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, + const struct ipv6hdr *iph) +{ + BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) != + offsetof(typeof(flow->addrs), v6addrs.src) + + sizeof(flow->addrs.v6addrs.src)); + memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs)); + flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; +} + #if IS_ENABLED(CONFIG_IPV6) static inline void ip6_set_txhash(struct sock *sk) { @@ -701,8 +715,11 @@ static inline void ip6_set_txhash(struct sock *sk) memset(&keys, 0, sizeof(keys)); - keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); - keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); + memcpy(&keys.addrs.v6addrs.src, &np->saddr, + sizeof(keys.addrs.v6addrs.src)); + memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr, + sizeof(keys.addrs.v6addrs.dst)); + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; keys.ports.src = inet->inet_sport; keys.ports.dst = inet->inet_dport; -- cgit v1.1 From 9f24908901c5f4b1e3b07548106b1790af933476 Mon Sep 17 00:00:00 2001 From: Tom 
Herbert Date: Thu, 4 Jun 2015 09:16:41 -0700 Subject: net: Add keys for TIPC address Add a new flow key for TIPC addresses. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 306d461..3ee606a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -50,6 +50,14 @@ struct flow_dissector_key_ipv6_addrs { }; /** + * struct flow_dissector_key_tipc_addrs: + * @srcnode: source node address + */ +struct flow_dissector_key_tipc_addrs { + __be32 srcnode; +}; + +/** * struct flow_dissector_key_addrs: * @v4addrs: IPv4 addresses * @v6addrs: IPv6 addresses @@ -58,6 +66,7 @@ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; + struct flow_dissector_key_tipc_addrs tipcaddrs; }; }; @@ -97,6 +106,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ + FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ FLOW_DISSECTOR_KEY_MAX, }; -- cgit v1.1 From 45b47fd00ca14df869979dbbe14324625ec93552 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:42 -0700 Subject: net: Get rid of IPv6 hash addresses flow keys We don't need to return the IPv6 address hash as part of flow keys. In general, using the IPv6 address hash is risky in a hash value since the underlying use of xor provides no entropy. If someone really needs the hash value they can get it from the full IPv6 addresses in flow keys (e.g. from flow_get_u32_src). Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/flow_dissector.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 3ee606a..59f00f9 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -103,7 +103,6 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ - FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ -- cgit v1.1 From d34af823ff401c312541aa613c49ea4b872bde9e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:43 -0700 Subject: net: Add VLAN ID to flow_keys In flow_dissector set vlan_id in flow_keys when VLAN is found. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/flow_dissector.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 59f00f9..08480fb 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -27,6 +27,10 @@ struct flow_dissector_key_basic { u8 padding; }; +struct flow_dissector_key_tags { + u32 vlan_id:12; +}; + /** * struct flow_dissector_key_ipv4_addrs: * @src: source ip address @@ -106,6 +110,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ + FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_MAX, }; @@ -142,6 +147,7 @@ struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; + struct flow_dissector_key_tags tags; struct flow_dissector_key_ports ports; struct flow_dissector_key_addrs addrs; }; -- cgit v1.1 From 87ee9e52ffeb168803a76cc07734425227cc2268 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:44 -0700 Subject: net: Add IPv6 flow label to flow_keys In flow_dissector set the flow label in flow_keys for IPv6. This also removes the shortcircuiting of flow dissection when a non-zero label is present, the flow label can be considered to provide additional entropy for a hash. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/flow_dissector.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 08480fb..14d8483 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -28,7 +28,8 @@ struct flow_dissector_key_basic { }; struct flow_dissector_key_tags { - u32 vlan_id:12; + u32 vlan_id:12, + flow_label:20; }; /** @@ -111,6 +112,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ + FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_MAX, }; -- cgit v1.1 From 1fdd512c92003cf2d671ba22753d13302bf8cd1d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:45 -0700 Subject: net: Add GRE keyid in flow_keys In flow dissector if a GRE header contains a keyid this is saved in the new keyid field of flow_keys. The GRE keyid is then represented in the flow hash function input. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/flow_dissector.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 14d8483..6c5e8d2 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -32,6 +32,10 @@ struct flow_dissector_key_tags { flow_label:20; }; +struct flow_dissector_key_keyid { + __be32 keyid; +}; + /** * struct flow_dissector_key_ipv4_addrs: * @src: source ip address @@ -113,6 +117,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ + FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MAX, }; @@ -150,6 +155,7 @@ struct flow_keys { #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; struct flow_dissector_key_tags tags; + struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; struct flow_dissector_key_addrs addrs; }; -- cgit v1.1 From b3baa0fbd02a1a9d493d8cb92ae4a4491b9e9d13 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:46 -0700 Subject: mpls: Add MPLS entropy label in flow_keys In flow dissector if an MPLS header contains an entropy label this is saved in the new keyid field of flow_keys. The entropy label is then represented in the flow hash function input. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/flow_dissector.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 6c5e8d2..1a8c224 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -118,6 +118,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MAX, }; -- cgit v1.1 From 90c337da1524863838658078ec34241f45d8394d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 6 Jun 2015 21:17:57 -0700 Subject: inet: add IP_BIND_ADDRESS_NO_PORT to overcome bind(0) limitations When an application needs to force a source IP on an active TCP socket it has to use bind(IP, port=x). As most applications do not want to deal with already used ports, x is often set to 0, meaning the kernel is in charge to find an available port. But kernel does not know yet if this socket is going to be a listener or be connected. It has very limited choices (no full knowledge of final 4-tuple for a connect()) With limited ephemeral port range (about 32K ports), it is very easy to fill the space. This patch adds a new SOL_IP socket option, asking kernel to ignore the 0 port provided by application in bind(IP, port=0) and only remember the given IP address. The port will be automatically chosen at connect() time, in a way that allows sharing a source port as long as the 4-tuples are unique. This new feature is available for both IPv4 and IPv6 (Thanks Neal) Tested: Wrote a test program and checked its behavior on IPv4 and IPv6. strace(1) shows sequences of bind(IP=127.0.0.2, port=0) followed by connect(). Also getsockname() show that the port is still 0 right after bind() but properly allocated after connect(). 
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5 setsockopt(5, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0 bind(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, 16) = 0 getsockname(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0 connect(5, {sa_family=AF_INET, sin_port=htons(53174), sin_addr=inet_addr("127.0.0.3")}, 16) = 0 getsockname(5, {sa_family=AF_INET, sin_port=htons(38050), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0 IPv6 test : socket(PF_INET6, SOCK_STREAM, IPPROTO_IP) = 7 setsockopt(7, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0 bind(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0 getsockname(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0 connect(7, {sa_family=AF_INET6, sin6_port=htons(57300), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0 getsockname(7, {sa_family=AF_INET6, sin6_port=htons(60964), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0 I was able to bind()/connect() a million concurrent IPv4 sockets, instead of ~32000 before patch. 
lpaa23:~# ulimit -n 1000010 lpaa23:~# ./bind --connect --num-flows=1000000 & 1000000 sockets lpaa23:~# grep TCP /proc/net/sockstat TCP: inuse 2000063 orphan 0 tw 47 alloc 2000157 mem 66 Check that a given source port is indeed used by many different connections : lpaa23:~# ss -t src :40000 | head -10 State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 0 127.0.0.2:40000 127.0.202.33:44983 ESTAB 0 0 127.0.0.2:40000 127.2.27.240:44983 ESTAB 0 0 127.0.0.2:40000 127.2.98.5:44983 ESTAB 0 0 127.0.0.2:40000 127.0.124.196:44983 ESTAB 0 0 127.0.0.2:40000 127.2.139.38:44983 ESTAB 0 0 127.0.0.2:40000 127.1.59.80:44983 ESTAB 0 0 127.0.0.2:40000 127.3.6.228:44983 ESTAB 0 0 127.0.0.2:40000 127.0.38.53:44983 ESTAB 0 0 127.0.0.2:40000 127.1.197.10:44983 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b6c3737..47eb67b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -187,6 +187,7 @@ struct inet_sock { transparent:1, mc_all:1, nodefrag:1; + __u8 bind_address_no_port:1; __u8 rcv_tos; __u8 convert_csum; int uc_index; -- cgit v1.1 From ed65963ba0a2bdc330b1d7183f930d1c6a0a6685 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:46 +0200 Subject: mac802154: remove unneeded vif struct This patch removes the virtual interface structure from sub if data struct, because it isn't used anywhere. This structure could be useful for give per interface information at softmac driver layer. Nevertheless there exist no use case currently and it contains the interface type information currently. This information is also stored inside wpan dev which is now used to check on the wpan dev interface type. 
Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Acked-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/net') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 9605c7f..80a9e60 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -56,13 +56,6 @@ struct ieee802154_hw_addr_filt { u8 pan_coord; }; -struct ieee802154_vif { - int type; - - /* must be last */ - u8 drv_priv[0] __aligned(sizeof(void *)); -}; - struct ieee802154_hw { /* filled by the driver */ int extra_tx_headroom; @@ -73,7 +66,6 @@ struct ieee802154_hw { struct ieee802154_hw_addr_filt hw_filt; void *priv; struct wpan_phy *phy; - size_t vif_data_size; }; /* Checksum is in hardware and is omitted from a packet -- cgit v1.1 From 6b70a43c7e0202cf285c864bc9f20f607c42e432 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:47 +0200 Subject: mac802154: cleanup address filtering flags This patch changes the address filtering flags to enums and setting the flag values with the BIT macro. Additional this patch changes the commenting style for matching usual kernel style. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 80a9e60..845e4f8 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -31,18 +31,29 @@ */ #define MAC802154_FRAME_HARD_HEADER_LEN (2 + 1 + 20 + 14) -/* The following flags are used to indicate changed address settings from +/** + * enum ieee802154_hw_addr_filt_flags - hardware address filtering flags + * + * The following flags are used to indicate changed address settings from * the stack to the hardware. 
+ * + * @IEEE802154_AFILT_SADDR_CHANGED: Indicates that the short address will be + * change. + * + * @IEEE802154_AFILT_IEEEADDR_CHANGED: Indicates that the extended address + * will be change. + * + * @IEEE802154_AFILT_PANID_CHANGED: Indicates that the pan id will be change. + * + * @IEEE802154_AFILT_PANC_CHANGED: Indicates that the address filter will + * do frame address filtering as a pan coordinator. */ - -/* indicates that the Short Address changed */ -#define IEEE802154_AFILT_SADDR_CHANGED 0x00000001 -/* indicates that the IEEE Address changed */ -#define IEEE802154_AFILT_IEEEADDR_CHANGED 0x00000002 -/* indicates that the PAN ID changed */ -#define IEEE802154_AFILT_PANID_CHANGED 0x00000004 -/* indicates that PAN Coordinator status changed */ -#define IEEE802154_AFILT_PANC_CHANGED 0x00000008 +enum ieee802154_hw_addr_filt_flags { + IEEE802154_AFILT_SADDR_CHANGED = BIT(1), + IEEE802154_AFILT_IEEEADDR_CHANGED = BIT(2), + IEEE802154_AFILT_PANID_CHANGED = BIT(3), + IEEE802154_AFILT_PANC_CHANGED = BIT(4), +}; struct ieee802154_hw_addr_filt { __le16 pan_id; /* Each independent PAN selects a unique -- cgit v1.1 From f265be3d124a5b62e5a339685b6cfaa0292f1250 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:48 +0200 Subject: mac802154: remove aack hw flag This patch removes the hardware auto acknowledge flag which indicates that the transceiver supports this handling. This flag is never evaluated inside mac802154 and all transceivers should support this handling by default per hardware.
Suggested-by: Lennert Buytenhek Cc: Alan Ott Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Acked-by: Stefan Schmidt Acked-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 845e4f8..a20ba28 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -90,8 +90,6 @@ struct ieee802154_hw { /* Indicates that xmitter will add FCS on it's own. */ #define IEEE802154_HW_TX_OMIT_CKSUM 0x00000001 -/* Indicates that receiver will autorespond with ACK frames. */ -#define IEEE802154_HW_AACK 0x00000002 /* Indicates that transceiver will support listen before transmit. */ #define IEEE802154_HW_LBT 0x00000004 /* Indicates that transceiver will support csma (max_be, min_be, csma retries) -- cgit v1.1 From bcbfd2078d9b11277d9c9ce0c30ba73c750503c9 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:49 +0200 Subject: mac802154: cleanup ieee802154 hardware flags This patch changes the ieee802154 hardware flags to enums and setting the flag values with the BIT macro. Additional this patch changes the commenting style for matching usual kernel style. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 56 +++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index a20ba28..c21a700 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -79,32 +79,48 @@ struct ieee802154_hw { struct wpan_phy *phy; }; -/* Checksum is in hardware and is omitted from a packet +/** + * enum ieee802154_hw_flags - hardware flags * - * These following flags are used to indicate hardware capabilities to + * These flags are used to indicate hardware capabilities to * the stack. 
Generally, flags here should have their meaning * done in a way that the simplest hardware doesn't need setting * any particular flags. There are some exceptions to this rule, * however, so you are advised to review these flags carefully. + * + * @IEEE802154_HW_TX_OMIT_CKSUM: Indicates that xmitter will add FCS on it's + * own. + * + * @IEEE802154_HW_LBT: Indicates that transceiver will support listen before + * transmit. + * + * @IEEE802154_HW_CSMA_PARAMS: Indicates that transceiver will support csma + * parameters (max_be, min_be, backoff exponents). + * + * @IEEE802154_HW_FRAME_RETRIES: Indicates that transceiver will support ARET + * frame retries setting. + * + * @IEEE802154_HW_AFILT: Indicates that transceiver will support hardware + * address filter setting. + * + * @IEEE802154_HW_PROMISCUOUS: Indicates that transceiver will support + * promiscuous mode setting. + * + * @IEEE802154_HW_RX_OMIT_CKSUM: Indicates that receiver omits FCS. + * + * @IEEE802154_HW_RX_DROP_BAD_CKSUM: Indicates that receiver will not filter + * frames with bad checksum. */ - -/* Indicates that xmitter will add FCS on it's own. */ -#define IEEE802154_HW_TX_OMIT_CKSUM 0x00000001 -/* Indicates that transceiver will support listen before transmit. */ -#define IEEE802154_HW_LBT 0x00000004 -/* Indicates that transceiver will support csma (max_be, min_be, csma retries) - * settings. */ -#define IEEE802154_HW_CSMA_PARAMS 0x00000008 -/* Indicates that transceiver will support ARET frame retries setting. */ -#define IEEE802154_HW_FRAME_RETRIES 0x00000010 -/* Indicates that transceiver will support hardware address filter setting. */ -#define IEEE802154_HW_AFILT 0x00000020 -/* Indicates that transceiver will support promiscuous mode setting. */ -#define IEEE802154_HW_PROMISCUOUS 0x00000040 -/* Indicates that receiver omits FCS. */ -#define IEEE802154_HW_RX_OMIT_CKSUM 0x00000080 -/* Indicates that receiver will not filter frames with bad checksum. 
*/ -#define IEEE802154_HW_RX_DROP_BAD_CKSUM 0x00000100 +enum ieee802154_hw_flags { + IEEE802154_HW_TX_OMIT_CKSUM = BIT(1), + IEEE802154_HW_LBT = BIT(2), + IEEE802154_HW_CSMA_PARAMS = BIT(3), + IEEE802154_HW_FRAME_RETRIES = BIT(4), + IEEE802154_HW_AFILT = BIT(5), + IEEE802154_HW_PROMISCUOUS = BIT(6), + IEEE802154_HW_RX_OMIT_CKSUM = BIT(7), + IEEE802154_HW_RX_DROP_BAD_CKSUM = BIT(8), +}; /* Indicates that receiver omits FCS and xmitter will add FCS on it's own. */ #define IEEE802154_HW_OMIT_CKSUM (IEEE802154_HW_TX_OMIT_CKSUM | \ -- cgit v1.1 From 5661d431c6e65588e813c947117e6d18eb03422f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:50 +0200 Subject: mac802154: remove unused hw_filt attribute This patch removes an attribute from the ieee802154_hw structure which is never used inside the kernel. Address information is stored inside wpan_dev nowadays. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index c21a700..61d4b80 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -74,7 +74,6 @@ struct ieee802154_hw { struct device *parent; /* filled by mac802154 core */ - struct ieee802154_hw_addr_filt hw_filt; void *priv; struct wpan_phy *phy; }; -- cgit v1.1 From af69a34548cb01aefef76aeb3565cebc7a9fb0d6 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:51 +0200 Subject: mac802154: rearrange attribute in ieee802154_hw This patch moves the priv attribute in ieee802154_hw to the right section, which is commented as holding the attributes which need to be filled by the driver layer.
Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 61d4b80..d8e9e6f 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -72,9 +72,9 @@ struct ieee802154_hw { int extra_tx_headroom; u32 flags; struct device *parent; + void *priv; /* filled by mac802154 core */ - void *priv; struct wpan_phy *phy; }; -- cgit v1.1 From a0825b03aed91dd25d31aaff1e6e85c322caf8b2 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:52 +0200 Subject: mac802154: add missing structure comments This patch add missing comments to internal mac802154 structures. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index d8e9e6f..095c9d0 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -55,18 +55,40 @@ enum ieee802154_hw_addr_filt_flags { IEEE802154_AFILT_PANC_CHANGED = BIT(4), }; +/** + * struct ieee802154_hw_addr_filt - hardware address filtering settings + * + * @pan_id: pan_id which should be set to the hardware address filter. + * + * @short_addr: short_addr which should be set to the hardware address filter. + * + * @ieee_addr: extended address which should be set to the hardware address + * filter. + * + * @pan_coord: boolean if hardware filtering should be operate as coordinator. + */ struct ieee802154_hw_addr_filt { - __le16 pan_id; /* Each independent PAN selects a unique - * identifier. This PAN id allows communication - * between devices within a network using short - * addresses and enables transmissions between - * devices across independent networks. 
- */ + __le16 pan_id; __le16 short_addr; __le64 ieee_addr; u8 pan_coord; }; +/** + * struct ieee802154_hw - ieee802154 hardware + * + * @extra_tx_headroom: headroom to reserve in each transmit skb for use by the + * driver (e.g. for transmit headers.) + * + * @flags: hardware flags, see &enum ieee802154_hw_flags + * + * @parent: parent device of the hardware. + * + * @priv: pointer to private area that was allocated for driver use along with + * this structure. + * + * @phy: This points to the &struct wpan_phy allocated for this 802.15.4 PHY. + */ struct ieee802154_hw { /* filled by the driver */ int extra_tx_headroom; -- cgit v1.1 From 623c1234a2da2235d6f0bcd09061b6f7776eee93 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:53 +0200 Subject: mac802154: change pan_coord type to bool To indicate if it's a coordinator or not a bool is enough. There should be no more values available which represent some other state. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 095c9d0..de1cdde 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -71,7 +71,7 @@ struct ieee802154_hw_addr_filt { __le16 pan_id; __le16 short_addr; __le64 ieee_addr; - u8 pan_coord; + bool pan_coord; }; /** -- cgit v1.1 From b80c0e78582d4a3a004dc1ade4eb06babc6a4eea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Jun 2015 18:30:43 -0700 Subject: tcp: get_cookie_sock() consolidation IPv4 and IPv6 share the same implementation of get_cookie_sock(), and there is no point inlining it. We add tcp_ prefix to the common helper name and export it. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 2bb2bad..978cebe 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -469,6 +469,9 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ +struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); -- cgit v1.1 From b6355e972aaab0173ce11a1650e7dba67f820918 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sat, 6 Jun 2015 13:16:37 +0200 Subject: NFC: nci: Handle proprietary response and notifications Allow for drivers to explicitly define handlers for each proprietary notifications and responses they expect to support. 
Reviewed-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index d4dcc71..c49688c 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -66,6 +66,12 @@ enum nci_state { struct nci_dev; +struct nci_prop_ops { + __u16 opcode; + int (*rsp)(struct nci_dev *dev, struct sk_buff *skb); + int (*ntf)(struct nci_dev *dev, struct sk_buff *skb); +}; + struct nci_ops { int (*open)(struct nci_dev *ndev); int (*close)(struct nci_dev *ndev); @@ -84,12 +90,16 @@ struct nci_ops { struct sk_buff *skb); void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb); + + struct nci_prop_ops *prop_ops; + size_t n_prop_ops; }; #define NCI_MAX_SUPPORTED_RF_INTERFACES 4 #define NCI_MAX_DISCOVERED_TARGETS 10 #define NCI_MAX_NUM_NFCEE 255 #define NCI_MAX_CONN_ID 7 +#define NCI_MAX_PROPRIETARY_CMD 64 struct nci_conn_info { struct list_head list; @@ -320,6 +330,10 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev) void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb); void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb); +int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb); +int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb); void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb); int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); -- cgit v1.1 From c39daeee50eb0b95d3b91bda21b77955a459ee5f Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sat, 6 Jun 2015 13:16:40 +0200 Subject: NFC: nci: Add nci init ops for early device initialization Some device may need to execute some proprietary commands in order to "wake-up"; Before the nci state initialization. 
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index c49688c..886854a 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -73,6 +73,7 @@ struct nci_prop_ops { }; struct nci_ops { + int (*init)(struct nci_dev *ndev); int (*open)(struct nci_dev *ndev); int (*close)(struct nci_dev *ndev); int (*send)(struct nci_dev *ndev, struct sk_buff *skb); -- cgit v1.1 From 759afb8d288ffbe9a1cdb20af037b5c072dc38b2 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sat, 6 Jun 2015 13:16:41 +0200 Subject: NFC: nci: Add nci_prop_cmd allowing to send proprietary nci cmd Handle allowing to send proprietary nci commands anywhere in the nci state machine. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 886854a..98f18a2 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -275,6 +275,8 @@ int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, unsigned long opt), unsigned long opt, __u32 timeout); +int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload); + int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); -- cgit v1.1 From 8115dd5905318afcde713726064ec052b7d488cf Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 14 Oct 2014 01:42:23 +0200 Subject: NFC: Introduce vendor commands structures Together with inline routines to associate a vendor commands array with an NFC device. Vendor commands allow vendors to implement their very specific operations from driver code instead of adding new stack ops for non NFC generic commands. 
Vendors need to select their own unique IDs and use that as a namespace for defining sub commands. Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 7 +++++++ include/net/nfc/nci_core.h | 7 +++++++ include/net/nfc/nfc.h | 22 ++++++++++++++++++++++ 3 files changed, 36 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 020a814..316694d 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -179,6 +179,13 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev); void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata); void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev); +static inline int nfc_hci_set_vendor_cmds(struct nfc_hci_dev *hdev, + struct nfc_vendor_cmd *cmds, + int n_cmds) +{ + return nfc_set_vendor_cmds(hdev->ndev, cmds, n_cmds); +} + void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err); int nfc_hci_result_to_errno(u8 result); diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 98f18a2..9d77ed5 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -331,6 +331,13 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev) return ndev->driver_data; } +static inline int nci_set_vendor_cmds(struct nci_dev *ndev, + struct nfc_vendor_cmd *cmds, + int n_cmds) +{ + return nfc_set_vendor_cmds(ndev->nfc_dev, cmds, n_cmds); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb); void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb); int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 7ac029c..f9e58ae 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -165,6 +165,12 @@ struct nfc_genl_data { struct mutex genl_data_mutex; }; +struct nfc_vendor_cmd { + __u32 vendor_id; + __u32 subcmd; + int (*doit)(struct nfc_dev *dev, void *data, size_t data_len); +}; + struct nfc_dev { int idx; u32 target_next_idx; @@ -193,6 +199,9 
@@ struct nfc_dev { struct rfkill *rfkill; + struct nfc_vendor_cmd *vendor_cmds; + int n_vendor_cmds; + struct nfc_ops *ops; }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) @@ -296,4 +305,17 @@ struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx); void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, u8 payload_type, u8 direction); +static inline int nfc_set_vendor_cmds(struct nfc_dev *dev, + struct nfc_vendor_cmd *cmds, + int n_cmds) +{ + if (dev->vendor_cmds || dev->n_vendor_cmds) + return -EINVAL; + + dev->vendor_cmds = cmds; + dev->n_vendor_cmds = n_cmds; + + return 0; +} + #endif /* __NET_NFC_H */ -- cgit v1.1 From 8b76ce34c43a569f981623485c1b6c700594678e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 8 Jun 2015 18:14:39 +0300 Subject: Bluetooth: Fix encryption key size handling for LTKs The encryption key size for LTKs is supposed to be applied only at the moment of encryption. When generating a Link Key (using LE SC) from the LTK the full non-shortened value should be used. This patch modifies the code to always keep the full value around and only apply the key size when passing the value to HCI. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a056c2b..24c0e45 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1408,7 +1408,7 @@ void mgmt_smp_complete(struct hci_conn *conn, bool complete); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, - __u8 ltk[16]); + __u8 ltk[16], __u8 key_size); void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type); -- cgit v1.1 From 9b4c33364eb653a824c58e637c73caa6feb9879c Mon Sep 17 00:00:00 2001 From: Arron Wang Date: Tue, 9 Jun 2015 17:47:22 +0800 Subject: Bluetooth: Make l2cap_recv_acldata() and sco_recv_scodata() return void The return value of l2cap_recv_acldata() and sco_recv_scodata() are not used, then change it to return void Signed-off-by: Arron Wang Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 24c0e45..f175a51 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -530,10 +530,10 @@ extern struct mutex hci_cb_list_lock; /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); -int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); +void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); -int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb); +void 
sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb); /* ----- Inquiry cache ----- */ #define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ -- cgit v1.1 From ff50e8afc537e66bb3daf5d1cd6628d6b76e7f06 Mon Sep 17 00:00:00 2001 From: Arron Wang Date: Tue, 9 Jun 2015 17:47:23 +0800 Subject: Bluetooth: Move SCO support under BT_BREDR config option SCO/eSCO link is supported by BR/EDR controller, it is suitable to move them under BT_BREDR config option Signed-off-by: Arron Wang Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 11 +++++++++++ include/net/bluetooth/hci_core.h | 12 ++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 7dba805..38d8a34 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -365,8 +365,19 @@ extern struct dentry *bt_debugfs; int l2cap_init(void); void l2cap_exit(void); +#if IS_ENABLED(CONFIG_BT_BREDR) int sco_init(void); void sco_exit(void); +#else +static inline int sco_init(void) +{ + return 0; +} + +static inline void sco_exit(void) +{ +} +#endif int mgmt_init(void); void mgmt_exit(void); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f175a51..3fbb793 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -532,8 +532,20 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); +#if IS_ENABLED(CONFIG_BT_BREDR) int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb); +#else +static inline int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, + __u8 *flags) +{ + return 0; +} + +static inline void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) +{ +} +#endif /* ----- 
Inquiry cache ----- */ #define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ -- cgit v1.1 From c2d3955ba322471181ba0e6636ea9bdd9f521239 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Jun 2015 21:10:13 +0200 Subject: mac80211: remove obsolete sentence from documentation FIF_PROMISC_IN_BSS was removed in commit df1404650ccb ("mac80211: remove support for IFF_PROMISC"). Signed-off-by: Jakub Kicinski Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e09a32c..faadb73 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2584,8 +2584,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * * @FIF_OTHER_BSS: pass frames destined to other BSSes * - * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only - * those addressed to this station. + * @FIF_PSPOLL: pass PS Poll frames * * @FIF_PROBE_REQ: pass probe request frames */ -- cgit v1.1 From d446278c408bdebd4103090740ce908c5d6b5ab0 Mon Sep 17 00:00:00 2001 From: Christoffer Holmstedt Date: Wed, 10 Jun 2015 11:03:59 +0200 Subject: nl802154: fix misspelled enum Signed-off-by: Christoffer Holmstedt Signed-off-by: Marcel Holtmann --- include/net/nl802154.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 6fc231e..b0ab530 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -187,7 +187,7 @@ enum nl802154_wpan_phy_capability_attr { * @NL802154_CCA_ENERGY_CARRIER: Carrier sense with energy above threshold * @NL802154_CCA_ALOHA: CCA shall always report an idle medium * @NL802154_CCA_UWB_SHR: UWB preamble sense based on the SHR of a frame - * @NL802154_CCA_UWB_MULTIPEXED: UWB preamble sense based on the packet with + * @NL802154_CCA_UWB_MULTIPLEXED: UWB preamble sense based on the packet with * 
the multiplexed preamble * @__NL802154_CCA_ATTR_AFTER_LAST: Internal * @NL802154_CCA_ATTR_MAX: Maximum CCA attribute number @@ -199,7 +199,7 @@ enum nl802154_cca_modes { NL802154_CCA_ENERGY_CARRIER, NL802154_CCA_ALOHA, NL802154_CCA_UWB_SHR, - NL802154_CCA_UWB_MULTIPEXED, + NL802154_CCA_UWB_MULTIPLEXED, /* keep last */ __NL802154_CCA_ATTR_AFTER_LAST, -- cgit v1.1 From 30686bf7f5b3c30831761e188a6e3cb33580fa48 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2015 21:39:54 +0200 Subject: mac80211: convert HW flags to unsigned long bitmap As we're running out of hardware capability flags pretty quickly, convert them to use the regular test_bit() style unsigned long bitmaps. This introduces a number of helper functions/macros to set and to test the bits, along with new debugfs code. The occurrences of an explicit __clear_bit() are intentional, the drivers were never supposed to change their supported bits on the fly. We should investigate changing this to be a per-frame flag. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 79 +++++++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index faadb73..6b1077c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1887,37 +1887,42 @@ struct ieee80211_txq { * * @IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands * in one command, mac80211 doesn't have to run separate scans per band. 
+ * + * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { - IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, - IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, - IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING = 1<<2, - IEEE80211_HW_SIGNAL_UNSPEC = 1<<5, - IEEE80211_HW_SIGNAL_DBM = 1<<6, - IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC = 1<<7, - IEEE80211_HW_SPECTRUM_MGMT = 1<<8, - IEEE80211_HW_AMPDU_AGGREGATION = 1<<9, - IEEE80211_HW_SUPPORTS_PS = 1<<10, - IEEE80211_HW_PS_NULLFUNC_STACK = 1<<11, - IEEE80211_HW_SUPPORTS_DYNAMIC_PS = 1<<12, - IEEE80211_HW_MFP_CAPABLE = 1<<13, - IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, - IEEE80211_HW_NO_AUTO_VIF = 1<<15, - IEEE80211_HW_SW_CRYPTO_CONTROL = 1<<16, - IEEE80211_HW_SUPPORT_FAST_XMIT = 1<<17, - IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, - IEEE80211_HW_CONNECTION_MONITOR = 1<<19, - IEEE80211_HW_QUEUE_CONTROL = 1<<20, - IEEE80211_HW_SUPPORTS_PER_STA_GTK = 1<<21, - IEEE80211_HW_AP_LINK_PS = 1<<22, - IEEE80211_HW_TX_AMPDU_SETUP_IN_HW = 1<<23, - IEEE80211_HW_SUPPORTS_RC_TABLE = 1<<24, - IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF = 1<<25, - IEEE80211_HW_TIMING_BEACON_ONLY = 1<<26, - IEEE80211_HW_SUPPORTS_HT_CCK_RATES = 1<<27, - IEEE80211_HW_CHANCTX_STA_CSA = 1<<28, - IEEE80211_HW_SUPPORTS_CLONED_SKBS = 1<<29, - IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS = 1<<30, + IEEE80211_HW_HAS_RATE_CONTROL, + IEEE80211_HW_RX_INCLUDES_FCS, + IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING, + IEEE80211_HW_SIGNAL_UNSPEC, + IEEE80211_HW_SIGNAL_DBM, + IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC, + IEEE80211_HW_SPECTRUM_MGMT, + IEEE80211_HW_AMPDU_AGGREGATION, + IEEE80211_HW_SUPPORTS_PS, + IEEE80211_HW_PS_NULLFUNC_STACK, + IEEE80211_HW_SUPPORTS_DYNAMIC_PS, + IEEE80211_HW_MFP_CAPABLE, + IEEE80211_HW_WANT_MONITOR_VIF, + IEEE80211_HW_NO_AUTO_VIF, + IEEE80211_HW_SW_CRYPTO_CONTROL, + IEEE80211_HW_SUPPORT_FAST_XMIT, + IEEE80211_HW_REPORTS_TX_ACK_STATUS, + IEEE80211_HW_CONNECTION_MONITOR, + IEEE80211_HW_QUEUE_CONTROL, + IEEE80211_HW_SUPPORTS_PER_STA_GTK, + 
IEEE80211_HW_AP_LINK_PS, + IEEE80211_HW_TX_AMPDU_SETUP_IN_HW, + IEEE80211_HW_SUPPORTS_RC_TABLE, + IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF, + IEEE80211_HW_TIMING_BEACON_ONLY, + IEEE80211_HW_SUPPORTS_HT_CCK_RATES, + IEEE80211_HW_CHANCTX_STA_CSA, + IEEE80211_HW_SUPPORTS_CLONED_SKBS, + IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS, + + /* keep last, obviously */ + NUM_IEEE80211_HW_FLAGS }; /** @@ -2024,7 +2029,7 @@ struct ieee80211_hw { struct wiphy *wiphy; const char *rate_control_algorithm; void *priv; - u32 flags; + unsigned long flags[BITS_TO_LONGS(NUM_IEEE80211_HW_FLAGS)]; unsigned int extra_tx_headroom; unsigned int extra_beacon_tailroom; int vif_data_size; @@ -2050,6 +2055,20 @@ struct ieee80211_hw { int txq_ac_max_pending; }; +static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, + enum ieee80211_hw_flags flg) +{ + return test_bit(flg, hw->flags); +} +#define ieee80211_hw_check(hw, flg) _ieee80211_hw_check(hw, IEEE80211_HW_##flg) + +static inline void _ieee80211_hw_set(struct ieee80211_hw *hw, + enum ieee80211_hw_flags flg) +{ + return __set_bit(flg, hw->flags); +} +#define ieee80211_hw_set(hw, flg) _ieee80211_hw_set(hw, IEEE80211_HW_##flg) + /** * struct ieee80211_scan_request - hw scan request * -- cgit v1.1 From 37a9a8df8ce9de6ea73349c9ac8bdf6ba4ec4f70 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 10 Jun 2015 08:44:59 -0400 Subject: net/unix: support SCM_SECURITY for stream sockets SCM_SECURITY was originally only implemented for datagram sockets, not for stream sockets. However, SCM_CREDENTIALS is supported on Unix stream sockets. For consistency, implement Unix stream support for SCM_SECURITY as well. Also clean up the existing code and get rid of the superfluous UNIXSID macro. Motivated by https://bugzilla.redhat.com/show_bug.cgi?id=1224211, where systemd was using SCM_CREDENTIALS and assumed wrongly that SCM_SECURITY was also supported on Unix stream sockets. Signed-off-by: Stephen Smalley Acked-by: Paul Moore Signed-off-by: David S. 
Miller --- include/net/af_unix.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a175ba4..4a167b3 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -39,7 +39,6 @@ struct unix_skb_parms { }; #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define UNIXSID(skb) (&UNIXCB((skb)).secid) #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -- cgit v1.1 From 9961127d4bce6325e9a0b0fb105e0c85a6c62cb7 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Thu, 11 Jun 2015 11:25:47 +0200 Subject: NFC: nci: add generic uart support Some NFC controller supports UART as host interface. As with SPI, a lot of code can be shared between vendor drivers. This patch add the generic support of UART and provides some extension API for vendor specific needs. This code is strongly inspired by the Bluetooth HCI ldisc implementation. NCI UART vendor drivers will have to register themselves to this layer via nci_uart_register. Underlying tty will have to be configured from user land thanks to an ioctl. 
Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 1 + include/net/nfc/nci_core.h | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index a2f2f3d..75d2e18 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -35,6 +35,7 @@ #define NCI_MAX_NUM_RF_CONFIGS 10 #define NCI_MAX_NUM_CONN 10 #define NCI_MAX_PARAM_LEN 251 +#define NCI_MAX_PACKET_SIZE 258 /* NCI Status Codes */ #define NCI_STATUS_OK 0x00 diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 9d77ed5..01fc8c5 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -391,4 +392,50 @@ int nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb); struct sk_buff *nci_spi_read(struct nci_spi *nspi); +/* ----- NCI UART ---- */ + +/* Ioctl */ +#define NCIUARTSETDRIVER _IOW('U', 0, char *) + +enum nci_uart_driver { + NCI_UART_DRIVER_MARVELL = 0, + NCI_UART_DRIVER_MAX +}; + +struct nci_uart; + +struct nci_uart_ops { + int (*open)(struct nci_uart *nci_uart); + void (*close)(struct nci_uart *nci_uart); + int (*recv)(struct nci_uart *nci_uart, struct sk_buff *skb); + int (*recv_buf)(struct nci_uart *nci_uart, const u8 *data, char *flags, + int count); + int (*send)(struct nci_uart *nci_uart, struct sk_buff *skb); + void (*tx_start)(struct nci_uart *nci_uart); + void (*tx_done)(struct nci_uart *nci_uart); +}; + +struct nci_uart { + struct module *owner; + struct nci_uart_ops ops; + const char *name; + enum nci_uart_driver driver; + + /* Dynamic data */ + struct nci_dev *ndev; + spinlock_t rx_lock; + struct work_struct write_work; + struct tty_struct *tty; + unsigned long tx_state; + struct sk_buff_head tx_q; + struct sk_buff *tx_skb; + struct sk_buff *rx_skb; + int rx_packet_len; + void *drv_data; +}; + +int nci_uart_register(struct nci_uart *nu); +void 
nci_uart_unregister(struct nci_uart *nu); +void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl); + #endif /* __NCI_CORE_H */ -- cgit v1.1 From f69ad292cfd13aa7ee00847320c6bb9ba2154e87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jun 2015 09:15:18 -0700 Subject: tcp: fill shinfo->gso_size at last moment In commit cd7d8498c9a5 ("tcp: change tcp_skb_pcount() location") we stored gso_segs in a temporary cache hot location. This patch does the same for gso_size. This allows to save 2 cache line misses in tcp xmit path for the last packet that is considered but not sent because of various conditions (cwnd, tso defer, receiver window, TSQ...) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 978cebe..950cfec 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -730,11 +730,14 @@ struct tcp_skb_cb { /* Note : tcp_tw_isn is used in input path only * (isn chosen by tcp_timewait_state_process()) * - * tcp_gso_segs is used in write queue only, - * cf tcp_skb_pcount() + * tcp_gso_segs/size are used in write queue only, + * cf tcp_skb_pcount()/tcp_skb_mss() */ __u32 tcp_tw_isn; - __u32 tcp_gso_segs; + struct { + u16 tcp_gso_segs; + u16 tcp_gso_size; + }; }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ @@ -790,10 +793,10 @@ static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs) TCP_SKB_CB(skb)->tcp_gso_segs += segs; } -/* This is valid iff tcp_skb_pcount() > 1. */ +/* This is valid iff skb is in write queue and tcp_skb_pcount() > 1. 
*/ static inline int tcp_skb_mss(const struct sk_buff *skb) { - return skb_shinfo(skb)->gso_size; + return TCP_SKB_CB(skb)->tcp_gso_size; } /* Events passed to congestion control interface */ -- cgit v1.1 From 821f37666815c9f3a7a4d195ce9184ad4d084942 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 11 Jun 2015 13:52:29 +0300 Subject: Bluetooth: Read encryption key size for BR/EDR connections Since Bluetooth 3.0 there's a HCI command available for reading the encryption key size of an BR/EDR connection. This information is essential e.g. for generating an LTK using SMP over BR/EDR, so store it as part of struct hci_conn. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index d95da83..7ca6690 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1202,6 +1202,16 @@ struct hci_rp_read_clock { __le16 accuracy; } __packed; +#define HCI_OP_READ_ENC_KEY_SIZE 0x1408 +struct hci_cp_read_enc_key_size { + __le16 handle; +} __packed; +struct hci_rp_read_enc_key_size { + __u8 status; + __le16 handle; + __u8 key_size; +} __packed; + #define HCI_OP_READ_LOCAL_AMP_INFO 0x1409 struct hci_rp_read_local_amp_info { __u8 status; -- cgit v1.1 From fe89e69050489884b304ea67b580056395dbd2b1 Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Fri, 12 Jun 2015 09:13:49 +0530 Subject: mac802154: cleanup llsec param flags This patch changes the setting of llsec param flag with the BIT macro. 
Signed-off-by: Varka Bhadram Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/ieee802154_netdev.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 0a87975..2c10a9f 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -346,15 +346,15 @@ struct ieee802154_mac_params { struct wpan_phy; enum { - IEEE802154_LLSEC_PARAM_ENABLED = 1 << 0, - IEEE802154_LLSEC_PARAM_FRAME_COUNTER = 1 << 1, - IEEE802154_LLSEC_PARAM_OUT_LEVEL = 1 << 2, - IEEE802154_LLSEC_PARAM_OUT_KEY = 1 << 3, - IEEE802154_LLSEC_PARAM_KEY_SOURCE = 1 << 4, - IEEE802154_LLSEC_PARAM_PAN_ID = 1 << 5, - IEEE802154_LLSEC_PARAM_HWADDR = 1 << 6, - IEEE802154_LLSEC_PARAM_COORD_HWADDR = 1 << 7, - IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = 1 << 8, + IEEE802154_LLSEC_PARAM_ENABLED = BIT(0), + IEEE802154_LLSEC_PARAM_FRAME_COUNTER = BIT(1), + IEEE802154_LLSEC_PARAM_OUT_LEVEL = BIT(2), + IEEE802154_LLSEC_PARAM_OUT_KEY = BIT(3), + IEEE802154_LLSEC_PARAM_KEY_SOURCE = BIT(4), + IEEE802154_LLSEC_PARAM_PAN_ID = BIT(5), + IEEE802154_LLSEC_PARAM_HWADDR = BIT(6), + IEEE802154_LLSEC_PARAM_COORD_HWADDR = BIT(7), + IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = BIT(8), }; struct ieee802154_llsec_ops { -- cgit v1.1 From 70f36507d0c3277e4a5424ca0c6c2a002ae42768 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 12 Jun 2015 09:24:00 +0200 Subject: mac802154: fix flags BIT definitions order This patch fixes commits ("mac802154: cleanup ieee802154 hardware flags") bcbfd2078d9b11277d9c9ce0c30ba73c750503c9 and ("mac802154: cleanup address filtering flags") 6b70a43c7e0202cf285c864bc9f20f607c42e432 by starting the flags definitions at BIT(0) which is same like the previous behaviour. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index de1cdde..f534a46 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -49,10 +49,10 @@ * do frame address filtering as a pan coordinator. */ enum ieee802154_hw_addr_filt_flags { - IEEE802154_AFILT_SADDR_CHANGED = BIT(1), - IEEE802154_AFILT_IEEEADDR_CHANGED = BIT(2), - IEEE802154_AFILT_PANID_CHANGED = BIT(3), - IEEE802154_AFILT_PANC_CHANGED = BIT(4), + IEEE802154_AFILT_SADDR_CHANGED = BIT(0), + IEEE802154_AFILT_IEEEADDR_CHANGED = BIT(1), + IEEE802154_AFILT_PANID_CHANGED = BIT(2), + IEEE802154_AFILT_PANC_CHANGED = BIT(3), }; /** @@ -133,14 +133,14 @@ struct ieee802154_hw { * frames with bad checksum. */ enum ieee802154_hw_flags { - IEEE802154_HW_TX_OMIT_CKSUM = BIT(1), - IEEE802154_HW_LBT = BIT(2), - IEEE802154_HW_CSMA_PARAMS = BIT(3), - IEEE802154_HW_FRAME_RETRIES = BIT(4), - IEEE802154_HW_AFILT = BIT(5), - IEEE802154_HW_PROMISCUOUS = BIT(6), - IEEE802154_HW_RX_OMIT_CKSUM = BIT(7), - IEEE802154_HW_RX_DROP_BAD_CKSUM = BIT(8), + IEEE802154_HW_TX_OMIT_CKSUM = BIT(0), + IEEE802154_HW_LBT = BIT(1), + IEEE802154_HW_CSMA_PARAMS = BIT(2), + IEEE802154_HW_FRAME_RETRIES = BIT(3), + IEEE802154_HW_AFILT = BIT(4), + IEEE802154_HW_PROMISCUOUS = BIT(5), + IEEE802154_HW_RX_OMIT_CKSUM = BIT(6), + IEEE802154_HW_RX_DROP_BAD_CKSUM = BIT(7), }; /* Indicates that receiver omits FCS and xmitter will add FCS on it's own. */ -- cgit v1.1 From 2d45a02d0166caf2627fe91897c6ffc3b19514c4 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 12 Jun 2015 10:16:41 -0300 Subject: sctp: fix ASCONF list handling ->auto_asconf_splist is per namespace and mangled by functions like sctp_setsockopt_auto_asconf() which doesn't guarantee any serialization. 
Also, the call to inet_sk_copy_descendant() was backing up ->auto_asconf_list through the copy but was not honoring ->do_auto_asconf, which could lead to list corruption if it was different between both sockets. This commit thus fixes the list handling by using ->addr_wq_lock spinlock to protect the list. A special handling is done upon socket creation and destruction for that. Error handling on sctp_init_sock() will never return an error after having initialized asconf, so sctp_destroy_sock() can be called without addr_wq_lock. The lock now will be taken on sctp_close_sock(), before locking the socket, so we don't do it in inverse order compared to sctp_addr_wq_timeout_handler(). Instead of taking the lock on sctp_sock_migrate() for copying and restoring the list values, it's preferred to avoid rewriting it by implementing sctp_copy_descendant(). Issue was found with a test application that kept flipping sysctl default_auto_asconf on and off, but one could trigger it by issuing simultaneous setsockopt() calls on multiple sockets or by creating/destroying sockets fast enough. This is only triggerable locally. Fixes: 9f7d653b67ae ("sctp: Add Auto-ASCONF support (core).") Reported-by: Ji Jianwen Suggested-by: Neil Horman Suggested-by: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/netns/sctp.h | 1 + include/net/sctp/structs.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 3573a81..8ba379f 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -31,6 +31,7 @@ struct netns_sctp { struct list_head addr_waitq; struct timer_list addr_wq_timer; struct list_head auto_asconf_splist; + /* Lock that protects both addr_waitq and auto_asconf_splist */ spinlock_t addr_wq_lock; /* Lock that protects the local_addr_list writers */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2bb2fcf5..495c87e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -223,6 +223,10 @@ struct sctp_sock { atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; + + /* These must be the last fields, as they will skipped on copies, + * like on accept and peeloff operations + */ struct list_head auto_asconf_list; int do_auto_asconf; }; -- cgit v1.1 From 2cbce139fc57bc2625f88add055d0b94f00c3352 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 12 Jun 2015 13:55:41 +0200 Subject: netfilter: nf_tables: attach net_device to basechain The device is part of the hook configuration, so instead of a global configuration per table, set it to each of the basechain that we create. This patch reworks ebddf1a8d78a ("netfilter: nf_tables: allow to bind table to net_device"). Note that this adds a dev_name field in the nft_base_chain structure which is required the netdev notification subscription that follows up in a patch to handle gone net_devices. 
Suggested-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3d6f48c..09d6f8d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -791,6 +791,7 @@ struct nft_stats { * @policy: default policy * @stats: per-cpu chain stats * @chain: the chain + * @dev_name: device name that this base chain is attached to (if any) */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; @@ -799,6 +800,7 @@ struct nft_base_chain { u8 policy; struct nft_stats __percpu *stats; struct nft_chain chain; + char dev_name[IFNAMSIZ]; }; static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) @@ -819,7 +821,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @name: name of the table - * @dev: this table is bound to this device (if any) */ struct nft_table { struct list_head list; @@ -829,7 +830,6 @@ struct nft_table { u32 use; u16 flags; char name[NFT_TABLE_MAXNAMELEN]; - struct net_device *dev; }; enum nft_af_flags { -- cgit v1.1 From 835b803377f5f11f9ccf234f70ed667a82605c45 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 15 Jun 2015 12:12:01 +0200 Subject: netfilter: nf_tables_netdev: unregister hooks on net_device removal In case the net_device is gone, we have to unregister the hooks and put back the reference on the net_device object. Once it comes back, register them again. This also covers the device rename case. This patch also adds a new flag to indicate that the basechain is disabled, so their hooks are not registered. This flag is used by the netdev family to handle the case where the net_device object is gone. Currently this flag is not exposed to userspace. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 09d6f8d..2a24668 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -781,6 +781,7 @@ struct nft_stats { }; #define NFT_HOOK_OPS_MAX 2 +#define NFT_BASECHAIN_DISABLED (1 << 0) /** * struct nft_base_chain - nf_tables base chain @@ -798,6 +799,7 @@ struct nft_base_chain { possible_net_t pnet; const struct nf_chain_type *type; u8 policy; + u8 flags; struct nft_stats __percpu *stats; struct nft_chain chain; char dev_name[IFNAMSIZ]; @@ -808,6 +810,11 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } +int nft_register_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops); +void nft_unregister_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops); + unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops); -- cgit v1.1 From eb4cb008529ca08e0d8c0fa54e8f739520197a65 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:18 -0400 Subject: sock_diag: define destruction multicast groups These groups will contain socket-destruction events for AF_INET/AF_INET6, IPPROTO_TCP/IPPROTO_UDP. Near the end of socket destruction, a check for listeners is performed. In the presence of a listener, rather than completely cleanup the socket, a unit of work will be added to a private work queue which will first broadcast information about the socket and then finish the cleanup operation. Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 26c1c31..3e82586 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1518,6 +1518,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); +void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, -- cgit v1.1 From d2609b345ebf0547015a78588c4d7ad68c9ccf26 Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:34 +0200 Subject: Bluetooth: hci_core/mgmt: Introduce multi-adv list The current hci dev structure only supports a single advertising instance. To support multi-instance advertising it is necessary to introduce a linked list of advertising instances so that multiple advertising instances can be dynamically added and/or removed. In a first step, the existing adv_instance member of the hci_dev struct is supplemented by a linked list of advertising instances. This patch introduces the list and supporting list management infrastructure. The list is not being used yet. 
Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3fbb793..4242dbf 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -157,15 +157,20 @@ struct oob_data { struct adv_info { struct delayed_work timeout_exp; + struct list_head list; __u8 instance; __u32 flags; __u16 timeout; + __u16 duration; __u16 adv_data_len; __u8 adv_data[HCI_MAX_AD_LENGTH]; __u16 scan_rsp_len; __u8 scan_rsp_data[HCI_MAX_AD_LENGTH]; }; +#define HCI_MAX_ADV_INSTANCES 1 +#define HCI_DEFAULT_ADV_DURATION 2 + #define HCI_MAX_SHORT_NAME_LENGTH 10 /* Default LE RPA expiry time, 15 minutes */ @@ -374,6 +379,9 @@ struct hci_dev { __u8 scan_rsp_data_len; struct adv_info adv_instance; + struct list_head adv_instances; + unsigned int adv_instance_cnt; + __u8 cur_adv_instance; __u8 irk[16]; __u32 rpa_timeout; @@ -1019,6 +1027,15 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); +void hci_adv_instances_clear(struct hci_dev *hdev); +struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); +struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); +int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, + u16 adv_data_len, u8 *adv_data, + u16 scan_rsp_len, u8 *scan_rsp_data, + u16 timeout, u16 duration); +int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); -- cgit v1.1 From 5d900e4601391576a3c0644d7fcad1ebf41a516e Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:35 +0200 Subject: Bluetooth: hci_core/mgmt: move adv timeout to hdev 
Currently the delayed work managing advertising duration and timeout is part of the advertising instance structure. This is not correct as only a single instance can be advertised at any given time. To implement round robin advertising a single delayed work structure is needed. To fix this the delayed work structure is being moved to the hci_dev structure. The instance specific variable is renamed to "remaining_time" to make it clear that this is the remaining lifetime of the instance and not the current advertising timeout. Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4242dbf..b53e1b1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -156,11 +156,11 @@ struct oob_data { }; struct adv_info { - struct delayed_work timeout_exp; struct list_head list; __u8 instance; __u32 flags; __u16 timeout; + __u16 remaining_time; __u16 duration; __u16 adv_data_len; __u8 adv_data[HCI_MAX_AD_LENGTH]; @@ -382,6 +382,8 @@ struct hci_dev { struct list_head adv_instances; unsigned int adv_instance_cnt; __u8 cur_adv_instance; + __u16 adv_instance_timeout; + struct delayed_work adv_instance_expire; __u8 irk[16]; __u32 rpa_timeout; @@ -1379,6 +1381,7 @@ void mgmt_set_powered_failed(struct hci_dev *hdev, int err); int mgmt_powered(struct hci_dev *hdev, u8 powered); int mgmt_update_adv_data(struct hci_dev *hdev); void mgmt_discoverable_timeout(struct hci_dev *hdev); +void mgmt_adv_timeout_expired(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, -- cgit v1.1 From fffd38bca51c9a1c00508b754ab66edb6f39cf37 Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:47 +0200 Subject: Bluetooth: 
mgmt/hci_core: multi-adv for add_advertising*() The add_advertising() and add_advertising_complete() functions reference the now obsolete hdev->adv_instance struct. Both methods are being refactored to access the dynamic advertising instance list instead. This patch also introduces all logic necessary to actually deal with multiple instance advertising. Notably the mgmt_adv_inst_expired() and schedule_adv_inst() method are being referenced to schedule instances in a round robin fashion. This patch also introduces a "pending" flag into the adv_info struct. This is necessary to identify and remove recently added advertising instances when the HCI commands return with an error status code. Otherwise new advertising instances could be leaked without properly informing userspace about their existence. Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b53e1b1..4f58a0e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -157,6 +157,7 @@ struct oob_data { struct adv_info { struct list_head list; + bool pending; __u8 instance; __u32 flags; __u16 timeout; -- cgit v1.1 From d4c5af8f71c8104504a83f7c71911550ebe43ac3 Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:52 +0200 Subject: Bluetooth: hci_core: remove obsolete adv_instance Now that the obsolete adv_instance is no longer being referenced anywhere in the code it can be removed without breaking the build. 
Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4f58a0e..a6cec6d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -379,7 +379,6 @@ struct hci_dev { __u8 scan_rsp_data[HCI_MAX_AD_LENGTH]; __u8 scan_rsp_data_len; - struct adv_info adv_instance; struct list_head adv_instances; unsigned int adv_instance_cnt; __u8 cur_adv_instance; @@ -584,11 +583,6 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) hdev->discovery.scan_duration = 0; } -static inline void adv_info_init(struct hci_dev *hdev) -{ - memset(&hdev->adv_instance, 0, sizeof(struct adv_info)); -} - bool hci_discovery_active(struct hci_dev *hdev); void hci_discovery_set_state(struct hci_dev *hdev, int state); -- cgit v1.1 From db25be6657a56ba2d68aae1f90d796f527f65689 Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:53 +0200 Subject: Bluetooth: hci_core: increase max adv inst Now that all preconditions are present for actual multi-advertising, the number of allowed advertising instances can be larger than one. This patch increases the number of allowed advertising instances to 5. 
Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a6cec6d..3bd618d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -169,7 +169,7 @@ struct adv_info { __u8 scan_rsp_data[HCI_MAX_AD_LENGTH]; }; -#define HCI_MAX_ADV_INSTANCES 1 +#define HCI_MAX_ADV_INSTANCES 5 #define HCI_DEFAULT_ADV_DURATION 2 #define HCI_MAX_SHORT_NAME_LENGTH 10 -- cgit v1.1 From 230ac490f7fba2aea52914c69d14b15dd515e49c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Jun 2015 14:07:03 +0200 Subject: netfilter: bridge: split ipv6 code into separated file Resolve compilation breakage when CONFIG_IPV6 is not set by moving the IPv6 code into a separated br_netfilter_ipv6.c file. Fixes: efb6de9b4ba0 ("netfilter: bridge: forward IPv6 fragmented packets") Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/br_netfilter.h | 60 ++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 2aa6048..bab824b 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -1,6 +1,66 @@ #ifndef _BR_NETFILTER_H_ #define _BR_NETFILTER_H_ +#include "../../../net/bridge/br_private.h" + +static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) +{ + skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); + + if (likely(skb->nf_bridge)) + atomic_set(&(skb->nf_bridge->use), 1); + + return skb->nf_bridge; +} + +void nf_bridge_update_protocol(struct sk_buff *skb); + +static inline struct nf_bridge_info * +nf_bridge_info_get(const struct sk_buff *skb) +{ + return skb->nf_bridge; +} + +unsigned int nf_bridge_encap_header_len(const 
struct sk_buff *skb); + +static inline void nf_bridge_push_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_push(skb, len); + skb->network_header -= len; +} + +int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb); + +static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) +{ + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? &port->br->fake_rtable : NULL; +} + +struct net_device *setup_pre_routing(struct sk_buff *skb); void br_netfilter_enable(void); +#if IS_ENABLED(CONFIG_IPV6) +int br_validate_ipv6(struct sk_buff *skb); +unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state); +#else +static inline int br_validate_ipv6(struct sk_buff *skb) +{ + return -1; +} + +static inline unsigned int +br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return NF_DROP; +} +#endif + #endif /* _BR_NETFILTER_H_ */ -- cgit v1.1 From 04c52dec1473c5dff9d07cd39a68c9b23def6c42 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:25 -0500 Subject: net: include missing headers in net/net_namespace.h Include linux/idr.h and linux/skbuff.h since they are required by objects that are declared in the net structure. struct net { ... struct idr netns_ids; ... struct sk_buff_head wext_nlevents; ... Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. 
Biederman --- include/net/net_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 72eb237..e951453 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -28,6 +28,8 @@ #include #include #include +#include +#include struct user_namespace; struct proc_dir_entry; -- cgit v1.1 From 10c04a8e715cca824f96bcbf4af07f5a40985357 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:26 -0500 Subject: netfilter: use forward declaration instead of including linux/proc_fs.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to pull the full definitions in that file, a simple forward declaration is enough. Moreover, include linux/procfs.h from nf_synproxy_core, otherwise this hits a compilation error due to missing declarations, ie. net/netfilter/nf_synproxy_core.c: In function ‘synproxy_proc_init’: net/netfilter/nf_synproxy_core.c:326:2: error: implicit declaration of function ‘proc_create’ [-Werror=implicit-function-declaration] if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat, ^ Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. 
Biederman --- include/net/netns/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 8874002..cf25b5e 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -1,9 +1,9 @@ #ifndef __NETNS_NETFILTER_H #define __NETNS_NETFILTER_H -#include #include +struct proc_dir_entry; struct nf_logger; struct netns_nf { -- cgit v1.1 From a263653ed798216c0069922d7b5237ca49436007 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:27 -0500 Subject: netfilter: don't pull include/linux/netfilter.h from netns headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pulls the full hook netfilter definitions from all those that include net_namespace.h. Instead let's just include the bare minimum required in the new linux/netfilter_defs.h file, and use it from the netfilter netns header files. I also needed to include in.h and in6.h from linux/netfilter.h otherwise we hit this compilation error: In file included from include/linux/netfilter_defs.h:4:0, from include/net/netns/netfilter.h:4, from include/net/net_namespace.h:22, from include/linux/netdevice.h:43, from net/netfilter/nfnetlink_queue_core.c:23: include/uapi/linux/netfilter.h:76:17: error: field ‘in’ has incomplete type struct in_addr in; And also explicit include linux/netfilter.h in several spots. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. 
Biederman --- include/net/netns/netfilter.h | 2 +- include/net/netns/x_tables.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cf25b5e..532e4ba 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -1,7 +1,7 @@ #ifndef __NETNS_NETFILTER_H #define __NETNS_NETFILTER_H -#include +#include struct proc_dir_entry; struct nf_logger; diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 4d6597a..c8a7681 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -2,7 +2,7 @@ #define __NETNS_X_TABLES_H #include -#include +#include struct ebt_table; -- cgit v1.1 From 638579f00a9b810ae37c7086b0d20634c1e0234e Mon Sep 17 00:00:00 2001 From: Zhaowei Yuan Date: Wed, 17 Jun 2015 17:56:27 +0800 Subject: net: Update out-of-date comment Struct inet_proto no longer exists, so update the comment which is out of date. Signed-off-by: Zhaowei Yuan Signed-off-by: David S. Miller --- include/net/sock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 3e82586..14d539c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -926,7 +926,6 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size) /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface - * transport -> network interface is defined by struct inet_proto */ struct proto { void (*close)(struct sock *sk, -- cgit v1.1 From 8405a8fff3f8545c888a872d6e3c0c8eecd4d348 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Jun 2015 14:03:39 -0500 Subject: netfilter: nf_qeueue: Drop queue entries on nf_unregister_hook Add code to nf_unregister_hook to flush the nf_queue when a hook is unregistered. 
This guarantees that the pointer that the nf_queue code retains into the nf_hook list will remain valid while a packet is queued. I tested what would happen if we do not flush queued packets and was trivially able to obtain the oops below. All that was required was to stop the nf_queue listening process, to delete all of the nf_tables, and to awaken the nf_queue listening process. > BUG: unable to handle kernel paging request at 0000000100000001 > IP: [<0000000100000001>] 0x100000001 > PGD b9c35067 PUD 0 > Oops: 0010 [#1] SMP > Modules linked in: > CPU: 0 PID: 519 Comm: lt-nfqnl_test Not tainted > task: ffff8800b9c8c050 ti: ffff8800ba9d8000 task.ti: ffff8800ba9d8000 > RIP: 0010:[<0000000100000001>] [<0000000100000001>] 0x100000001 > RSP: 0018:ffff8800ba9dba40 EFLAGS: 00010a16 > RAX: ffff8800bab48a00 RBX: ffff8800ba9dba90 RCX: ffff8800ba9dba90 > RDX: ffff8800b9c10128 RSI: ffff8800ba940900 RDI: ffff8800bab48a00 > RBP: ffff8800b9c10128 R08: ffffffff82976660 R09: ffff8800ba9dbb28 > R10: dead000000100100 R11: dead000000200200 R12: ffff8800ba940900 > R13: ffffffff8313fd50 R14: ffff8800b9c95200 R15: 0000000000000000 > FS: 00007fb91fc34700(0000) GS:ffff8800bfa00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000100000001 CR3: 00000000babfb000 CR4: 00000000000007f0 > Stack: > ffffffff8206ab0f ffffffff82982240 ffff8800bab48a00 ffff8800b9c100a8 > ffff8800b9c10100 0000000000000001 ffff8800ba940900 ffff8800b9c10128 > ffffffff8206bd65 ffff8800bfb0d5e0 ffff8800bab48a00 0000000000014dc0 > Call Trace: > [] ? nf_iterate+0x4f/0xa0 > [] ? nf_reinject+0x125/0x190 > [] ? nfqnl_recv_verdict+0x255/0x360 > [] ? nla_parse+0x80/0xf0 > [] ? nfnetlink_rcv_msg+0x13c/0x240 > [] ? __memcg_kmem_get_cache+0x4c/0x150 > [] ? nfnl_lock+0x20/0x20 > [] ? netlink_rcv_skb+0xa9/0xc0 > [] ? netlink_unicast+0x12f/0x1c0 > [] ? netlink_sendmsg+0x28e/0x650 > [] ? sock_sendmsg+0x44/0x50 > [] ? ___sys_sendmsg+0x2ab/0x2c0 > [] ? __wake_up+0x43/0x70 > [] ? 
tty_write+0x1c4/0x2a0 > [] ? __sys_sendmsg+0x44/0x80 > [] ? system_call_fastpath+0x12/0x6a > Code: Bad RIP value. > RIP [<0000000100000001>] 0x100000001 > RSP > CR2: 0000000100000001 > ---[ end trace 08eb65d42362793f ]--- Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/netfilter/nf_queue.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index d81d584..e863585 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -24,6 +24,8 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); + void (*nf_hook_drop)(struct net *net, + struct nf_hook_ops *ops); }; void nf_register_queue_handler(const struct nf_queue_handler *qh); -- cgit v1.1 From 3e3a78b49508e58f798cf519876bbb9ca0f931af Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 22 Jun 2015 00:27:16 -0700 Subject: switchdev: rename vlan vid_start to vid_begin Use vid_begin/end to be consistent with BRIDGE_VLAN_INFO_RANGE_BEGIN/END. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 437f8fe..d5671f1 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -57,7 +57,7 @@ struct switchdev_obj { union { struct switchdev_obj_vlan { /* PORT_VLAN */ u16 flags; - u16 vid_start; + u16 vid_begin; u16 vid_end; } vlan; struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ -- cgit v1.1 From 8a3d03166f19329b46c6f9e900f93a89f446077b Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 23 Jun 2015 13:45:36 -0400 Subject: net: track link-status of ipv4 nexthops Add a fib flag called RTNH_F_LINKDOWN to any ipv4 nexthops that are reachable via an interface where carrier is off. 
No action is taken, but additional flags are passed to userspace to indicate carrier status. This also includes a cleanup to fib_disable_ip to more clearly indicate what event made the function call to replace the more cryptic force option previously used. v2: Split out kernel functionality into 2 patches, this patch simply sets and clears new nexthop flag RTNH_F_LINKDOWN. v3: Cleanups suggested by Alex as well as a bug noticed in fib_sync_down_dev and fib_sync_up when multipath was not enabled. v5: Whitespace and variable declaration fixups suggested by Dave. v6: Style fixups noticed by Dave; ran checkpatch to be sure I got them all. Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Acked-by: Scott Feldman Signed-off-by: David S. Miller --- include/net/ip_fib.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 54271ed..f73d27c 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -305,9 +305,9 @@ void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); -int fib_sync_down_dev(struct net_device *dev, int force); +int fib_sync_down_dev(struct net_device *dev, unsigned long event); int fib_sync_down_addr(struct net *net, __be32 local); -int fib_sync_up(struct net_device *dev); +int fib_sync_up(struct net_device *dev, unsigned int nh_flags); void fib_select_multipath(struct fib_result *res); /* Exported by fib_trie.c */ -- cgit v1.1 From 0eeb075fad736fb92620af995c47c204bbb5e829 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 23 Jun 2015 13:45:37 -0400 Subject: net: ipv4 sysctl option to ignore routes when nexthop link is down This feature is only enabled with the new per-interface or ipv4 global sysctls called 'ignore_routes_with_linkdown'. 
net.ipv4.conf.all.ignore_routes_with_linkdown = 0 net.ipv4.conf.default.ignore_routes_with_linkdown = 0 net.ipv4.conf.lo.ignore_routes_with_linkdown = 0 ... When the above sysctls are set, the kernel will report to userspace that a route is dead and will no longer resolve to this nexthop when performing a fib lookup. This will signal to userspace that the route will not be selected. The signalling of a RTNH_F_DEAD is only passed to userspace if the sysctl is enabled and link is down. This was done as without it the netlink listeners would have no idea whether or not a nexthop would be selected. The kernel only sets RTNH_F_DEAD internally if the interface has IFF_UP cleared. With the new sysctl set, the following behavior can be observed (interface p8p1 is link-down): default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 dead linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 dead linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 90.0.0.1 via 70.0.0.2 dev p7p1 src 70.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 via 10.0.5.2 dev p9p1 src 10.0.5.15 cache While the route does remain in the table (so it can be modified if needed rather than being wiped away as it would be if IFF_UP was cleared), the proper next-hop is chosen automatically when the link is down. 
Now interface p8p1 is linked-up: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 192.168.56.0/24 dev p2p1 proto kernel scope link src 192.168.56.2 90.0.0.1 via 80.0.0.2 dev p8p1 src 80.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 dev p8p1 src 80.0.0.1 cache and the output changes to what one would expect. If the sysctl is not set, the following output would be expected when p8p1 is down: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 Since the dead flag does not appear, there should be no expectation that the kernel would skip using this route due to link being down. v2: Split kernel changes into 2 patches, this actually makes a behavioral change if the sysctl is set. Also took suggestion from Alex to simplify code by only checking sysctl during fib lookup and suggestion from Scott to add a per-interface sysctl. v3: Code clean-ups to make it more readable and efficient as well as a reverse path check fix. v4: Drop binary sysctl v5: Whitespace fixups from Dave v6: Style changes from Dave and checkpatch suggestions v7: One more checkpatch fixup Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Acked-by: Scott Feldman Signed-off-by: David S. 
Miller --- include/net/fib_rules.h | 3 ++- include/net/ip_fib.h | 16 +++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 6d67383..903a55e 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -36,7 +36,8 @@ struct fib_lookup_arg { void *result; struct fib_rule *rule; int flags; -#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_IGNORE_LINKSTATE 2 }; struct fib_rules_ops { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f73d27c..49c142b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -226,7 +226,7 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id) } static inline int fib_lookup(struct net *net, const struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; @@ -234,7 +234,7 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF)) err = 0; rcu_read_unlock(); @@ -249,16 +249,18 @@ void __net_exit fib4_rules_exit(struct net *net); struct fib_table *fib_new_table(struct net *net, u32 id); struct fib_table *fib_get_table(struct net *net, u32 id); -int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res); +int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res, unsigned int flags); static inline int fib_lookup(struct net *net, struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err; + flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) - return __fib_lookup(net, flp, res); + return __fib_lookup(net, flp, res, flags); rcu_read_lock(); @@ -266,11 +268,11 @@ static 
inline int fib_lookup(struct net *net, struct flowi4 *flp, for (err = 0; !err; err = -ENETUNREACH) { tb = rcu_dereference_rtnl(net->ipv4.fib_main); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; tb = rcu_dereference_rtnl(net->ipv4.fib_default); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; } -- cgit v1.1