summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/dev.c138
-rw-r--r--net/core/ethtool.c556
-rw-r--r--net/core/neighbour.c167
-rw-r--r--net/core/net-sysfs.c56
-rw-r--r--net/core/netpoll.c6
-rw-r--r--net/core/netprio_cgroup.c344
-rw-r--r--net/core/pktgen.c15
-rw-r--r--net/core/skbuff.c53
-rw-r--r--net/core/sock.c33
-rw-r--r--net/core/sysctl_net_core.c9
11 files changed, 767 insertions, 611 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 0d357b1..3606d40 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
+obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ba50a1..8afb244 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -137,6 +137,7 @@
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <linux/net_tstamp.h>
+#include <linux/jump_label.h>
#include "net-sysfs.h"
@@ -1320,8 +1321,6 @@ EXPORT_SYMBOL(dev_close);
*/
void dev_disable_lro(struct net_device *dev)
{
- u32 flags;
-
/*
* If we're trying to disable lro on a vlan device
* use the underlying physical device instead
@@ -1329,15 +1328,9 @@ void dev_disable_lro(struct net_device *dev)
if (is_vlan_dev(dev))
dev = vlan_dev_real_dev(dev);
- if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
- flags = dev->ethtool_ops->get_flags(dev);
- else
- flags = ethtool_op_get_flags(dev);
+ dev->wanted_features &= ~NETIF_F_LRO;
+ netdev_update_features(dev);
- if (!(flags & ETH_FLAG_LRO))
- return;
-
- __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
if (unlikely(dev->features & NETIF_F_LRO))
netdev_WARN(dev, "failed to disable LRO!\n");
}
@@ -1449,34 +1442,32 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
-/* When > 0 there are consumers of rx skb time stamps */
-static atomic_t netstamp_needed = ATOMIC_INIT(0);
+static struct jump_label_key netstamp_needed __read_mostly;
void net_enable_timestamp(void)
{
- atomic_inc(&netstamp_needed);
+ jump_label_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);
void net_disable_timestamp(void)
{
- atomic_dec(&netstamp_needed);
+ jump_label_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
- if (atomic_read(&netstamp_needed))
+ skb->tstamp.tv64 = 0;
+ if (static_branch(&netstamp_needed))
__net_timestamp(skb);
- else
- skb->tstamp.tv64 = 0;
}
-static inline void net_timestamp_check(struct sk_buff *skb)
-{
- if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
- __net_timestamp(skb);
-}
+#define net_timestamp_check(COND, SKB) \
+ if (static_branch(&netstamp_needed)) { \
+ if ((COND) && !(SKB)->tstamp.tv64) \
+ __net_timestamp(SKB); \
+ } \
static int net_hwtstamp_validate(struct ifreq *ifr)
{
@@ -1923,7 +1914,8 @@ EXPORT_SYMBOL(skb_checksum_help);
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
*/
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
@@ -1953,9 +1945,9 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
dev->ethtool_ops->get_drvinfo(dev, &info);
- WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
- info.driver, dev ? dev->features : 0L,
- skb->sk ? skb->sk->sk_route_caps : 0L,
+ WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d ip_summed=%d\n",
+ info.driver, dev ? &dev->features : NULL,
+ skb->sk ? &skb->sk->sk_route_caps : NULL,
skb->len, skb->data_len, skb->ip_summed);
if (skb_header_cloned(skb) &&
@@ -2064,7 +2056,7 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
* This function segments the given skb and stores the list of segments
* in skb->next.
*/
-static int dev_gso_segment(struct sk_buff *skb, int features)
+static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
struct sk_buff *segs;
@@ -2103,7 +2095,7 @@ static inline void skb_orphan_try(struct sk_buff *skb)
}
}
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
{
return ((features & NETIF_F_GEN_CSUM) ||
((features & NETIF_F_V4_CSUM) &&
@@ -2114,7 +2106,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
protocol == htons(ETH_P_FCOE)));
}
-static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
+static netdev_features_t harmonize_features(struct sk_buff *skb,
+ __be16 protocol, netdev_features_t features)
{
if (!can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
@@ -2126,10 +2119,10 @@ static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features
return features;
}
-u32 netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
{
__be16 protocol = skb->protocol;
- u32 features = skb->dev->features;
+ netdev_features_t features = skb->dev->features;
if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2175,7 +2168,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
unsigned int skb_len;
if (likely(!skb->next)) {
- u32 features;
+ netdev_features_t features;
/*
* If device doesn't need skb->dst, release it right now while
@@ -2456,6 +2449,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
return rc;
}
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+static void skb_update_prio(struct sk_buff *skb)
+{
+ struct netprio_map *map = rcu_dereference(skb->dev->priomap);
+
+ if ((!skb->priority) && (skb->sk) && map)
+ skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+}
+#else
+#define skb_update_prio(skb)
+#endif
+
static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10
@@ -2496,6 +2501,8 @@ int dev_queue_xmit(struct sk_buff *skb)
*/
rcu_read_lock_bh();
+ skb_update_prio(skb);
+
txq = dev_pick_tx(dev, skb);
q = rcu_dereference_bh(txq->qdisc);
@@ -2718,6 +2725,8 @@ EXPORT_SYMBOL(__skb_get_rxhash);
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
+struct jump_label_key rps_needed __read_mostly;
+
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow *rflow, u16 next_cpu)
@@ -2997,12 +3006,11 @@ int netif_rx(struct sk_buff *skb)
if (netpoll_rx(skb))
return NET_RX_DROP;
- if (netdev_tstamp_prequeue)
- net_timestamp_check(skb);
+ net_timestamp_check(netdev_tstamp_prequeue, skb);
trace_netif_rx(skb);
#ifdef CONFIG_RPS
- {
+ if (static_branch(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
@@ -3017,14 +3025,13 @@ int netif_rx(struct sk_buff *skb)
rcu_read_unlock();
preempt_enable();
- }
-#else
+ } else
+#endif
{
unsigned int qtail;
ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
put_cpu();
}
-#endif
return ret;
}
EXPORT_SYMBOL(netif_rx);
@@ -3230,8 +3237,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
int ret = NET_RX_DROP;
__be16 type;
- if (!netdev_tstamp_prequeue)
- net_timestamp_check(skb);
+ net_timestamp_check(!netdev_tstamp_prequeue, skb);
trace_netif_receive_skb(skb);
@@ -3362,14 +3368,13 @@ out:
*/
int netif_receive_skb(struct sk_buff *skb)
{
- if (netdev_tstamp_prequeue)
- net_timestamp_check(skb);
+ net_timestamp_check(netdev_tstamp_prequeue, skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
#ifdef CONFIG_RPS
- {
+ if (static_branch(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu, ret;
@@ -3380,16 +3385,12 @@ int netif_receive_skb(struct sk_buff *skb)
if (cpu >= 0) {
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- } else {
- rcu_read_unlock();
- ret = __netif_receive_skb(skb);
+ return ret;
}
-
- return ret;
+ rcu_read_unlock();
}
-#else
- return __netif_receive_skb(skb);
#endif
+ return __netif_receive_skb(skb);
}
EXPORT_SYMBOL(netif_receive_skb);
@@ -5362,7 +5363,8 @@ static void rollback_registered(struct net_device *dev)
list_del(&single);
}
-static u32 netdev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netdev_fix_features(struct net_device *dev,
+ netdev_features_t features)
{
/* Fix illegal checksum combinations */
if ((features & NETIF_F_HW_CSUM) &&
@@ -5371,12 +5373,6 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
}
- if ((features & NETIF_F_NO_CSUM) &&
- (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- netdev_warn(dev, "mixed no checksumming and other settings.\n");
- features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
- }
-
/* Fix illegal SG+CSUM combinations. */
if ((features & NETIF_F_SG) &&
!(features & NETIF_F_ALL_CSUM)) {
@@ -5424,7 +5420,7 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
int __netdev_update_features(struct net_device *dev)
{
- u32 features;
+ netdev_features_t features;
int err = 0;
ASSERT_RTNL();
@@ -5440,16 +5436,16 @@ int __netdev_update_features(struct net_device *dev)
if (dev->features == features)
return 0;
- netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
- dev->features, features);
+ netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
+ &dev->features, &features);
if (dev->netdev_ops->ndo_set_features)
err = dev->netdev_ops->ndo_set_features(dev, features);
if (unlikely(err < 0)) {
netdev_err(dev,
- "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
- err, features, dev->features);
+ "set_features() failed (%d); wanted %pNF, left %pNF\n",
+ err, &features, &dev->features);
return -1;
}
@@ -5633,11 +5629,12 @@ int register_netdevice(struct net_device *dev)
dev->wanted_features = dev->features & dev->hw_features;
/* Turn on no cache copy if HW is doing checksum */
- dev->hw_features |= NETIF_F_NOCACHE_COPY;
- if ((dev->features & NETIF_F_ALL_CSUM) &&
- !(dev->features & NETIF_F_NO_CSUM)) {
- dev->wanted_features |= NETIF_F_NOCACHE_COPY;
- dev->features |= NETIF_F_NOCACHE_COPY;
+ if (!(dev->flags & IFF_LOOPBACK)) {
+ dev->hw_features |= NETIF_F_NOCACHE_COPY;
+ if (dev->features & NETIF_F_ALL_CSUM) {
+ dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+ dev->features |= NETIF_F_NOCACHE_COPY;
+ }
}
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6373,7 +6370,8 @@ static int dev_cpu_callback(struct notifier_block *nfb,
* @one to the master device with current feature set @all. Will not
* enable anything that is off in @mask. Returns the new feature set.
*/
-u32 netdev_increment_features(u32 all, u32 one, u32 mask)
+netdev_features_t netdev_increment_features(netdev_features_t all,
+ netdev_features_t one, netdev_features_t mask)
{
if (mask & NETIF_F_GEN_CSUM)
mask |= NETIF_F_ALL_CSUM;
@@ -6382,10 +6380,6 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask)
all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
all &= one | ~NETIF_F_ALL_FOR_ALL;
- /* If device needs checksumming, downgrade to it. */
- if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
- all &= ~NETIF_F_NO_CSUM;
-
/* If one device supports hw checksumming, set for all. */
if (all & NETIF_F_GEN_CSUM)
all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f444817..31b0b7f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -36,235 +36,44 @@ u32 ethtool_op_get_link(struct net_device *dev)
}
EXPORT_SYMBOL(ethtool_op_get_link);
-u32 ethtool_op_get_tx_csum(struct net_device *dev)
-{
- return (dev->features & NETIF_F_ALL_CSUM) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tx_csum);
-
-int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_IP_CSUM;
- else
- dev->features &= ~NETIF_F_IP_CSUM;
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-
-int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_HW_CSUM;
- else
- dev->features &= ~NETIF_F_HW_CSUM;
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-
-int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
- else
- dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-
-u32 ethtool_op_get_sg(struct net_device *dev)
-{
- return (dev->features & NETIF_F_SG) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_sg);
-
-int ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_SG;
- else
- dev->features &= ~NETIF_F_SG;
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_sg);
-
-u32 ethtool_op_get_tso(struct net_device *dev)
-{
- return (dev->features & NETIF_F_TSO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tso);
-
-int ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_TSO;
- else
- dev->features &= ~NETIF_F_TSO;
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tso);
-
-u32 ethtool_op_get_ufo(struct net_device *dev)
-{
- return (dev->features & NETIF_F_UFO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-
-int ethtool_op_set_ufo(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_UFO;
- else
- dev->features &= ~NETIF_F_UFO;
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-
-/* the following list of flags are the same as their associated
- * NETIF_F_xxx values in include/linux/netdevice.h
- */
-static const u32 flags_dup_features =
- (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
- ETH_FLAG_RXHASH);
-
-u32 ethtool_op_get_flags(struct net_device *dev)
-{
- /* in the future, this function will probably contain additional
- * handling for flags which are not so easily handled
- * by a simple masking operation
- */
-
- return dev->features & flags_dup_features;
-}
-EXPORT_SYMBOL(ethtool_op_get_flags);
-
-/* Check if device can enable (or disable) particular feature coded in "data"
- * argument. Flags "supported" describe features that can be toggled by device.
- * If feature can not be toggled, it state (enabled or disabled) must match
- * hardcoded device features state, otherwise flags are marked as invalid.
- */
-bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
-{
- u32 features = dev->features & flags_dup_features;
- /* "data" can contain only flags_dup_features bits,
- * see __ethtool_set_flags */
-
- return (features & ~supported) != (data & ~supported);
-}
-EXPORT_SYMBOL(ethtool_invalid_flags);
-
-int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
- if (ethtool_invalid_flags(dev, data, supported))
- return -EINVAL;
-
- dev->features = ((dev->features & ~flags_dup_features) |
- (data & flags_dup_features));
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_flags);
-
/* Handlers for each ethtool command */
-#define ETHTOOL_DEV_FEATURE_WORDS 1
-
-static void ethtool_get_features_compat(struct net_device *dev,
- struct ethtool_get_features_block *features)
-{
- if (!dev->ethtool_ops)
- return;
-
- /* getting RX checksum */
- if (dev->ethtool_ops->get_rx_csum)
- if (dev->ethtool_ops->get_rx_csum(dev))
- features[0].active |= NETIF_F_RXCSUM;
-
- /* mark legacy-changeable features */
- if (dev->ethtool_ops->set_sg)
- features[0].available |= NETIF_F_SG;
- if (dev->ethtool_ops->set_tx_csum)
- features[0].available |= NETIF_F_ALL_CSUM;
- if (dev->ethtool_ops->set_tso)
- features[0].available |= NETIF_F_ALL_TSO;
- if (dev->ethtool_ops->set_rx_csum)
- features[0].available |= NETIF_F_RXCSUM;
- if (dev->ethtool_ops->set_flags)
- features[0].available |= flags_dup_features;
-}
-
-static int ethtool_set_feature_compat(struct net_device *dev,
- int (*legacy_set)(struct net_device *, u32),
- struct ethtool_set_features_block *features, u32 mask)
-{
- u32 do_set;
-
- if (!legacy_set)
- return 0;
-
- if (!(features[0].valid & mask))
- return 0;
-
- features[0].valid &= ~mask;
-
- do_set = !!(features[0].requested & mask);
-
- if (legacy_set(dev, do_set) < 0)
- netdev_info(dev,
- "Legacy feature change (%s) failed for 0x%08x\n",
- do_set ? "set" : "clear", mask);
-
- return 1;
-}
-
-static int ethtool_set_flags_compat(struct net_device *dev,
- int (*legacy_set)(struct net_device *, u32),
- struct ethtool_set_features_block *features, u32 mask)
-{
- u32 value;
-
- if (!legacy_set)
- return 0;
-
- if (!(features[0].valid & mask))
- return 0;
-
- value = dev->features & ~features[0].valid;
- value |= features[0].requested;
-
- features[0].valid &= ~mask;
-
- if (legacy_set(dev, value & mask) < 0)
- netdev_info(dev, "Legacy flags change failed\n");
-
- return 1;
-}
-
-static int ethtool_set_features_compat(struct net_device *dev,
- struct ethtool_set_features_block *features)
-{
- int compat;
-
- if (!dev->ethtool_ops)
- return 0;
-
- compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
- features, NETIF_F_SG);
- compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
- features, NETIF_F_ALL_CSUM);
- compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
- features, NETIF_F_ALL_TSO);
- compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
- features, NETIF_F_RXCSUM);
- compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags,
- features, flags_dup_features);
-
- return compat;
-}
+#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32)
+
+static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
+ [NETIF_F_SG_BIT] = "tx-scatter-gather",
+ [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
+ [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
+ [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
+ [NETIF_F_HIGHDMA_BIT] = "highdma",
+ [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
+ [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert",
+
+ [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse",
+ [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter",
+ [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
+ [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
+ [NETIF_F_LLTX_BIT] = "tx-lockless",
+ [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
+ [NETIF_F_GRO_BIT] = "rx-gro",
+ [NETIF_F_LRO_BIT] = "rx-lro",
+
+ [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
+ [NETIF_F_UFO_BIT] = "tx-udp-fragmentation",
+ [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
+ [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
+ [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
+ [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
+
+ [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
+ [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
+ [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
+ [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
+ [NETIF_F_RXHASH_BIT] = "rx-hashing",
+ [NETIF_F_RXCSUM_BIT] = "rx-checksum",
+ [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
+ [NETIF_F_LOOPBACK_BIT] = "loopback",
+};
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
{
@@ -272,18 +81,21 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
.cmd = ETHTOOL_GFEATURES,
.size = ETHTOOL_DEV_FEATURE_WORDS,
};
- struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = {
- {
- .available = dev->hw_features,
- .requested = dev->wanted_features,
- .active = dev->features,
- .never_changed = NETIF_F_NEVER_CHANGE,
- },
- };
+ struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
u32 __user *sizeaddr;
u32 copy_size;
+ int i;
- ethtool_get_features_compat(dev, features);
+ /* in case feature bits run out again */
+ BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t));
+
+ for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
+ features[i].available = (u32)(dev->hw_features >> (32 * i));
+ features[i].requested = (u32)(dev->wanted_features >> (32 * i));
+ features[i].active = (u32)(dev->features >> (32 * i));
+ features[i].never_changed =
+ (u32)(NETIF_F_NEVER_CHANGE >> (32 * i));
+ }
sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
if (get_user(copy_size, sizeaddr))
@@ -305,7 +117,8 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
{
struct ethtool_sfeatures cmd;
struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
- int ret = 0;
+ netdev_features_t wanted = 0, valid = 0;
+ int i, ret = 0;
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
@@ -317,65 +130,29 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
if (copy_from_user(features, useraddr, sizeof(features)))
return -EFAULT;
- if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
- return -EINVAL;
+ for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
+ valid |= (netdev_features_t)features[i].valid << (32 * i);
+ wanted |= (netdev_features_t)features[i].requested << (32 * i);
+ }
- if (ethtool_set_features_compat(dev, features))
- ret |= ETHTOOL_F_COMPAT;
+ if (valid & ~NETIF_F_ETHTOOL_BITS)
+ return -EINVAL;
- if (features[0].valid & ~dev->hw_features) {
- features[0].valid &= dev->hw_features;
+ if (valid & ~dev->hw_features) {
+ valid &= dev->hw_features;
ret |= ETHTOOL_F_UNSUPPORTED;
}
- dev->wanted_features &= ~features[0].valid;
- dev->wanted_features |= features[0].valid & features[0].requested;
+ dev->wanted_features &= ~valid;
+ dev->wanted_features |= wanted & valid;
__netdev_update_features(dev);
- if ((dev->wanted_features ^ dev->features) & features[0].valid)
+ if ((dev->wanted_features ^ dev->features) & valid)
ret |= ETHTOOL_F_WISH;
return ret;
}
-static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
- /* NETIF_F_SG */ "tx-scatter-gather",
- /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4",
- /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded",
- /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic",
- /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6",
- /* NETIF_F_HIGHDMA */ "highdma",
- /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist",
- /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert",
-
- /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse",
- /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter",
- /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged",
- /* NETIF_F_GSO */ "tx-generic-segmentation",
- /* NETIF_F_LLTX */ "tx-lockless",
- /* NETIF_F_NETNS_LOCAL */ "netns-local",
- /* NETIF_F_GRO */ "rx-gro",
- /* NETIF_F_LRO */ "rx-lro",
-
- /* NETIF_F_TSO */ "tx-tcp-segmentation",
- /* NETIF_F_UFO */ "tx-udp-fragmentation",
- /* NETIF_F_GSO_ROBUST */ "tx-gso-robust",
- /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation",
- /* NETIF_F_TSO6 */ "tx-tcp6-segmentation",
- /* NETIF_F_FSO */ "tx-fcoe-segmentation",
- "",
- "",
-
- /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc",
- /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp",
- /* NETIF_F_FCOE_MTU */ "fcoe-mtu",
- /* NETIF_F_NTUPLE */ "rx-ntuple-filter",
- /* NETIF_F_RXHASH */ "rx-hashing",
- /* NETIF_F_RXCSUM */ "rx-checksum",
- /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy",
- /* NETIF_F_LOOPBACK */ "loopback",
-};
-
static int __ethtool_get_sset_count(struct net_device *dev, int sset)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -402,7 +179,7 @@ static void __ethtool_get_strings(struct net_device *dev,
ops->get_strings(dev, stringset, data);
}
-static u32 ethtool_get_feature_mask(u32 eth_cmd)
+static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
{
/* feature masks of legacy discrete ethtool ops */
@@ -433,136 +210,82 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd)
}
}
-static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd)
-{
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (!ops)
- return NULL;
-
- switch (ethcmd) {
- case ETHTOOL_GTXCSUM:
- return ops->get_tx_csum;
- case ETHTOOL_GRXCSUM:
- return ops->get_rx_csum;
- case ETHTOOL_SSG:
- return ops->get_sg;
- case ETHTOOL_STSO:
- return ops->get_tso;
- case ETHTOOL_SUFO:
- return ops->get_ufo;
- default:
- return NULL;
- }
-}
-
-static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev)
-{
- return !!(dev->features & NETIF_F_ALL_CSUM);
-}
-
static int ethtool_get_one_feature(struct net_device *dev,
char __user *useraddr, u32 ethcmd)
{
- u32 mask = ethtool_get_feature_mask(ethcmd);
+ netdev_features_t mask = ethtool_get_feature_mask(ethcmd);
struct ethtool_value edata = {
.cmd = ethcmd,
.data = !!(dev->features & mask),
};
- /* compatibility with discrete get_ ops */
- if (!(dev->hw_features & mask)) {
- u32 (*actor)(struct net_device *);
-
- actor = __ethtool_get_one_feature_actor(dev, ethcmd);
-
- /* bug compatibility with old get_rx_csum */
- if (ethcmd == ETHTOOL_GRXCSUM && !actor)
- actor = __ethtool_get_rx_csum_oldbug;
-
- if (actor)
- edata.data = actor(dev);
- }
-
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
-static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
-static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
-static int __ethtool_set_sg(struct net_device *dev, u32 data);
-static int __ethtool_set_tso(struct net_device *dev, u32 data);
-static int __ethtool_set_ufo(struct net_device *dev, u32 data);
-
static int ethtool_set_one_feature(struct net_device *dev,
void __user *useraddr, u32 ethcmd)
{
struct ethtool_value edata;
- u32 mask;
+ netdev_features_t mask;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
mask = ethtool_get_feature_mask(ethcmd);
mask &= dev->hw_features;
- if (mask) {
- if (edata.data)
- dev->wanted_features |= mask;
- else
- dev->wanted_features &= ~mask;
+ if (!mask)
+ return -EOPNOTSUPP;
- __netdev_update_features(dev);
- return 0;
- }
+ if (edata.data)
+ dev->wanted_features |= mask;
+ else
+ dev->wanted_features &= ~mask;
- /* Driver is not converted to ndo_fix_features or does not
- * support changing this offload. In the latter case it won't
- * have corresponding ethtool_ops field set.
- *
- * Following part is to be removed after all drivers advertise
- * their changeable features in netdev->hw_features and stop
- * using discrete offload setting ops.
- */
+ __netdev_update_features(dev);
- switch (ethcmd) {
- case ETHTOOL_STXCSUM:
- return __ethtool_set_tx_csum(dev, edata.data);
- case ETHTOOL_SRXCSUM:
- return __ethtool_set_rx_csum(dev, edata.data);
- case ETHTOOL_SSG:
- return __ethtool_set_sg(dev, edata.data);
- case ETHTOOL_STSO:
- return __ethtool_set_tso(dev, edata.data);
- case ETHTOOL_SUFO:
- return __ethtool_set_ufo(dev, edata.data);
- default:
- return -EOPNOTSUPP;
- }
+ return 0;
+}
+
+#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \
+ ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH)
+#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \
+ NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH)
+
+static u32 __ethtool_get_flags(struct net_device *dev)
+{
+ u32 flags = 0;
+
+ if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO;
+ if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN;
+ if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN;
+ if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE;
+ if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH;
+
+ return flags;
}
-int __ethtool_set_flags(struct net_device *dev, u32 data)
+static int __ethtool_set_flags(struct net_device *dev, u32 data)
{
- u32 changed;
+ netdev_features_t features = 0, changed;
- if (data & ~flags_dup_features)
+ if (data & ~ETH_ALL_FLAGS)
return -EINVAL;
- /* legacy set_flags() op */
- if (dev->ethtool_ops->set_flags) {
- if (unlikely(dev->hw_features & flags_dup_features))
- netdev_warn(dev,
- "driver BUG: mixed hw_features and set_flags()\n");
- return dev->ethtool_ops->set_flags(dev, data);
- }
+ if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO;
+ if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX;
+ if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX;
+ if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE;
+ if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH;
/* allow changing only bits set in hw_features */
- changed = (data ^ dev->features) & flags_dup_features;
+ changed = (features ^ dev->features) & ETH_ALL_FEATURES;
if (changed & ~dev->hw_features)
return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
dev->wanted_features =
- (dev->wanted_features & ~changed) | (data & dev->hw_features);
+ (dev->wanted_features & ~changed) | (features & changed);
__netdev_update_features(dev);
@@ -1231,81 +954,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
}
-static int __ethtool_set_sg(struct net_device *dev, u32 data)
-{
- int err;
-
- if (!dev->ethtool_ops->set_sg)
- return -EOPNOTSUPP;
-
- if (data && !(dev->features & NETIF_F_ALL_CSUM))
- return -EINVAL;
-
- if (!data && dev->ethtool_ops->set_tso) {
- err = dev->ethtool_ops->set_tso(dev, 0);
- if (err)
- return err;
- }
-
- if (!data && dev->ethtool_ops->set_ufo) {
- err = dev->ethtool_ops->set_ufo(dev, 0);
- if (err)
- return err;
- }
- return dev->ethtool_ops->set_sg(dev, data);
-}
-
-static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
-{
- int err;
-
- if (!dev->ethtool_ops->set_tx_csum)
- return -EOPNOTSUPP;
-
- if (!data && dev->ethtool_ops->set_sg) {
- err = __ethtool_set_sg(dev, 0);
- if (err)
- return err;
- }
-
- return dev->ethtool_ops->set_tx_csum(dev, data);
-}
-
-static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
-{
- if (!dev->ethtool_ops->set_rx_csum)
- return -EOPNOTSUPP;
-
- if (!data)
- dev->features &= ~NETIF_F_GRO;
-
- return dev->ethtool_ops->set_rx_csum(dev, data);
-}
-
-static int __ethtool_set_tso(struct net_device *dev, u32 data)
-{
- if (!dev->ethtool_ops->set_tso)
- return -EOPNOTSUPP;
-
- if (data && !(dev->features & NETIF_F_SG))
- return -EINVAL;
-
- return dev->ethtool_ops->set_tso(dev, data);
-}
-
-static int __ethtool_set_ufo(struct net_device *dev, u32 data)
-{
- if (!dev->ethtool_ops->set_ufo)
- return -EOPNOTSUPP;
- if (data && !(dev->features & NETIF_F_SG))
- return -EINVAL;
- if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
- (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
- == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
- return -EINVAL;
- return dev->ethtool_ops->set_ufo(dev, data);
-}
-
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
{
struct ethtool_test test;
@@ -1771,9 +1419,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
break;
case ETHTOOL_GFLAGS:
rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_flags ?
- dev->ethtool_ops->get_flags :
- ethtool_op_get_flags));
+ __ethtool_get_flags);
break;
case ETHTOOL_SFLAGS:
rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 039d51e..27d3fef 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -238,6 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
it to safe state.
*/
skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
n->output = neigh_blackhole;
if (n->nud_state & NUD_VALID)
n->nud_state = NUD_NOARP;
@@ -702,6 +703,7 @@ void neigh_destroy(struct neighbour *neigh)
printk(KERN_WARNING "Impossible event.\n");
skb_queue_purge(&neigh->arp_queue);
+ neigh->arp_queue_len_bytes = 0;
dev_put(neigh->dev);
neigh_parms_put(neigh->parms);
@@ -842,6 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh)
write_lock(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
+ neigh->arp_queue_len_bytes = 0;
}
static void neigh_probe(struct neighbour *neigh)
@@ -980,15 +983,20 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
if (neigh->nud_state == NUD_INCOMPLETE) {
if (skb) {
- if (skb_queue_len(&neigh->arp_queue) >=
- neigh->parms->queue_len) {
+ while (neigh->arp_queue_len_bytes + skb->truesize >
+ neigh->parms->queue_len_bytes) {
struct sk_buff *buff;
+
buff = __skb_dequeue(&neigh->arp_queue);
+ if (!buff)
+ break;
+ neigh->arp_queue_len_bytes -= buff->truesize;
kfree_skb(buff);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
__skb_queue_tail(&neigh->arp_queue, skb);
+ neigh->arp_queue_len_bytes += skb->truesize;
}
rc = 1;
}
@@ -1175,6 +1183,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
write_lock_bh(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
+ neigh->arp_queue_len_bytes = 0;
}
out:
if (update_isrouter) {
@@ -1747,7 +1756,11 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
- NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+ NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
+ /* approximative value for deprecated QUEUE_LEN (in packets) */
+ NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
+ DIV_ROUND_UP(parms->queue_len_bytes,
+ SKB_TRUESIZE(ETH_FRAME_LEN)));
NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
@@ -1974,7 +1987,11 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
switch (i) {
case NDTPA_QUEUE_LEN:
- p->queue_len = nla_get_u32(tbp[i]);
+ p->queue_len_bytes = nla_get_u32(tbp[i]) *
+ SKB_TRUESIZE(ETH_FRAME_LEN);
+ break;
+ case NDTPA_QUEUE_LENBYTES:
+ p->queue_len_bytes = nla_get_u32(tbp[i]);
break;
case NDTPA_PROXY_QLEN:
p->proxy_qlen = nla_get_u32(tbp[i]);
@@ -2397,7 +2414,10 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
struct neigh_table *tbl = state->tbl;
- pn = pn->next;
+ do {
+ pn = pn->next;
+ } while (pn && !net_eq(pneigh_net(pn), net));
+
while (!pn) {
if (++state->bucket > PNEIGH_HASHMASK)
break;
@@ -2635,117 +2655,158 @@ EXPORT_SYMBOL(neigh_app_ns);
#ifdef CONFIG_SYSCTL
-#define NEIGH_VARS_MAX 19
+static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int size, ret;
+ ctl_table tmp = *ctl;
+
+ tmp.data = &size;
+ size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
+ ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret)
+ *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
+ return ret;
+}
+
+enum {
+ NEIGH_VAR_MCAST_PROBE,
+ NEIGH_VAR_UCAST_PROBE,
+ NEIGH_VAR_APP_PROBE,
+ NEIGH_VAR_RETRANS_TIME,
+ NEIGH_VAR_BASE_REACHABLE_TIME,
+ NEIGH_VAR_DELAY_PROBE_TIME,
+ NEIGH_VAR_GC_STALETIME,
+ NEIGH_VAR_QUEUE_LEN,
+ NEIGH_VAR_QUEUE_LEN_BYTES,
+ NEIGH_VAR_PROXY_QLEN,
+ NEIGH_VAR_ANYCAST_DELAY,
+ NEIGH_VAR_PROXY_DELAY,
+ NEIGH_VAR_LOCKTIME,
+ NEIGH_VAR_RETRANS_TIME_MS,
+ NEIGH_VAR_BASE_REACHABLE_TIME_MS,
+ NEIGH_VAR_GC_INTERVAL,
+ NEIGH_VAR_GC_THRESH1,
+ NEIGH_VAR_GC_THRESH2,
+ NEIGH_VAR_GC_THRESH3,
+ NEIGH_VAR_MAX
+};
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table neigh_vars[NEIGH_VARS_MAX];
+ struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
char *dev_name;
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
- {
+ [NEIGH_VAR_MCAST_PROBE] = {
.procname = "mcast_solicit",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_UCAST_PROBE] = {
.procname = "ucast_solicit",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_APP_PROBE] = {
.procname = "app_solicit",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_RETRANS_TIME] = {
.procname = "retrans_time",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_userhz_jiffies,
},
- {
+ [NEIGH_VAR_BASE_REACHABLE_TIME] = {
.procname = "base_reachable_time",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
+ [NEIGH_VAR_DELAY_PROBE_TIME] = {
.procname = "delay_first_probe_time",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
+ [NEIGH_VAR_GC_STALETIME] = {
.procname = "gc_stale_time",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
+ [NEIGH_VAR_QUEUE_LEN] = {
.procname = "unres_qlen",
.maxlen = sizeof(int),
.mode = 0644,
+ .proc_handler = proc_unres_qlen,
+ },
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = {
+ .procname = "unres_qlen_bytes",
+ .maxlen = sizeof(int),
+ .mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_PROXY_QLEN] = {
.procname = "proxy_qlen",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_ANYCAST_DELAY] = {
.procname = "anycast_delay",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_userhz_jiffies,
},
- {
+ [NEIGH_VAR_PROXY_DELAY] = {
.procname = "proxy_delay",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_userhz_jiffies,
},
- {
+ [NEIGH_VAR_LOCKTIME] = {
.procname = "locktime",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_userhz_jiffies,
},
- {
+ [NEIGH_VAR_RETRANS_TIME_MS] = {
.procname = "retrans_time_ms",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
- {
+ [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
.procname = "base_reachable_time_ms",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
- {
+ [NEIGH_VAR_GC_INTERVAL] = {
.procname = "gc_interval",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
+ [NEIGH_VAR_GC_THRESH1] = {
.procname = "gc_thresh1",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_GC_THRESH2] = {
.procname = "gc_thresh2",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_GC_THRESH3] = {
.procname = "gc_thresh3",
.maxlen = sizeof(int),
.mode = 0644,
@@ -2778,47 +2839,49 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
if (!t)
goto err;
- t->neigh_vars[0].data = &p->mcast_probes;
- t->neigh_vars[1].data = &p->ucast_probes;
- t->neigh_vars[2].data = &p->app_probes;
- t->neigh_vars[3].data = &p->retrans_time;
- t->neigh_vars[4].data = &p->base_reachable_time;
- t->neigh_vars[5].data = &p->delay_probe_time;
- t->neigh_vars[6].data = &p->gc_staletime;
- t->neigh_vars[7].data = &p->queue_len;
- t->neigh_vars[8].data = &p->proxy_qlen;
- t->neigh_vars[9].data = &p->anycast_delay;
- t->neigh_vars[10].data = &p->proxy_delay;
- t->neigh_vars[11].data = &p->locktime;
- t->neigh_vars[12].data = &p->retrans_time;
- t->neigh_vars[13].data = &p->base_reachable_time;
+ t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes;
+ t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes;
+ t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time;
+ t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
+ t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
+ t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
+ t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
+ t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
+ t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
+ t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
+ t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;
if (dev) {
dev_name_source = dev->name;
/* Terminate the table early */
- memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
+ memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
+ sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
} else {
dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
- t->neigh_vars[14].data = (int *)(p + 1);
- t->neigh_vars[15].data = (int *)(p + 1) + 1;
- t->neigh_vars[16].data = (int *)(p + 1) + 2;
- t->neigh_vars[17].data = (int *)(p + 1) + 3;
+ t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
+ t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
}
if (handler) {
/* RetransTime */
- t->neigh_vars[3].proc_handler = handler;
- t->neigh_vars[3].extra1 = dev;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
/* ReachableTime */
- t->neigh_vars[4].proc_handler = handler;
- t->neigh_vars[4].extra1 = dev;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
/* RetransTime (in milliseconds)*/
- t->neigh_vars[12].proc_handler = handler;
- t->neigh_vars[12].extra1 = dev;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
/* ReachableTime (in milliseconds) */
- t->neigh_vars[13].proc_handler = handler;
- t->neigh_vars[13].extra1 = dev;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
}
t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c71c434..db6c2f8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -606,9 +606,12 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
rcu_assign_pointer(queue->rps_map, map);
spin_unlock(&rps_map_lock);
- if (old_map)
+ if (map)
+ jump_label_inc(&rps_needed);
+ if (old_map) {
kfree_rcu(old_map, rcu);
-
+ jump_label_dec(&rps_needed);
+ }
free_cpumask_var(mask);
return len;
}
@@ -780,7 +783,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
#endif
}
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
/*
* netdev_queue sysfs structures and functions.
*/
@@ -826,6 +829,23 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
+static ssize_t show_trans_timeout(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ char *buf)
+{
+ unsigned long trans_timeout;
+
+ spin_lock_irq(&queue->_xmit_lock);
+ trans_timeout = queue->trans_timeout;
+ spin_unlock_irq(&queue->_xmit_lock);
+
+ return sprintf(buf, "%lu", trans_timeout);
+}
+
+static struct netdev_queue_attribute queue_trans_timeout =
+ __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+
+#ifdef CONFIG_XPS
static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
@@ -901,7 +921,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
struct xps_map *map, *new_map;
struct xps_dev_maps *dev_maps, *new_dev_maps;
int nonempty = 0;
- int numa_node = -2;
+ int numa_node_id = -2;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -944,10 +964,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
#ifdef CONFIG_NUMA
if (need_set) {
- if (numa_node == -2)
- numa_node = cpu_to_node(cpu);
- else if (numa_node != cpu_to_node(cpu))
- numa_node = -1;
+ if (numa_node_id == -2)
+ numa_node_id = cpu_to_node(cpu);
+ else if (numa_node_id != cpu_to_node(cpu))
+ numa_node_id = -1;
}
#endif
if (need_set && pos >= map_len) {
@@ -997,7 +1017,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
if (dev_maps)
kfree_rcu(dev_maps, rcu);
- netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
+ netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
NUMA_NO_NODE);
mutex_unlock(&xps_map_mutex);
@@ -1020,12 +1040,17 @@ error:
static struct netdev_queue_attribute xps_cpus_attribute =
__ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+#endif /* CONFIG_XPS */
static struct attribute *netdev_queue_default_attrs[] = {
+ &queue_trans_timeout.attr,
+#ifdef CONFIG_XPS
&xps_cpus_attribute.attr,
+#endif
NULL
};
+#ifdef CONFIG_XPS
static void netdev_queue_release(struct kobject *kobj)
{
struct netdev_queue *queue = to_netdev_queue(kobj);
@@ -1076,10 +1101,13 @@ static void netdev_queue_release(struct kobject *kobj)
memset(kobj, 0, sizeof(*kobj));
dev_put(queue->dev);
}
+#endif /* CONFIG_XPS */
static struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
+#ifdef CONFIG_XPS
.release = netdev_queue_release,
+#endif
.default_attrs = netdev_queue_default_attrs,
};
@@ -1102,12 +1130,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
return error;
}
-#endif /* CONFIG_XPS */
+#endif /* CONFIG_SYSFS */
int
netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
{
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
int i;
int error = 0;
@@ -1125,14 +1153,14 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
return error;
#else
return 0;
-#endif
+#endif /* CONFIG_SYSFS */
}
static int register_queue_kobjects(struct net_device *net)
{
int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
net->queues_kset = kset_create_and_add("queues",
NULL, &net->dev.kobj);
if (!net->queues_kset)
@@ -1173,7 +1201,7 @@ static void remove_queue_kobjects(struct net_device *net)
net_rx_queue_update_kobjects(net, real_rx, 0);
netdev_queue_update_kobjects(net, real_tx, 0);
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
kset_unregister(net->queues_kset);
#endif
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index cf64c1f..1a7d8e2 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -422,6 +422,7 @@ static void arp_reply(struct sk_buff *skb)
struct sk_buff *send_skb;
struct netpoll *np, *tmp;
unsigned long flags;
+ int hlen, tlen;
int hits = 0;
if (list_empty(&npinfo->rx_np))
@@ -479,8 +480,9 @@ static void arp_reply(struct sk_buff *skb)
if (tip != np->local_ip)
continue;
- send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
- LL_RESERVED_SPACE(np->dev));
+ hlen = LL_RESERVED_SPACE(np->dev);
+ tlen = np->dev->needed_tailroom;
+ send_skb = find_skb(np, size + hlen + tlen, hlen);
if (!send_skb)
continue;
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
new file mode 100644
index 0000000..3a9fd48
--- /dev/null
+++ b/net/core/netprio_cgroup.c
@@ -0,0 +1,344 @@
+/*
+ * net/core/netprio_cgroup.c Priority Control Group
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <linux/rcupdate.h>
+#include <linux/atomic.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+#include <net/sock.h>
+#include <net/netprio_cgroup.h>
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+ struct cgroup *cgrp);
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
+
+struct cgroup_subsys net_prio_subsys = {
+ .name = "net_prio",
+ .create = cgrp_create,
+ .destroy = cgrp_destroy,
+ .populate = cgrp_populate,
+#ifdef CONFIG_NETPRIO_CGROUP
+ .subsys_id = net_prio_subsys_id,
+#endif
+ .module = THIS_MODULE
+};
+
+#define PRIOIDX_SZ 128
+
+static unsigned long prioidx_map[PRIOIDX_SZ];
+static DEFINE_SPINLOCK(prioidx_map_lock);
+static atomic_t max_prioidx = ATOMIC_INIT(0);
+
+static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
+{
+ return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
+ struct cgroup_netprio_state, css);
+}
+
+static int get_prioidx(u32 *prio)
+{
+ unsigned long flags;
+ u32 prioidx;
+
+ spin_lock_irqsave(&prioidx_map_lock, flags);
+ prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
+ set_bit(prioidx, prioidx_map);
+ spin_unlock_irqrestore(&prioidx_map_lock, flags);
+ if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ)
+ return -ENOSPC;
+
+ atomic_set(&max_prioidx, prioidx);
+ *prio = prioidx;
+ return 0;
+}
+
+static void put_prioidx(u32 idx)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&prioidx_map_lock, flags);
+ clear_bit(idx, prioidx_map);
+ spin_unlock_irqrestore(&prioidx_map_lock, flags);
+}
+
+static void extend_netdev_table(struct net_device *dev, u32 new_len)
+{
+ size_t new_size = sizeof(struct netprio_map) +
+ ((sizeof(u32) * new_len));
+ struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
+ struct netprio_map *old_priomap;
+ int i;
+
+ old_priomap = rtnl_dereference(dev->priomap);
+
+ if (!new_priomap) {
+ printk(KERN_WARNING "Unable to alloc new priomap!\n");
+ return;
+ }
+
+ for (i = 0;
+ old_priomap && (i < old_priomap->priomap_len);
+ i++)
+ new_priomap->priomap[i] = old_priomap->priomap[i];
+
+ new_priomap->priomap_len = new_len;
+
+ rcu_assign_pointer(dev->priomap, new_priomap);
+ if (old_priomap)
+ kfree_rcu(old_priomap, rcu);
+}
+
+static void update_netdev_tables(void)
+{
+ struct net_device *dev;
+ u32 max_len = atomic_read(&max_prioidx);
+ struct netprio_map *map;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, dev) {
+ map = rtnl_dereference(dev->priomap);
+ if ((!map) ||
+ (map->priomap_len < max_len))
+ extend_netdev_table(dev, max_len);
+ }
+ rtnl_unlock();
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
+{
+ struct cgroup_netprio_state *cs;
+ int ret;
+
+ cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+ if (!cs)
+ return ERR_PTR(-ENOMEM);
+
+ if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
+ kfree(cs);
+ return ERR_PTR(-EINVAL);
+ }
+
+ ret = get_prioidx(&cs->prioidx);
+ if (ret != 0) {
+ printk(KERN_WARNING "No space in priority index array\n");
+ kfree(cs);
+ return ERR_PTR(ret);
+ }
+
+ return &cs->css;
+}
+
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct cgroup_netprio_state *cs;
+ struct net_device *dev;
+ struct netprio_map *map;
+
+ cs = cgrp_netprio_state(cgrp);
+ rtnl_lock();
+ for_each_netdev(&init_net, dev) {
+ map = rtnl_dereference(dev->priomap);
+ if (map)
+ map->priomap[cs->prioidx] = 0;
+ }
+ rtnl_unlock();
+ put_prioidx(cs->prioidx);
+ kfree(cs);
+}
+
+static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+{
+ return (u64)cgrp_netprio_state(cgrp)->prioidx;
+}
+
+static int read_priomap(struct cgroup *cont, struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ struct net_device *dev;
+ u32 prioidx = cgrp_netprio_state(cont)->prioidx;
+ u32 priority;
+ struct netprio_map *map;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, dev) {
+ map = rcu_dereference(dev->priomap);
+ priority = map ? map->priomap[prioidx] : 0;
+ cb->fill(cb, dev->name, priority);
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
+static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer)
+{
+ char *devname = kstrdup(buffer, GFP_KERNEL);
+ int ret = -EINVAL;
+ u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
+ unsigned long priority;
+ char *priostr;
+ struct net_device *dev;
+ struct netprio_map *map;
+
+ if (!devname)
+ return -ENOMEM;
+
+ /*
+ * Minimally sized valid priomap string
+ */
+ if (strlen(devname) < 3)
+ goto out_free_devname;
+
+ priostr = strstr(devname, " ");
+ if (!priostr)
+ goto out_free_devname;
+
+ /*
+ *Separate the devname from the associated priority
+ *and advance the priostr poitner to the priority value
+ */
+ *priostr = '\0';
+ priostr++;
+
+ /*
+ * If the priostr points to NULL, we're at the end of the passed
+ * in string, and its not a valid write
+ */
+ if (*priostr == '\0')
+ goto out_free_devname;
+
+ ret = kstrtoul(priostr, 10, &priority);
+ if (ret < 0)
+ goto out_free_devname;
+
+ ret = -ENODEV;
+
+ dev = dev_get_by_name(&init_net, devname);
+ if (!dev)
+ goto out_free_devname;
+
+ update_netdev_tables();
+ ret = 0;
+ rcu_read_lock();
+ map = rcu_dereference(dev->priomap);
+ if (map)
+ map->priomap[prioidx] = priority;
+ rcu_read_unlock();
+ dev_put(dev);
+
+out_free_devname:
+ kfree(devname);
+ return ret;
+}
+
+static struct cftype ss_files[] = {
+ {
+ .name = "prioidx",
+ .read_u64 = read_prioidx,
+ },
+ {
+ .name = "ifpriomap",
+ .read_map = read_priomap,
+ .write_string = write_priomap,
+ },
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+static int netprio_device_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct netprio_map *old;
+ u32 max_len = atomic_read(&max_prioidx);
+
+ /*
+ * Note this is called with rtnl_lock held so we have update side
+ * protection on our rcu assignments
+ */
+
+ switch (event) {
+
+ case NETDEV_REGISTER:
+ if (max_len)
+ extend_netdev_table(dev, max_len);
+ break;
+ case NETDEV_UNREGISTER:
+ old = rtnl_dereference(dev->priomap);
+ RCU_INIT_POINTER(dev->priomap, NULL);
+ if (old)
+ kfree_rcu(old, rcu);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block netprio_device_notifier = {
+ .notifier_call = netprio_device_event
+};
+
+static int __init init_cgroup_netprio(void)
+{
+ int ret;
+
+ ret = cgroup_load_subsys(&net_prio_subsys);
+ if (ret)
+ goto out;
+#ifndef CONFIG_NETPRIO_CGROUP
+ smp_wmb();
+ net_prio_subsys_id = net_prio_subsys.subsys_id;
+#endif
+
+ register_netdevice_notifier(&netprio_device_notifier);
+
+out:
+ return ret;
+}
+
+static void __exit exit_cgroup_netprio(void)
+{
+ struct netprio_map *old;
+ struct net_device *dev;
+
+ unregister_netdevice_notifier(&netprio_device_notifier);
+
+ cgroup_unload_subsys(&net_prio_subsys);
+
+#ifndef CONFIG_NETPRIO_CGROUP
+ net_prio_subsys_id = -1;
+ synchronize_rcu();
+#endif
+
+ rtnl_lock();
+ for_each_netdev(&init_net, dev) {
+ old = rtnl_dereference(dev->priomap);
+ RCU_INIT_POINTER(dev->priomap, NULL);
+ if (old)
+ kfree_rcu(old, rcu);
+ }
+ rtnl_unlock();
+}
+
+module_init(init_cgroup_netprio);
+module_exit(exit_cgroup_netprio);
+MODULE_LICENSE("GPL v2");
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0001c24..aa53a35 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1304,7 +1304,7 @@ static ssize_t pktgen_if_write(struct file *file,
scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr);
- ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr);
+ pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr;
if (debug)
printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf);
@@ -1327,8 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file,
scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr);
- ipv6_addr_copy(&pkt_dev->cur_in6_daddr,
- &pkt_dev->min_in6_daddr);
+ pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr;
if (debug)
printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf);
@@ -1371,7 +1370,7 @@ static ssize_t pktgen_if_write(struct file *file,
scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr);
- ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr);
+ pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr;
if (debug)
printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf);
@@ -2079,9 +2078,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
ifp = ifp->if_next) {
if (ifp->scope == IFA_LINK &&
!(ifp->flags & IFA_F_TENTATIVE)) {
- ipv6_addr_copy(&pkt_dev->
- cur_in6_saddr,
- &ifp->addr);
+ pkt_dev->cur_in6_saddr = ifp->addr;
err = 0;
break;
}
@@ -2958,8 +2955,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
iph->payload_len = htons(sizeof(struct udphdr) + datalen);
iph->nexthdr = IPPROTO_UDP;
- ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
- ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
+ iph->daddr = pkt_dev->cur_in6_daddr;
+ iph->saddr = pkt_dev->cur_in6_saddr;
skb->mac_header = (skb->network_header - ETH_HLEN -
pkt_dev->pkt_overhead);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 06438f9..678ae4e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -245,6 +245,55 @@ nodata:
EXPORT_SYMBOL(__alloc_skb);
/**
+ * build_skb - build a network buffer
+ * @data: data buffer provided by caller
+ *
+ * Allocate a new &sk_buff. Caller provides space holding head and
+ * skb_shared_info. @data must have been allocated by kmalloc()
+ * The return is the new skb buffer.
+ * On a failure the return is %NULL, and @data is not freed.
+ * Notes :
+ * Before IO, driver allocates only data buffer where NIC put incoming frame
+ * Driver should add room at head (NET_SKB_PAD) and
+ * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
+ * After IO, driver calls build_skb(), to allocate sk_buff and populate it
+ * before giving packet to stack.
+ * RX rings only contains data buffers, not full skbs.
+ */
+struct sk_buff *build_skb(void *data)
+{
+ struct skb_shared_info *shinfo;
+ struct sk_buff *skb;
+ unsigned int size;
+
+ skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
+ size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ skb->truesize = SKB_TRUESIZE(size);
+ atomic_set(&skb->users, 1);
+ skb->head = data;
+ skb->data = data;
+ skb_reset_tail_pointer(skb);
+ skb->end = skb->tail + size;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->mac_header = ~0U;
+#endif
+
+ /* make sure we initialize shinfo sequentially */
+ shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+ atomic_set(&shinfo->dataref, 1);
+ kmemcheck_annotate_variable(shinfo->destructor_arg);
+
+ return skb;
+}
+EXPORT_SYMBOL(build_skb);
+
+/**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
* @dev: network device to receive on
* @length: length to allocate
@@ -2230,7 +2279,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb)
* @shiftlen: shift up to this many bytes
*
* Attempts to shift up to shiftlen worth of bytes, which may be less than
- * the length of the skb, from tgt to skb. Returns number bytes shifted.
+ * the length of the skb, from skb to tgt. Returns number bytes shifted.
* It's up to caller to free skb if everything was shifted.
*
* If @tgt runs out of frags, the whole operation is aborted.
@@ -2621,7 +2670,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
* a pointer to the first in a list of new skbs for the segments.
* In case of error it returns ERR_PTR(err).
*/
-struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
diff --git a/net/core/sock.c b/net/core/sock.c
index cbdf51c..1606913 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -125,6 +125,7 @@
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
+#include <net/netprio_cgroup.h>
#include <linux/filter.h>
@@ -221,10 +222,16 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);
-#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+#if defined(CONFIG_CGROUPS)
+#if !defined(CONFIG_NET_CLS_CGROUP)
int net_cls_subsys_id = -1;
EXPORT_SYMBOL_GPL(net_cls_subsys_id);
#endif
+#if !defined(CONFIG_NETPRIO_CGROUP)
+int net_prio_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_prio_subsys_id);
+#endif
+#endif
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
@@ -1120,6 +1127,18 @@ void sock_update_classid(struct sock *sk)
sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
+
+void sock_update_netprioidx(struct sock *sk)
+{
+ struct cgroup_netprio_state *state;
+ if (in_interrupt())
+ return;
+ rcu_read_lock();
+ state = task_netprio_state(current);
+ sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif
/**
@@ -1147,6 +1166,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
atomic_set(&sk->sk_wmem_alloc, 1);
sock_update_classid(sk);
+ sock_update_netprioidx(sk);
}
return sk;
@@ -1213,7 +1233,14 @@ void sk_release_kernel(struct sock *sk)
}
EXPORT_SYMBOL(sk_release_kernel);
-struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
+/**
+ * sk_clone_lock - clone a socket, and lock its clone
+ * @sk: the socket to clone
+ * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
+ */
+struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
struct sock *newsk;
@@ -1306,7 +1333,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
out:
return newsk;
}
-EXPORT_SYMBOL_GPL(sk_clone);
+EXPORT_SYMBOL_GPL(sk_clone_lock);
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 77a65f0..d05559d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -68,8 +68,13 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
if (sock_table != orig_sock_table) {
rcu_assign_pointer(rps_sock_flow_table, sock_table);
- synchronize_rcu();
- vfree(orig_sock_table);
+ if (sock_table)
+ jump_label_inc(&rps_needed);
+ if (orig_sock_table) {
+ jump_label_dec(&rps_needed);
+ synchronize_rcu();
+ vfree(orig_sock_table);
+ }
}
}
OpenPOWER on IntegriCloud