summary refs log tree commit diff stats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/datagram.c 24
-rw-r--r-- net/core/dev.c 336
-rw-r--r-- net/core/dev_addr_lists.c 4
-rw-r--r-- net/core/dst.c 15
-rw-r--r-- net/core/ethtool.c 20
-rw-r--r-- net/core/fib_rules.c 4
-rw-r--r-- net/core/filter.c 4
-rw-r--r-- net/core/flow.c 14
-rw-r--r-- net/core/kmap_skb.h 2
-rw-r--r-- net/core/link_watch.c 9
-rw-r--r-- net/core/neighbour.c 44
-rw-r--r-- net/core/net-sysfs.c 12
-rw-r--r-- net/core/netpoll.c 4
-rw-r--r-- net/core/pktgen.c 25
-rw-r--r-- net/core/rtnetlink.c 34
-rw-r--r-- net/core/scm.c 10
-rw-r--r-- net/core/secure_seq.c 2
-rw-r--r-- net/core/skbuff.c 174
-rw-r--r-- net/core/sock.c 23
-rw-r--r-- net/core/timestamping.c 12
-rw-r--r-- net/core/user_dma.c 6
21 files changed, 515 insertions, 263 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 18ac112..68bbf9f 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -324,15 +324,15 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
int err;
u8 *vaddr;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
@@ -410,15 +410,15 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
int err;
u8 *vaddr;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
@@ -500,15 +500,15 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
int err;
u8 *vaddr;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
@@ -585,16 +585,16 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
__wsum csum2;
int err = 0;
u8 *vaddr;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 231d312..edcf019 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -133,6 +133,10 @@
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
+#include <linux/if_tunnel.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
+#include <linux/net_tstamp.h>
#include "net-sysfs.h"
@@ -1474,6 +1478,57 @@ static inline void net_timestamp_check(struct sk_buff *skb)
__net_timestamp(skb);
}
+static int net_hwtstamp_validate(struct ifreq *ifr)
+{
+ struct hwtstamp_config cfg;
+ enum hwtstamp_tx_types tx_type;
+ enum hwtstamp_rx_filters rx_filter;
+ int tx_type_valid = 0;
+ int rx_filter_valid = 0;
+
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ if (cfg.flags) /* reserved for future extensions */
+ return -EINVAL;
+
+ tx_type = cfg.tx_type;
+ rx_filter = cfg.rx_filter;
+
+ switch (tx_type) {
+ case HWTSTAMP_TX_OFF:
+ case HWTSTAMP_TX_ON:
+ case HWTSTAMP_TX_ONESTEP_SYNC:
+ tx_type_valid = 1;
+ break;
+ }
+
+ switch (rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ rx_filter_valid = 1;
+ break;
+ }
+
+ if (!tx_type_valid || !rx_filter_valid)
+ return -ERANGE;
+
+ return 0;
+}
+
static inline bool is_skb_forwardable(struct net_device *dev,
struct sk_buff *skb)
{
@@ -1955,9 +2010,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
#ifdef CONFIG_HIGHMEM
int i;
if (!(dev->features & NETIF_F_HIGHDMA)) {
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ if (PageHighMem(skb_frag_page(frag)))
return 1;
+ }
}
if (PCI_DMA_BUS_IS_PHYS) {
@@ -1966,7 +2023,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
if (!pdev)
return 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ dma_addr_t addr = page_to_phys(skb_frag_page(frag));
if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
return 1;
}
@@ -2527,25 +2585,31 @@ static inline void ____napi_schedule(struct softnet_data *sd,
/*
* __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers. Returns a non-zero hash number on success
- * and 0 on failure.
+ * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
+ * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
+ * if hash is a canonical 4-tuple hash over transport ports.
*/
-__u32 __skb_get_rxhash(struct sk_buff *skb)
+void __skb_get_rxhash(struct sk_buff *skb)
{
int nhoff, hash = 0, poff;
const struct ipv6hdr *ip6;
const struct iphdr *ip;
+ const struct vlan_hdr *vlan;
u8 ip_proto;
- u32 addr1, addr2, ihl;
+ u32 addr1, addr2;
+ u16 proto;
union {
u32 v32;
u16 v16[2];
} ports;
nhoff = skb_network_offset(skb);
+ proto = skb->protocol;
- switch (skb->protocol) {
+again:
+ switch (proto) {
case __constant_htons(ETH_P_IP):
+ip:
if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
goto done;
@@ -2556,9 +2620,10 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
ip_proto = ip->protocol;
addr1 = (__force u32) ip->saddr;
addr2 = (__force u32) ip->daddr;
- ihl = ip->ihl;
+ nhoff += ip->ihl * 4;
break;
case __constant_htons(ETH_P_IPV6):
+ipv6:
if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
goto done;
@@ -2566,20 +2631,71 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
ip_proto = ip6->nexthdr;
addr1 = (__force u32) ip6->saddr.s6_addr32[3];
addr2 = (__force u32) ip6->daddr.s6_addr32[3];
- ihl = (40 >> 2);
+ nhoff += 40;
break;
+ case __constant_htons(ETH_P_8021Q):
+ if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
+ goto done;
+ vlan = (const struct vlan_hdr *) (skb->data + nhoff);
+ proto = vlan->h_vlan_encapsulated_proto;
+ nhoff += sizeof(*vlan);
+ goto again;
+ case __constant_htons(ETH_P_PPP_SES):
+ if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
+ goto done;
+ proto = *((__be16 *) (skb->data + nhoff +
+ sizeof(struct pppoe_hdr)));
+ nhoff += PPPOE_SES_HLEN;
+ switch (proto) {
+ case __constant_htons(PPP_IP):
+ goto ip;
+ case __constant_htons(PPP_IPV6):
+ goto ipv6;
+ default:
+ goto done;
+ }
default:
goto done;
}
+ switch (ip_proto) {
+ case IPPROTO_GRE:
+ if (pskb_may_pull(skb, nhoff + 16)) {
+ u8 *h = skb->data + nhoff;
+ __be16 flags = *(__be16 *)h;
+
+ /*
+ * Only look inside GRE if version zero and no
+ * routing
+ */
+ if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
+ proto = *(__be16 *)(h + 2);
+ nhoff += 4;
+ if (flags & GRE_CSUM)
+ nhoff += 4;
+ if (flags & GRE_KEY)
+ nhoff += 4;
+ if (flags & GRE_SEQ)
+ nhoff += 4;
+ goto again;
+ }
+ }
+ break;
+ case IPPROTO_IPIP:
+ goto again;
+ default:
+ break;
+ }
+
ports.v32 = 0;
poff = proto_ports_offset(ip_proto);
if (poff >= 0) {
- nhoff += ihl * 4 + poff;
+ nhoff += poff;
if (pskb_may_pull(skb, nhoff + 4)) {
ports.v32 = * (__force u32 *) (skb->data + nhoff);
if (ports.v16[1] < ports.v16[0])
swap(ports.v16[0], ports.v16[1]);
+ skb->l4_rxhash = 1;
}
}
@@ -2592,7 +2708,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
hash = 1;
done:
- return hash;
+ skb->rxhash = hash;
}
EXPORT_SYMBOL(__skb_get_rxhash);
@@ -2606,10 +2722,7 @@ static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow *rflow, u16 next_cpu)
{
- u16 tcpu;
-
- tcpu = rflow->cpu = next_cpu;
- if (tcpu != RPS_NO_CPU) {
+ if (next_cpu != RPS_NO_CPU) {
#ifdef CONFIG_RFS_ACCEL
struct netdev_rx_queue *rxqueue;
struct rps_dev_flow_table *flow_table;
@@ -2637,16 +2750,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
goto out;
old_rflow = rflow;
rflow = &flow_table->flows[flow_id];
- rflow->cpu = next_cpu;
rflow->filter = rc;
if (old_rflow->filter == rflow->filter)
old_rflow->filter = RPS_NO_FILTER;
out:
#endif
rflow->last_qtail =
- per_cpu(softnet_data, tcpu).input_queue_head;
+ per_cpu(softnet_data, next_cpu).input_queue_head;
}
+ rflow->cpu = next_cpu;
return rflow;
}
@@ -2681,13 +2794,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
map = rcu_dereference(rxqueue->rps_map);
if (map) {
if (map->len == 1 &&
- !rcu_dereference_raw(rxqueue->rps_flow_table)) {
+ !rcu_access_pointer(rxqueue->rps_flow_table)) {
tcpu = map->cpus[0];
if (cpu_online(tcpu))
cpu = tcpu;
goto done;
}
- } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
+ } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
goto done;
}
@@ -3102,8 +3215,8 @@ void netdev_rx_handler_unregister(struct net_device *dev)
{
ASSERT_RTNL();
- rcu_assign_pointer(dev->rx_handler, NULL);
- rcu_assign_pointer(dev->rx_handler_data, NULL);
+ RCU_INIT_POINTER(dev->rx_handler, NULL);
+ RCU_INIT_POINTER(dev->rx_handler_data, NULL);
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
@@ -3170,6 +3283,17 @@ another_round:
ncls:
#endif
+ if (vlan_tx_tag_present(skb)) {
+ if (pt_prev) {
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = NULL;
+ }
+ if (vlan_do_receive(&skb))
+ goto another_round;
+ else if (unlikely(!skb))
+ goto out;
+ }
+
rx_handler = rcu_dereference(skb->dev->rx_handler);
if (rx_handler) {
if (pt_prev) {
@@ -3190,18 +3314,6 @@ ncls:
}
}
- if (vlan_tx_tag_present(skb)) {
- if (pt_prev) {
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = NULL;
- }
- if (vlan_do_receive(&skb)) {
- ret = __netif_receive_skb(skb);
- goto out;
- } else if (unlikely(!skb))
- goto out;
- }
-
/* deliver only exact match when indicated */
null_or_dev = deliver_exact ? skb->dev : NULL;
@@ -3429,10 +3541,10 @@ pull:
skb->data_len -= grow;
skb_shinfo(skb)->frags[0].page_offset += grow;
- skb_shinfo(skb)->frags[0].size -= grow;
+ skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
- if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
+ skb_frag_unref(skb, 0);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -3496,11 +3608,10 @@ void skb_gro_reset_offset(struct sk_buff *skb)
NAPI_GRO_CB(skb)->frag0_len = 0;
if (skb->mac_header == skb->tail &&
- !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
+ !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
NAPI_GRO_CB(skb)->frag0 =
- page_address(skb_shinfo(skb)->frags[0].page) +
- skb_shinfo(skb)->frags[0].page_offset;
- NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
+ skb_frag_address(&skb_shinfo(skb)->frags[0]);
+ NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
}
}
EXPORT_SYMBOL(skb_gro_reset_offset);
@@ -3982,6 +4093,60 @@ static int dev_ifconf(struct net *net, char __user *arg)
}
#ifdef CONFIG_PROC_FS
+
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS)
+
+struct dev_iter_state {
+ struct seq_net_private p;
+ unsigned int pos; /* bucket << BUCKET_SPACE + offset */
+};
+
+#define get_bucket(x) ((x) >> BUCKET_SPACE)
+#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
+#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
+
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq)
+{
+ struct dev_iter_state *state = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct net_device *dev;
+ struct hlist_node *p;
+ struct hlist_head *h;
+ unsigned int count, bucket, offset;
+
+ bucket = get_bucket(state->pos);
+ offset = get_offset(state->pos);
+ h = &net->dev_name_head[bucket];
+ count = 0;
+ hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
+ if (count++ == offset) {
+ state->pos = set_bucket_offset(bucket, count);
+ return dev;
+ }
+ }
+
+ return NULL;
+}
+
+static inline struct net_device *dev_from_new_bucket(struct seq_file *seq)
+{
+ struct dev_iter_state *state = seq->private;
+ struct net_device *dev;
+ unsigned int bucket;
+
+ bucket = get_bucket(state->pos);
+ do {
+ dev = dev_from_same_bucket(seq);
+ if (dev)
+ return dev;
+
+ bucket++;
+ state->pos = set_bucket_offset(bucket, 0);
+ } while (bucket < NETDEV_HASHENTRIES);
+
+ return NULL;
+}
+
/*
* This is invoked by the /proc filesystem handler to display a device
* in detail.
@@ -3989,33 +4154,33 @@ static int dev_ifconf(struct net *net, char __user *arg)
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
- struct net *net = seq_file_net(seq);
- loff_t off;
- struct net_device *dev;
+ struct dev_iter_state *state = seq->private;
rcu_read_lock();
if (!*pos)
return SEQ_START_TOKEN;
- off = 1;
- for_each_netdev_rcu(net, dev)
- if (off++ == *pos)
- return dev;
+ /* check for end of the hash */
+ if (state->pos == 0 && *pos > 1)
+ return NULL;
- return NULL;
+ return dev_from_new_bucket(seq);
}
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev = v;
+ struct net_device *dev;
+
+ ++*pos;
if (v == SEQ_START_TOKEN)
- dev = first_net_device_rcu(seq_file_net(seq));
- else
- dev = next_net_device_rcu(dev);
+ return dev_from_new_bucket(seq);
- ++*pos;
- return dev;
+ dev = dev_from_same_bucket(seq);
+ if (dev)
+ return dev;
+
+ return dev_from_new_bucket(seq);
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4114,7 +4279,7 @@ static const struct seq_operations dev_seq_ops = {
static int dev_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &dev_seq_ops,
- sizeof(struct seq_net_private));
+ sizeof(struct dev_iter_state));
}
static const struct file_operations dev_seq_fops = {
@@ -4497,9 +4662,7 @@ void __dev_set_rx_mode(struct net_device *dev)
if (!netif_device_present(dev))
return;
- if (ops->ndo_set_rx_mode)
- ops->ndo_set_rx_mode(dev);
- else {
+ if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
/* Unicast addresses changes may only happen under the rtnl,
* therefore calling __dev_set_promiscuity here is safe.
*/
@@ -4510,10 +4673,10 @@ void __dev_set_rx_mode(struct net_device *dev)
__dev_set_promiscuity(dev, -1);
dev->uc_promisc = false;
}
-
- if (ops->ndo_set_multicast_list)
- ops->ndo_set_multicast_list(dev);
}
+
+ if (ops->ndo_set_rx_mode)
+ ops->ndo_set_rx_mode(dev);
}
void dev_set_rx_mode(struct net_device *dev)
@@ -4524,30 +4687,6 @@ void dev_set_rx_mode(struct net_device *dev)
}
/**
- * dev_ethtool_get_settings - call device's ethtool_ops::get_settings()
- * @dev: device
- * @cmd: memory area for ethtool_ops::get_settings() result
- *
- * The cmd arg is initialized properly (cleared and
- * ethtool_cmd::cmd field set to ETHTOOL_GSET).
- *
- * Return device's ethtool_ops::get_settings() result value or
- * -EOPNOTSUPP when device doesn't expose
- * ethtool_ops::get_settings() operation.
- */
-int dev_ethtool_get_settings(struct net_device *dev,
- struct ethtool_cmd *cmd)
-{
- if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
- return -EOPNOTSUPP;
-
- memset(cmd, 0, sizeof(struct ethtool_cmd));
- cmd->cmd = ETHTOOL_GSET;
- return dev->ethtool_ops->get_settings(dev, cmd);
-}
-EXPORT_SYMBOL(dev_ethtool_get_settings);
-
-/**
* dev_get_flags - get flags reported to userspace
* @dev: device
*
@@ -4863,7 +5002,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return -EOPNOTSUPP;
case SIOCADDMULTI:
- if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+ if (!ops->ndo_set_rx_mode ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
@@ -4871,7 +5010,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
case SIOCDELMULTI:
- if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+ if (!ops->ndo_set_rx_mode ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
@@ -4888,6 +5027,12 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
ifr->ifr_newname[IFNAMSIZ-1] = '\0';
return dev_change_name(dev, ifr->ifr_newname);
+ case SIOCSHWTSTAMP:
+ err = net_hwtstamp_validate(ifr);
+ if (err)
+ return err;
+ /* fall through */
+
/*
* Unknown or private ioctl
*/
@@ -5202,7 +5347,7 @@ static void rollback_registered_many(struct list_head *head)
dev = list_first_entry(head, struct net_device, unreg_list);
call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
- rcu_barrier();
+ synchronize_net();
list_for_each_entry(dev, head, unreg_list)
dev_put(dev);
@@ -5715,6 +5860,12 @@ void netdev_run_todo(void)
__rtnl_unlock();
+ /* Wait for rcu callbacks to finish before attempting to drain
+ * the device list. This usually avoids a 250ms wait.
+ */
+ if (!list_empty(&list))
+ rcu_barrier();
+
while (!list_empty(&list)) {
struct net_device *dev
= list_first_entry(&list, struct net_device, todo_list);
@@ -5735,8 +5886,8 @@ void netdev_run_todo(void)
/* paranoia */
BUG_ON(netdev_refcnt_read(dev));
- WARN_ON(rcu_dereference_raw(dev->ip_ptr));
- WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
+ WARN_ON(rcu_access_pointer(dev->ip_ptr));
+ WARN_ON(rcu_access_pointer(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
if (dev->destructor)
@@ -5940,7 +6091,7 @@ void free_netdev(struct net_device *dev)
kfree(dev->_rx);
#endif
- kfree(rcu_dereference_raw(dev->ingress_queue));
+ kfree(rcu_dereference_protected(dev->ingress_queue, 1));
/* Flush device addresses */
dev_addr_flush(dev);
@@ -6115,6 +6266,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
/*
* Flush the unicast and multicast chains
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index e2e6693..283d1b8 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -591,8 +591,8 @@ EXPORT_SYMBOL(dev_mc_del_global);
* addresses that have no users left. The source device must be
* locked by netif_tx_lock_bh.
*
- * This function is intended to be called from the dev->set_multicast_list
- * or dev->set_rx_mode function of layered software devices.
+ * This function is intended to be called from the ndo_set_rx_mode
+ * function of layered software devices.
*/
int dev_mc_sync(struct net_device *to, struct net_device *from)
{
diff --git a/net/core/dst.c b/net/core/dst.c
index 14b33baf..d5e2c4c 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst_init_metrics(dst, dst_default_metrics, true);
dst->expires = 0UL;
dst->path = dst;
- dst->_neighbour = NULL;
+ RCU_INIT_POINTER(dst->_neighbour, NULL);
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
@@ -229,11 +229,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
smp_rmb();
again:
- neigh = dst->_neighbour;
+ neigh = rcu_dereference_protected(dst->_neighbour, 1);
child = dst->child;
if (neigh) {
- dst->_neighbour = NULL;
+ RCU_INIT_POINTER(dst->_neighbour, NULL);
neigh_release(neigh);
}
@@ -360,14 +360,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst->output = dst_discard;
} else {
+ struct neighbour *neigh;
+
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
- if (dst->_neighbour && dst->_neighbour->dev == dev) {
- dst->_neighbour->dev = dst->dev;
+ rcu_read_lock();
+ neigh = dst_get_neighbour(dst);
+ if (neigh && neigh->dev == dev) {
+ neigh->dev = dst->dev;
dev_hold(dst->dev);
dev_put(dev);
}
+ rcu_read_unlock();
}
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6cdba5f..f444817 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -569,15 +569,25 @@ int __ethtool_set_flags(struct net_device *dev, u32 data)
return 0;
}
-static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
- struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
- int err;
+ ASSERT_RTNL();
- if (!dev->ethtool_ops->get_settings)
+ if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
return -EOPNOTSUPP;
- err = dev->ethtool_ops->get_settings(dev, &cmd);
+ memset(cmd, 0, sizeof(struct ethtool_cmd));
+ cmd->cmd = ETHTOOL_GSET;
+ return dev->ethtool_ops->get_settings(dev, cmd);
+}
+EXPORT_SYMBOL(__ethtool_get_settings);
+
+static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+{
+ int err;
+ struct ethtool_cmd cmd;
+
+ err = __ethtool_get_settings(dev, &cmd);
if (err < 0)
return err;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 27071ee..57e8f95 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -490,7 +490,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (ops->nr_goto_rules > 0) {
list_for_each_entry(tmp, &ops->rules_list, list) {
if (rtnl_dereference(tmp->ctarget) == rule) {
- rcu_assign_pointer(tmp->ctarget, NULL);
+ RCU_INIT_POINTER(tmp->ctarget, NULL);
ops->unresolved_rules++;
}
}
@@ -548,7 +548,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->flags = rule->flags;
if (rule->action == FR_ACT_GOTO &&
- rcu_dereference_raw(rule->ctarget) == NULL)
+ rcu_access_pointer(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
if (rule->iifname[0]) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 36f975f..5dea452 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -436,7 +436,7 @@ error:
*
* Returns 0 if the rule set is legal or -EINVAL if not.
*/
-int sk_chk_filter(struct sock_filter *filter, int flen)
+int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
/*
* Valid instructions are initialized to non-0.
@@ -645,7 +645,7 @@ int sk_detach_filter(struct sock *sk)
filter = rcu_dereference_protected(sk->sk_filter,
sock_owned_by_user(sk));
if (filter) {
- rcu_assign_pointer(sk->sk_filter, NULL);
+ RCU_INIT_POINTER(sk->sk_filter, NULL);
sk_filter_uncharge(sk, filter);
ret = 0;
}
diff --git a/net/core/flow.c b/net/core/flow.c
index 555a456..8ae42de 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -413,7 +413,7 @@ static int __init flow_cache_init(struct flow_cache *fc)
for_each_online_cpu(i) {
if (flow_cache_cpu_prepare(fc, i))
- return -ENOMEM;
+ goto err;
}
fc->hotcpu_notifier = (struct notifier_block){
.notifier_call = flow_cache_cpu,
@@ -426,6 +426,18 @@ static int __init flow_cache_init(struct flow_cache *fc)
add_timer(&fc->rnd_timer);
return 0;
+
+err:
+ for_each_possible_cpu(i) {
+ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
+ kfree(fcp->hash_table);
+ fcp->hash_table = NULL;
+ }
+
+ free_percpu(fc->percpu);
+ fc->percpu = NULL;
+
+ return -ENOMEM;
}
static int __init flow_cache_init_global(void)
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
index 283c2b99..81e1ed7 100644
--- a/net/core/kmap_skb.h
+++ b/net/core/kmap_skb.h
@@ -7,7 +7,7 @@ static inline void *kmap_skb_frag(const skb_frag_t *frag)
local_bh_disable();
#endif
- return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+ return kmap_atomic(skb_frag_page(frag), KM_SKB_DATA_SOFTIRQ);
}
static inline void kunmap_skb_frag(void *vaddr)
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 357bd4e..c3519c6 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -78,8 +78,13 @@ static void rfc2863_policy(struct net_device *dev)
static bool linkwatch_urgent_event(struct net_device *dev)
{
- return netif_running(dev) && netif_carrier_ok(dev) &&
- qdisc_tx_changing(dev);
+ if (!netif_running(dev))
+ return false;
+
+ if (dev->ifindex != dev->iflink)
+ return true;
+
+ return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1334d7e..909ecb3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -844,6 +844,19 @@ static void neigh_invalidate(struct neighbour *neigh)
skb_queue_purge(&neigh->arp_queue);
}
+static void neigh_probe(struct neighbour *neigh)
+ __releases(neigh->lock)
+{
+ struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+ /* keep skb alive even if arp_queue overflows */
+ if (skb)
+ skb = skb_copy(skb, GFP_ATOMIC);
+ write_unlock(&neigh->lock);
+ neigh->ops->solicit(neigh, skb);
+ atomic_inc(&neigh->probes);
+ kfree_skb(skb);
+}
+
/* Called when a timer expires for a neighbour entry. */
static void neigh_timer_handler(unsigned long arg)
@@ -920,14 +933,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh_hold(neigh);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
- struct sk_buff *skb = skb_peek(&neigh->arp_queue);
- /* keep skb alive even if arp_queue overflows */
- if (skb)
- skb = skb_copy(skb, GFP_ATOMIC);
- write_unlock(&neigh->lock);
- neigh->ops->solicit(neigh, skb);
- atomic_inc(&neigh->probes);
- kfree_skb(skb);
+ neigh_probe(neigh);
} else {
out:
write_unlock(&neigh->lock);
@@ -942,7 +948,7 @@ out:
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
int rc;
- unsigned long now;
+ bool immediate_probe = false;
write_lock_bh(&neigh->lock);
@@ -950,14 +956,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
goto out_unlock_bh;
- now = jiffies;
-
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+ unsigned long next, now = jiffies;
+
atomic_set(&neigh->probes, neigh->parms->ucast_probes);
neigh->nud_state = NUD_INCOMPLETE;
- neigh->updated = jiffies;
- neigh_add_timer(neigh, now + 1);
+ neigh->updated = now;
+ next = now + max(neigh->parms->retrans_time, HZ/2);
+ neigh_add_timer(neigh, next);
+ immediate_probe = true;
} else {
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
@@ -989,7 +997,11 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
rc = 1;
}
out_unlock_bh:
- write_unlock_bh(&neigh->lock);
+ if (immediate_probe)
+ neigh_probe(neigh);
+ else
+ write_unlock(&neigh->lock);
+ local_bh_enable();
return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
@@ -1156,10 +1168,14 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
struct dst_entry *dst = skb_dst(skb);
struct neighbour *n2, *n1 = neigh;
write_unlock_bh(&neigh->lock);
+
+ rcu_read_lock();
/* On shaper/eql skb->dst->neighbour != neigh :( */
if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
n1 = n2;
n1->output(n1, skb);
+ rcu_read_unlock();
+
write_lock_bh(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1683e5d..7604a63 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -147,7 +147,7 @@ static ssize_t show_speed(struct device *dev,
if (netif_running(netdev)) {
struct ethtool_cmd cmd;
- if (!dev_ethtool_get_settings(netdev, &cmd))
+ if (!__ethtool_get_settings(netdev, &cmd))
ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
}
rtnl_unlock();
@@ -165,7 +165,7 @@ static ssize_t show_duplex(struct device *dev,
if (netif_running(netdev)) {
struct ethtool_cmd cmd;
- if (!dev_ethtool_get_settings(netdev, &cmd))
+ if (!__ethtool_get_settings(netdev, &cmd))
ret = sprintf(buf, "%s\n",
cmd.duplex ? "full" : "half");
}
@@ -712,13 +712,13 @@ static void rx_queue_release(struct kobject *kobj)
struct rps_dev_flow_table *flow_table;
- map = rcu_dereference_raw(queue->rps_map);
+ map = rcu_dereference_protected(queue->rps_map, 1);
if (map) {
RCU_INIT_POINTER(queue->rps_map, NULL);
kfree_rcu(map, rcu);
}
- flow_table = rcu_dereference_raw(queue->rps_flow_table);
+ flow_table = rcu_dereference_protected(queue->rps_flow_table, 1);
if (flow_table) {
RCU_INIT_POINTER(queue->rps_flow_table, NULL);
call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
@@ -987,10 +987,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
}
if (nonempty)
- rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+ RCU_INIT_POINTER(dev->xps_maps, new_dev_maps);
else {
kfree(new_dev_maps);
- rcu_assign_pointer(dev->xps_maps, NULL);
+ RCU_INIT_POINTER(dev->xps_maps, NULL);
}
if (dev_maps)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 5262251..f57d946 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -762,7 +762,7 @@ int __netpoll_setup(struct netpoll *np)
}
/* last thing to do is link it to the net device structure */
- rcu_assign_pointer(ndev->npinfo, npinfo);
+ RCU_INIT_POINTER(ndev->npinfo, npinfo);
return 0;
@@ -903,7 +903,7 @@ void __netpoll_cleanup(struct netpoll *np)
if (ops->ndo_netpoll_cleanup)
ops->ndo_netpoll_cleanup(np->dev);
- rcu_assign_pointer(np->dev->npinfo, NULL);
+ RCU_INIT_POINTER(np->dev->npinfo, NULL);
/* avoid racing with NAPI reading npinfo */
synchronize_rcu_bh();
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index e35a6fb..0001c24 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2145,9 +2145,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
}
start_time = ktime_now();
- if (remaining < 100000)
- ndelay(remaining); /* really small just spin */
- else {
+ if (remaining < 100000) {
+ /* for small delays (<100us), just loop until limit is reached */
+ do {
+ end_time = ktime_now();
+ } while (ktime_lt(end_time, spin_until));
+ } else {
/* see do_nanosleep */
hrtimer_init_sleeper(&t, current);
do {
@@ -2162,8 +2165,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
hrtimer_cancel(&t.timer);
} while (t.task && pkt_dev->running && !signal_pending(current));
__set_current_state(TASK_RUNNING);
+ end_time = ktime_now();
}
- end_time = ktime_now();
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
@@ -2602,18 +2605,18 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
if (!pkt_dev->page)
break;
}
- skb_shinfo(skb)->frags[i].page = pkt_dev->page;
get_page(pkt_dev->page);
+ skb_frag_set_page(skb, i, pkt_dev->page);
skb_shinfo(skb)->frags[i].page_offset = 0;
/*last fragment, fill rest of data*/
if (i == (frags - 1))
- skb_shinfo(skb)->frags[i].size =
- (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+ skb_frag_size_set(&skb_shinfo(skb)->frags[i],
+ (datalen < PAGE_SIZE ? datalen : PAGE_SIZE));
else
- skb_shinfo(skb)->frags[i].size = frag_len;
- datalen -= skb_shinfo(skb)->frags[i].size;
- skb->len += skb_shinfo(skb)->frags[i].size;
- skb->data_len += skb_shinfo(skb)->frags[i].size;
+ skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len);
+ datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
+ skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
+ skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
i++;
skb_shinfo(skb)->nr_frags = i;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 99d9e95..9083e82 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -731,7 +731,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
size += num_vfs *
(nla_total_size(sizeof(struct ifla_vf_mac)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
- nla_total_size(sizeof(struct ifla_vf_tx_rate)));
+ nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
+ nla_total_size(sizeof(struct ifla_vf_spoofchk)));
return size;
} else
return 0;
@@ -954,13 +955,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_mac vf_mac;
struct ifla_vf_vlan vf_vlan;
struct ifla_vf_tx_rate vf_tx_rate;
+ struct ifla_vf_spoofchk vf_spoofchk;
+
+ /*
+ * Not all SR-IOV capable drivers support the
+ * spoofcheck query. Preset to -1 so the user
+ * space tool can detect that the driver didn't
+ * report anything.
+ */
+ ivi.spoofchk = -1;
if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
break;
- vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
+ vf_mac.vf =
+ vf_vlan.vf =
+ vf_tx_rate.vf =
+ vf_spoofchk.vf = ivi.vf;
+
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
vf_vlan.qos = ivi.qos;
vf_tx_rate.rate = ivi.tx_rate;
+ vf_spoofchk.setting = ivi.spoofchk;
vf = nla_nest_start(skb, IFLA_VF_INFO);
if (!vf) {
nla_nest_cancel(skb, vfinfo);
@@ -968,7 +983,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
}
NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
- NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
+ NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+ &vf_tx_rate);
+ NLA_PUT(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+ &vf_spoofchk);
nla_nest_end(skb, vf);
}
nla_nest_end(skb, vfinfo);
@@ -1202,6 +1220,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
ivt->rate);
break;
}
+ case IFLA_VF_SPOOFCHK: {
+ struct ifla_vf_spoofchk *ivs;
+ ivs = nla_data(vf);
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_spoofchk)
+ err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
+ ivs->setting);
+ break;
+ }
default:
err = -EINVAL;
break;
@@ -1604,7 +1631,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
dev_net_set(dev, net);
dev->rtnl_link_ops = ops;
dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
- dev->real_num_tx_queues = real_num_queues;
if (tb[IFLA_MTU])
dev->mtu = nla_get_u32(tb[IFLA_MTU]);
diff --git a/net/core/scm.c b/net/core/scm.c
index 811b53f..ff52ad0 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
if (err)
goto error;
- if (pid_vnr(p->pid) != p->creds.pid) {
+ if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
struct pid *pid;
err = -ESRCH;
pid = find_get_pid(p->creds.pid);
@@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
p->pid = pid;
}
- if ((p->cred->euid != p->creds.uid) ||
- (p->cred->egid != p->creds.gid)) {
+ if (!p->cred ||
+ (p->cred->euid != p->creds.uid) ||
+ (p->cred->egid != p->creds.gid)) {
struct cred *cred;
err = -ENOMEM;
cred = prepare_creds();
@@ -193,7 +194,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
cred->uid = cred->euid = p->creds.uid;
cred->gid = cred->egid = p->creds.gid;
- put_cred(p->cred);
+ if (p->cred)
+ put_cred(p->cred);
p->cred = cred;
}
break;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 45329d7..025233d 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -35,7 +35,7 @@ static u32 seq_scale(u32 seq)
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport)
{
u32 secret[MD5_MESSAGE_BYTES / 4];
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 387703f..ca4db40 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
goto out;
prefetchw(skb);
- size = SKB_DATA_ALIGN(size);
- data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
- gfp_mask, node);
+ /* We do our best to align skb_shared_info on a separate cache
+ * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+ * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+ * Both skb->head and skb_shared_info are cache line aligned.
+ */
+ size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ data = kmalloc_node_track_caller(size, gfp_mask, node);
if (!data)
goto nodata;
+ /* kmalloc(size) might give us more room than requested.
+ * Put skb_shared_info exactly at the end of allocated zone,
+ * to allow max possible filling before reallocation.
+ */
+ size = SKB_WITH_OVERHEAD(ksize(data));
prefetchw(data + size);
/*
@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->truesize = size + sizeof(struct sk_buff);
+ /* Account for allocated memory : skb + skb->head */
+ skb->truesize = SKB_TRUESIZE(size);
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
@@ -326,7 +336,7 @@ static void skb_release_data(struct sk_buff *skb)
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
}
/*
@@ -475,6 +485,30 @@ void consume_skb(struct sk_buff *skb)
EXPORT_SYMBOL(consume_skb);
/**
+ * skb_recycle - clean up an skb for reuse
+ * @skb: buffer
+ *
+ * Recycles the skb to be reused as a receive buffer. This
+ * function does any necessary reference count dropping, and
+ * cleans up the skbuff as if it just came from __alloc_skb().
+ */
+void skb_recycle(struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo;
+
+ skb_release_head_state(skb);
+
+ shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+ atomic_set(&shinfo->dataref, 1);
+
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ skb->data = skb->head + NET_SKB_PAD;
+ skb_reset_tail_pointer(skb);
+}
+EXPORT_SYMBOL(skb_recycle);
+
+/**
* skb_recycle_check - check if skb can be reused for receive
* @skb: buffer
* @skb_size: minimum receive buffer size
@@ -488,33 +522,10 @@ EXPORT_SYMBOL(consume_skb);
*/
bool skb_recycle_check(struct sk_buff *skb, int skb_size)
{
- struct skb_shared_info *shinfo;
-
- if (irqs_disabled())
- return false;
-
- if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)
- return false;
-
- if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
+ if (!skb_is_recycleable(skb, skb_size))
return false;
- skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
- if (skb_end_pointer(skb) - skb->head < skb_size)
- return false;
-
- if (skb_shared(skb) || skb_cloned(skb))
- return false;
-
- skb_release_head_state(skb);
-
- shinfo = skb_shinfo(skb);
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
- atomic_set(&shinfo->dataref, 1);
-
- memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->data = skb->head + NET_SKB_PAD;
- skb_reset_tail_pointer(skb);
+ skb_recycle(skb);
return true;
}
@@ -529,6 +540,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mac_header = old->mac_header;
skb_dst_copy(new, old);
new->rxhash = old->rxhash;
+ new->ooo_okay = old->ooo_okay;
+ new->l4_rxhash = old->l4_rxhash;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
@@ -647,7 +660,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
}
vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
memcpy(page_address(page),
- vaddr + f->page_offset, f->size);
+ vaddr + f->page_offset, skb_frag_size(f));
kunmap_skb_frag(vaddr);
page->private = (unsigned long)head;
head = page;
@@ -655,14 +668,14 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
/* skb frags release userspace buffers */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
uarg->callback(uarg);
/* skb frags point to kernel buffers */
for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
- skb_shinfo(skb)->frags[i - 1].page_offset = 0;
- skb_shinfo(skb)->frags[i - 1].page = head;
+ __skb_fill_page_desc(skb, i-1, head, 0,
+ skb_shinfo(skb)->frags[i - 1].size);
head = (struct page *)head->private;
}
@@ -820,7 +833,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ skb_frag_ref(skb, i);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -911,7 +924,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
goto nofrags;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_ref(skb, i);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1178,20 +1191,20 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
goto drop_pages;
for (; i < nfrags; i++) {
- int end = offset + skb_shinfo(skb)->frags[i].size;
+ int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if (end < len) {
offset = end;
continue;
}
- skb_shinfo(skb)->frags[i++].size = len - offset;
+ skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);
drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1294,9 +1307,11 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Estimate size of pulled pages. */
eat = delta;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- if (skb_shinfo(skb)->frags[i].size >= eat)
+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+ if (size >= eat)
goto pull_pages;
- eat -= skb_shinfo(skb)->frags[i].size;
+ eat -= size;
}
/* If we need update frag list, we are in troubles.
@@ -1359,14 +1374,16 @@ pull_pages:
eat = delta;
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
- eat -= skb_shinfo(skb)->frags[i].size;
+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+ if (size <= eat) {
+ skb_frag_unref(skb, i);
+ eat -= size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
if (eat) {
skb_shinfo(skb)->frags[k].page_offset += eat;
- skb_shinfo(skb)->frags[k].size -= eat;
+ skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
eat = 0;
}
k++;
@@ -1421,7 +1438,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
u8 *vaddr;
@@ -1619,7 +1636,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
- if (__splice_segment(f->page, f->page_offset, f->size,
+ if (__splice_segment(skb_frag_page(f),
+ f->page_offset, skb_frag_size(f),
offset, len, skb, spd, 0, sk, pipe))
return 1;
}
@@ -1729,7 +1747,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
WARN_ON(start > offset + len);
- end = start + frag->size;
+ end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
u8 *vaddr;
@@ -1802,7 +1820,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
__wsum csum2;
u8 *vaddr;
@@ -1877,7 +1895,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
__wsum csum2;
u8 *vaddr;
@@ -2150,7 +2168,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
skb->data_len = len - pos;
for (i = 0; i < nfrags; i++) {
- int size = skb_shinfo(skb)->frags[i].size;
+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
if (pos + size > len) {
skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -2164,10 +2182,10 @@ static inline void skb_split_no_header(struct sk_buff *skb,
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_ref(skb, i);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
- skb_shinfo(skb1)->frags[0].size -= len - pos;
- skb_shinfo(skb)->frags[i].size = len - pos;
+ skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
+ skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
skb_shinfo(skb)->nr_frags++;
}
k++;
@@ -2239,12 +2257,13 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
* commit all, so that we don't have to undo partial changes
*/
if (!to ||
- !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) {
+ !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
+ fragfrom->page_offset)) {
merge = -1;
} else {
merge = to - 1;
- todo -= fragfrom->size;
+ todo -= skb_frag_size(fragfrom);
if (todo < 0) {
if (skb_prepare_for_shift(skb) ||
skb_prepare_for_shift(tgt))
@@ -2254,8 +2273,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragfrom = &skb_shinfo(skb)->frags[from];
fragto = &skb_shinfo(tgt)->frags[merge];
- fragto->size += shiftlen;
- fragfrom->size -= shiftlen;
+ skb_frag_size_add(fragto, shiftlen);
+ skb_frag_size_sub(fragfrom, shiftlen);
fragfrom->page_offset += shiftlen;
goto onlymerged;
@@ -2279,20 +2298,20 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragfrom = &skb_shinfo(skb)->frags[from];
fragto = &skb_shinfo(tgt)->frags[to];
- if (todo >= fragfrom->size) {
+ if (todo >= skb_frag_size(fragfrom)) {
*fragto = *fragfrom;
- todo -= fragfrom->size;
+ todo -= skb_frag_size(fragfrom);
from++;
to++;
} else {
- get_page(fragfrom->page);
+ __skb_frag_ref(fragfrom);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
- fragto->size = todo;
+ skb_frag_size_set(fragto, todo);
fragfrom->page_offset += todo;
- fragfrom->size -= todo;
+ skb_frag_size_sub(fragfrom, todo);
todo = 0;
to++;
@@ -2307,8 +2326,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragfrom = &skb_shinfo(skb)->frags[0];
fragto = &skb_shinfo(tgt)->frags[merge];
- fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ skb_frag_size_add(fragto, skb_frag_size(fragfrom));
+ __skb_frag_unref(fragfrom);
}
/* Reposition in the original skb */
@@ -2405,7 +2424,7 @@ next_skb:
while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
- block_limit = frag->size + st->stepped_offset;
+ block_limit = skb_frag_size(frag) + st->stepped_offset;
if (abs_offset < block_limit) {
if (!st->frag_data)
@@ -2423,7 +2442,7 @@ next_skb:
}
st->frag_idx++;
- st->stepped_offset += frag->size;
+ st->stepped_offset += skb_frag_size(frag);
}
if (st->frag_data) {
@@ -2553,14 +2572,13 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
left = PAGE_SIZE - frag->page_offset;
copy = (length > left)? left : length;
- ret = getfrag(from, (page_address(frag->page) +
- frag->page_offset + frag->size),
+ ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag),
offset, copy, 0, skb);
if (ret < 0)
return -EFAULT;
/* copy was successful so update the size parameters */
- frag->size += copy;
+ skb_frag_size_add(frag, copy);
skb->len += copy;
skb->data_len += copy;
offset += copy;
@@ -2706,12 +2724,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
- size = frag->size;
+ __skb_frag_ref(frag);
+ size = skb_frag_size(frag);
if (pos < offset) {
frag->page_offset += offset - pos;
- frag->size -= offset - pos;
+ skb_frag_size_sub(frag, offset - pos);
}
skb_shinfo(nskb)->nr_frags++;
@@ -2720,7 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
i++;
pos += size;
} else {
- frag->size -= pos + size - (offset + len);
+ skb_frag_size_sub(frag, pos + size - (offset + len));
goto skip_fraglist;
}
@@ -2800,7 +2818,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
} while (--i);
frag->page_offset += offset;
- frag->size -= offset;
+ skb_frag_size_sub(frag, offset);
skb->truesize -= skb->data_len;
skb->len -= skb->data_len;
@@ -2852,7 +2870,7 @@ merge:
unsigned int eat = offset - headlen;
skbinfo->frags[0].page_offset += eat;
- skbinfo->frags[0].size -= eat;
+ skb_frag_size_sub(&skbinfo->frags[0], eat);
skb->data_len -= eat;
skb->len -= eat;
offset = headlen;
@@ -2923,13 +2941,13 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
if (copy > len)
copy = len;
- sg_set_page(&sg[elt], frag->page, copy,
+ sg_set_page(&sg[elt], skb_frag_page(frag), copy,
frag->page_offset+offset-start);
elt++;
if (!(len -= copy))
diff --git a/net/core/sock.c b/net/core/sock.c
index bc745d0..5a08762 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX];
* not depend upon such differences.
*/
#define _SK_MEM_PACKETS 256
-#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256)
+#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
@@ -387,7 +387,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
- rcu_assign_pointer(sk->sk_dst_cache, NULL);
+ RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
dst_release(dst);
return NULL;
}
@@ -738,10 +738,7 @@ set_rcvbuf:
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
case SO_RXQ_OVFL:
- if (valbool)
- sock_set_flag(sk, SOCK_RXQ_OVFL);
- else
- sock_reset_flag(sk, SOCK_RXQ_OVFL);
+ sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
break;
default:
ret = -ENOPROTOOPT;
@@ -1158,7 +1155,7 @@ static void __sk_free(struct sock *sk)
atomic_read(&sk->sk_wmem_alloc) == 0);
if (filter) {
sk_filter_uncharge(sk, filter);
- rcu_assign_pointer(sk->sk_filter, NULL);
+ RCU_INIT_POINTER(sk->sk_filter, NULL);
}
sock_disable_timestamp(sk, SOCK_TIMESTAMP);
@@ -1533,7 +1530,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
skb_shinfo(skb)->nr_frags = npages;
for (i = 0; i < npages; i++) {
struct page *page;
- skb_frag_t *frag;
page = alloc_pages(sk->sk_allocation, 0);
if (!page) {
@@ -1543,12 +1539,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
goto failure;
}
- frag = &skb_shinfo(skb)->frags[i];
- frag->page = page;
- frag->page_offset = 0;
- frag->size = (data_len >= PAGE_SIZE ?
- PAGE_SIZE :
- data_len);
+ __skb_fill_page_desc(skb, i,
+ page, 0,
+ (data_len >= PAGE_SIZE ?
+ PAGE_SIZE :
+ data_len));
data_len -= PAGE_SIZE;
}
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 98a5264..82fb288 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -57,9 +57,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
case PTP_CLASS_V2_VLAN:
phydev = skb->dev->phydev;
if (likely(phydev->drv->txtstamp)) {
+ if (!atomic_inc_not_zero(&sk->sk_refcnt))
+ return;
clone = skb_clone(skb, GFP_ATOMIC);
- if (!clone)
+ if (!clone) {
+ sock_put(sk);
return;
+ }
clone->sk = sk;
phydev->drv->txtstamp(phydev, clone, type);
}
@@ -77,8 +81,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
struct sock_exterr_skb *serr;
int err;
- if (!hwtstamps)
+ if (!hwtstamps) {
+ sock_put(sk);
+ kfree_skb(skb);
return;
+ }
*skb_hwtstamps(skb) = *hwtstamps;
serr = SKB_EXT_ERR(skb);
@@ -87,6 +94,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
skb->sk = NULL;
err = sock_queue_err_skb(sk, skb);
+ sock_put(sk);
if (err)
kfree_skb(skb);
}
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 25d717e..2d7cf3d 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -71,14 +71,14 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(frag);
copy = end - offset;
if (copy > 0) {
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
OpenPOWER on IntegriCloud