From 06e3041164a1c52d6e8a369af8be3827f428272b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 18 Oct 2012 17:55:31 +0000 Subject: pktgen: Use ipv6_addr_any Use the standard test for a non-zero ipv6 address. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/core/pktgen.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d1dc14c..1d1c216 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2427,11 +2427,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) } } else { /* IPV6 * */ - if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[1] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[2] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ; - else { + if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) { int i; /* Only random destinations yet */ -- cgit v1.1 From f7b86bfe8d9f10e5a9fcacf52dddf29d0d58a33b Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2012 23:55:56 +0000 Subject: sockopt: Make SO_BINDTODEVICE readable The SO_BINDTODEVICE option is the only SOL_SOCKET one that can be set, but cannot be get via sockopt API. The only way we can find the device id a socket is bound to is via sock-diag interface. But the diag works only on hashed sockets, while the opt in question can be set for yet unhashed one. That said, in order to know what device a socket is bound to (we do want to know this in checkpoint-restore project) I propose to make this option getsockopt-able and report the respective device index. Another solution to the problem might be to teach the sock-diag reporting info on unhashed sockets. Should I go this way instead? Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 8a146cf..c49412c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1074,6 +1074,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); break; + case SO_BINDTODEVICE: + v.val = sk->sk_bound_dev_if; + break; default: return -ENOPROTOOPT; } -- cgit v1.1 From 47b70db5558388b3f4ecd10b492a0b3f6d680789 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Fri, 19 Oct 2012 01:09:30 +0000 Subject: net:dev: remove double indentical assignment in dev_change_net_namespace(). This patch removes double assignment of err to -EINVAL in dev_change_net_namespace(). Signed-off-by: Rami Rosen Acked-by: Serge E. Hallyn Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 09cb3f6..b4978e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6264,7 +6264,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char goto out; /* Ensure the device has been registrered */ - err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) goto out; -- cgit v1.1 From c80bbeaec98b36eeba9c6c77061226034d5c4622 Mon Sep 17 00:00:00 2001 From: Hans Zhang Date: Mon, 22 Oct 2012 22:21:23 +0000 Subject: netlink: cleanup the unnecessary return value check It's no needed to check the return value of tab since the NULL situation has been handled already, and the rtnl_msg_handlers[PF_UNSPEC] has been initialized as non-NULL during the rtnetlink_init(). Signed-off-by: Hans Zhang Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 76d4c2c..64fe3cc 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].doit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].doit : NULL; + return tab[msgindex].doit; } static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) @@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].dumpit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].dumpit : NULL; + return tab[msgindex].dumpit; } static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) @@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].calcit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].calcit : NULL; + return tab[msgindex].calcit; } /** -- cgit v1.1 From c658f19db3c3077a3328a0e0ea182f6777337c40 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 25 Oct 2012 04:16:55 +0000 Subject: cgroup: net_prio: Mark local used function static net_prio_attach() is only access via cgroup_subsys callbacks, therefore we can reduce the visibility of this function. Signed-off-by: Daniel Wagner Cc: "David S. Miller" Cc: John Fastabend Cc: Li Zefan Cc: Neil Horman Cc: Tejun Heo Cc: Cc: Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 79285a3..847c02b 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -248,7 +248,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n) return 0; } -void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; void *v; -- cgit v1.1 From 3ace03cc2a03eadf83a59eecada68b37bc1a46ae Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 25 Oct 2012 04:16:57 +0000 Subject: cgroup: net_cls: Remove rcu_read_lock/unlock As Eric pointed out: "Hey task_cls_classid() has its own rcu protection since commit 3fb5a991916091a908d (cls_cgroup: Fix rcu lockdep warning) So we can safely revert Paul commit (1144182a8757f2a1) (We no longer need rcu_read_lock/unlock here)" Signed-off-by: Daniel Wagner Cc: "David S. Miller" Cc: Eric Dumazet Cc: Glauber Costa Cc: Li Zefan Cc: Neil Horman Cc: Paul E. McKenney Cc: Tejun Heo Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/sock.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index c49412c..9fedbbf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1221,9 +1221,7 @@ void sock_update_classid(struct sock *sk) { u32 classid; - rcu_read_lock(); /* doing current task, which cannot vanish. */ classid = task_cls_classid(current); - rcu_read_unlock(); if (classid != sk->sk_classid) sk->sk_classid = classid; } -- cgit v1.1 From fd9a08a7b83074e34c13c6340f673f7a51f53489 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 25 Oct 2012 04:16:58 +0000 Subject: cgroup: net_cls: Pass in task to sock_update_classid() sock_update_classid() assumes that the update operation always are applied on the current task. sock_update_classid() needs to know on which tasks to work on in order to be able to migrate task between cgroups using the struct cgroup_subsys attach() callback. Signed-off-by: Daniel Wagner Cc: "David S. Miller" Cc: "Michael S. Tsirkin" Cc: Eric Dumazet Cc: Glauber Costa Cc: Joe Perches Cc: Neil Horman Cc: Stanislav Kinsbursky Cc: Tejun Heo Cc: Cc: Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 9fedbbf..0a023b8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1217,11 +1217,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) #ifdef CONFIG_CGROUPS #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk) +void sock_update_classid(struct sock *sk, struct task_struct *task) { u32 classid; - classid = task_cls_classid(current); + classid = task_cls_classid(task); if (classid != sk->sk_classid) sk->sk_classid = classid; } @@ -1264,7 +1264,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk); + sock_update_classid(sk, current); sock_update_netprioidx(sk, current); } -- cgit v1.1 From e5a55a898720096f43bc24938f8875c0a1b34cd7 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Oct 2012 08:12:57 +0000 Subject: net: create generic bridge ops The PF_BRIDGE:RTM_{GET|SET}LINK nlmsg family and type are currently embedded in the ./net/bridge module. This prohibits them from being used by other bridging devices. One example of this being hardware that has embedded bridging components. In order to use these nlmsg types more generically this patch adds two net_device_ops hooks. One to set link bridge attributes and another to dump the current bride attributes. ndo_bridge_setlink() ndo_bridge_getlink() CC: Lennert Buytenhek CC: Stephen Hemminger Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 64fe3cc..a068666 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2252,6 +2252,83 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx = 0; + u32 portid = NETLINK_CB(cb->skb).portid; + u32 seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + const struct net_device_ops *ops = dev->netdev_ops; + struct net_device *master = dev->master; + + if (idx < cb->args[0]) + continue; + + if (master && master->netdev_ops->ndo_bridge_getlink) { + const struct net_device_ops *bops = master->netdev_ops; + int err = bops->ndo_bridge_getlink(skb, portid, + seq, dev); + + if (err < 0) + break; + else + idx++; + } + + if (ops->ndo_bridge_getlink) { + int err = ops->ndo_bridge_getlink(skb, portid, + seq, dev); + + if (err < 0) + break; + else + idx++; + } + } + rcu_read_unlock(); + cb->args[0] = idx; + + return skb->len; +} + +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + int err = -EINVAL; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + if (dev->master && dev->master->netdev_ops->ndo_bridge_setlink) { + err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); + if (err) + goto out; + } + + if (dev->netdev_ops->ndo_bridge_setlink) + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2433,5 +2510,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); + + rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } -- cgit v1.1 From 2469ffd723f76ac2d3ce3d4f31ee31ee0a06cd38 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Oct 2012 08:13:03 +0000 Subject: net: set and query VEB/VEPA bridge mode via PF_BRIDGE Hardware switches may support enabling and disabling the loopback switch which puts the device in a VEPA mode defined in the IEEE 802.1Qbg specification. In this mode frames are not switched in the hardware but sent directly to the switch. SR-IOV capable NICs will likely support this mode I am aware of at least two such devices. Also I am told (but don't have any of this hardware available) that there are devices that only support VEPA modes. In these cases it is important at a minimum to be able to query these attributes. This patch adds an additional IFLA_BRIDGE_MODE attribute that can be set and dumped via the PF_BRIDGE:{SET|GET}LINK operations. Also anticipating bridge attributes that may be common for both embedded bridges and software bridges this adds a flags attribute IFLA_BRIDGE_FLAGS currently used to determine if the command or event is being generated to/from an embedded bridge or software bridge. Finally, the event generation is pulled out of the bridge module and into rtnetlink proper. For example using the macvlan driver in VEPA mode on top of an embedded switch requires putting the embedded switch into a VEPA mode to get the expected results. -------- -------- | VEPA | | VEPA | <-- macvlan vepa edge relays -------- -------- | | | | ------------------ | VEPA | <-- embedded switch in NIC ------------------ | | ------------------- | external switch | <-- shiny new physical ------------------- switch with VEPA support A packet sent from the macvlan VEPA at the top could be loopbacked on the embedded switch and never seen by the external switch. So in order for this to work the embedded switch needs to be set in the VEPA state via the above described commands. By making these attributes nested in IFLA_AF_SPEC we allow future extensions to be made as needed. CC: Lennert Buytenhek CC: Stephen Hemminger Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a068666..8d2af0f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2295,13 +2295,60 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static inline size_t bridge_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ + + nla_total_size(sizeof(u32)) /* IFLA_MTU */ + + nla_total_size(sizeof(u32)) /* IFLA_LINK */ + + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ + + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ + + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ + + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ + + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ +} + +static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +{ + struct net *net = dev_net(dev); + struct net_device *master = dev->master; + struct sk_buff *skb; + int err = -EOPNOTSUPP; + + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); + if (!skb) { + err = -ENOMEM; + goto errout; + } + + if (!flags && master && master->netdev_ops->ndo_bridge_getlink) + err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + else if (dev->netdev_ops->ndo_bridge_getlink) + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + + if (err < 0) + goto errout; + + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return 0; +errout: + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); + return err; +} + static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - int err = -EINVAL; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 flags = 0; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -2316,15 +2363,45 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (dev->master && dev->master->netdev_ops->ndo_bridge_setlink) { + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + flags = nla_get_u16(attr); + break; + } + } + } + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + if (!dev->master || + !dev->master->netdev_ops->ndo_bridge_setlink) { + err = -EOPNOTSUPP; + goto out; + } + err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); if (err) goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; } - if (dev->netdev_ops->ndo_bridge_setlink) - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_setlink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + if (attr && nla_type(attr) == IFLA_BRIDGE_FLAGS) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, flags); out: return err; } -- cgit v1.1 From 815cccbf10b27115fb3e5827bef26768616e5e27 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Oct 2012 08:13:09 +0000 Subject: ixgbe: add setlink, getlink support to ixgbe and ixgbevf This adds support for the net device ops to manage the embedded hardware bridge on ixgbe devices. With this patch the bridge mode can be toggled between VEB and VEPA to support stacking macvlan devices or using the embedded switch without any SW component in 802.1Qbg/br environments. Additionally, this adds source address pruning to the ixgbevf driver to prune any frames sent back from a reflective relay on the switch. This is required because the existing hardware does not support this. Without it frames get pushed into the stack with its own src mac which is invalid per 802.1Qbg VEPA definition. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8d2af0f..51dc58f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2252,6 +2252,56 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u16 mode) +{ + struct nlmsghdr *nlh; + struct ifinfomsg *ifm; + struct nlattr *br_afspec; + u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + + nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); + if (nlh == NULL) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifi_family = AF_BRIDGE; + ifm->__ifi_pad = 0; + ifm->ifi_type = dev->type; + ifm->ifi_index = dev->ifindex; + ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_change = 0; + + + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || + nla_put_u32(skb, IFLA_MTU, dev->mtu) || + nla_put_u8(skb, IFLA_OPERSTATE, operstate) || + (dev->master && + nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (dev->addr_len && + nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || + (dev->ifindex != dev->iflink && + nla_put_u32(skb, IFLA_LINK, dev->iflink))) + goto nla_put_failure; + + br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); + if (!br_afspec) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) || + nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) { + nla_nest_cancel(skb, br_afspec); + goto nla_put_failure; + } + nla_nest_end(skb, br_afspec); + + return nlmsg_end(skb, nlh); +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} +EXPORT_SYMBOL(ndo_dflt_bridge_getlink); + static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); -- cgit v1.1 From f3335031b9452baebfe49b8b5e55d3fe0c4677d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Oct 2012 02:26:17 +0000 Subject: net: filter: add vlan tag access BPF filters lack ability to access skb->vlan_tci This patch adds two new ancillary accessors : SKF_AD_VLAN_TAG (44) mapped to vlan_tx_tag_get(skb) SKF_AD_VLAN_TAG_PRESENT (48) mapped to vlan_tx_tag_present(skb) This allows libpcap/tcpdump to use a kernel filter instead of having to fallback to accept all packets, then filter them in user space. Signed-off-by: Eric Dumazet Suggested-by: Ani Sinha Suggested-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/filter.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 3d92ebb..5a114d4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,6 +39,7 @@ #include #include #include +#include /* No hurry in this branch * @@ -341,6 +342,12 @@ load_b: case BPF_S_ANC_CPU: A = raw_smp_processor_id(); continue; + case BPF_S_ANC_VLAN_TAG: + A = vlan_tx_tag_get(skb); + continue; + case BPF_S_ANC_VLAN_TAG_PRESENT: + A = !!vlan_tx_tag_present(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(RXHASH); ANCILLARY(CPU); ANCILLARY(ALU_XOR_X); + ANCILLARY(VLAN_TAG); + ANCILLARY(VLAN_TAG_PRESENT); } } ftest->code = code; -- cgit v1.1 From a8fc92778080c845eaadc369a0ecf5699a03bef0 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2012 02:01:48 +0000 Subject: sk-filter: Add ability to get socket filter program (v2) The SO_ATTACH_FILTER option is set only. I propose to add the get ability by using SO_ATTACH_FILTER in getsockopt. To be less irritating to eyes the SO_GET_FILTER alias to it is declared. This ability is required by checkpoint-restore project to be able to save full state of a socket. There are two issues with getting filter back. First, kernel modifies the sock_filter->code on filter load, thus in order to return the filter element back to user we have to decode it into user-visible constants. Fortunately the modification in question is interconvertible. Second, the BPF_S_ALU_DIV_K code modifies the command argument k to speed up the run-time division by doing kernel_k = reciprocal(user_k). Bad news is that different user_k may result in same kernel_k, so we can't get the original user_k back. Good news is that we don't have to do it. What we need to is calculate a user2_k so, that reciprocal(user2_k) == reciprocal(user_k) == kernel_k i.e. if it's re-loaded back the compiled again value will be exactly the same as it was. That said, the user2_k can be calculated like this user2_k = reciprocal(kernel_k) with an exception, that if kernel_k == 0, then user2_k == 1. The optlen argument is treated like this -- when zero, kernel returns the amount of sock_fprog elements in filter, otherwise it should be large enough for the sock_fprog array. changes since v1: * Declared SO_GET_FILTER in all arch headers * Added decode of vlan-tag codes Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/filter.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/core/sock.c | 6 +++ 2 files changed, 136 insertions(+) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 5a114d4..c23543c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -760,3 +760,133 @@ int sk_detach_filter(struct sock *sk) return ret; } EXPORT_SYMBOL_GPL(sk_detach_filter); + +static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) +{ + static const u16 decodes[] = { + [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, + [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, + [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, + [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, + [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, + [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, + [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, + [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, + [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, + [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, + [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, + [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, + [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, + [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, + [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, + [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, + [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, + [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, + [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, + [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, + [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, + [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, + [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, + [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, + [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, + [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, + [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, + [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, + [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, + [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, + [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, + [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, + [BPF_S_RET_K] = BPF_RET|BPF_K, + [BPF_S_RET_A] = BPF_RET|BPF_A, + [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, + [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, + [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, + [BPF_S_ST] = BPF_ST, + [BPF_S_STX] = BPF_STX, + [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, + [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, + [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, + [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, + [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, + [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, + [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, + [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, + [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, + }; + u16 code; + + code = filt->code; + + to->code = decodes[code]; + to->jt = filt->jt; + to->jf = filt->jf; + + if (code == BPF_S_ALU_DIV_K) { + /* + * When loaded this rule user gave us X, which was + * translated into R = r(X). Now we calculate the + * RR = r(R) and report it back. If next time this + * value is loaded and RRR = r(RR) is calculated + * then the R == RRR will be true. + * + * One exception. X == 1 translates into R == 0 and + * we can't calculate RR out of it with r(). + */ + + if (filt->k == 0) + to->k = 1; + else + to->k = reciprocal_value(filt->k); + + BUG_ON(reciprocal_value(to->k) != filt->k); + } else + to->k = filt->k; +} + +int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) +{ + struct sk_filter *filter; + int i, ret; + + lock_sock(sk); + filter = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + ret = 0; + if (!filter) + goto out; + ret = filter->len; + if (!len) + goto out; + ret = -EINVAL; + if (len < filter->len) + goto out; + + ret = -EFAULT; + for (i = 0; i < filter->len; i++) { + struct sock_filter fb; + + sk_decode_filter(&filter->insns[i], &fb); + if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) + goto out; + } + + ret = filter->len; +out: + release_sock(sk); + return ret; +} diff --git a/net/core/sock.c b/net/core/sock.c index 0a023b8..0628600 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1077,6 +1077,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_BINDTODEVICE: v.val = sk->sk_bound_dev_if; break; + case SO_GET_FILTER: + len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); + if (len < 0) + return len; + + goto lenout; default: return -ENOPROTOOPT; } -- cgit v1.1 From e19d6763cc300fcb706bd291b24ac06be71e1ce6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Nov 2012 09:16:22 +0000 Subject: skb: report completion status for zero copy skbs Even if skb is marked for zero copy, net core might still decide to copy it later which is somewhat slower than a copy in user context: besides copying the data we need to pin/unpin the pages. Add a parameter reporting such cases through zero copy callback: if this happens a lot, device can take this into account and switch to copying in user context. This patch updates all users but ignores the passed value for now: it will be used by follow-up patches. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6e04b1f..4abdf71 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb) uarg = skb_shinfo(skb)->destructor_arg; if (uarg->callback) - uarg->callback(uarg); + uarg->callback(uarg, true); } if (skb_has_frag_list(skb)) @@ -797,7 +797,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) for (i = 0; i < num_frags; i++) skb_frag_unref(skb, i); - uarg->callback(uarg); + uarg->callback(uarg, false); /* skb frags point to kernel buffers */ for (i = num_frags - 1; i >= 0; i--) { -- cgit v1.1 From 25121173f7b1e4ac3fc692df6e7b8c52ec36abba Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Nov 2012 09:16:28 +0000 Subject: skb: api to report errors for zero copy skbs Orphaning frags for zero copy skbs needs to allocate data in atomic context so is has a chance to fail. If it does we currently discard the skb which is safe, but we don't report anything to the caller, so it can not recover by e.g. disabling zero copy. Add an API to free skb reporting such errors: this is used by tun in case orphaning frags fails. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/skbuff.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4abdf71..d9addea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(kfree_skb); /** + * skb_tx_error - report an sk_buff xmit error + * @skb: buffer that triggered an error + * + * Report xmit error if a device callback is tracking this skb. + * skb must be freed afterwards. + */ +void skb_tx_error(struct sk_buff *skb) +{ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + struct ubuf_info *uarg; + + uarg = skb_shinfo(skb)->destructor_arg; + if (uarg->callback) + uarg->callback(uarg, false); + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; + } +} +EXPORT_SYMBOL(skb_tx_error); + +/** * consume_skb - free an skbuff * @skb: buffer to free * -- cgit v1.1 From 25b1e67921f448cdddf70042ba233ffe43d33a9c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 2 Nov 2012 12:56:52 +0000 Subject: net: Fix continued iteration in rtnl_bridge_getlink() Commit e5a55a898720096f43bc24938f8875c0a1b34cd7 ('net: create generic bridge ops') broke the handling of a non-zero starting index in rtnl_bridge_getlink() (based on the old br_dump_ifinfo()). When the starting index is non-zero, we need to increment the current index for each entry that we are skipping. Also, we need to check the index before both cases, since we may previously have stopped iteration between getting information about a device from its master and from itself. Signed-off-by: Ben Hutchings Tested-by: John Fastabend Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 51dc58f..a0e3507 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2315,28 +2315,19 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) const struct net_device_ops *ops = dev->netdev_ops; struct net_device *master = dev->master; - if (idx < cb->args[0]) - continue; - if (master && master->netdev_ops->ndo_bridge_getlink) { - const struct net_device_ops *bops = master->netdev_ops; - int err = bops->ndo_bridge_getlink(skb, portid, - seq, dev); - - if (err < 0) + if (idx >= cb->args[0] && + master->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev) < 0) break; - else - idx++; + idx++; } if (ops->ndo_bridge_getlink) { - int err = ops->ndo_bridge_getlink(skb, portid, - seq, dev); - - if (err < 0) + if (idx >= cb->args[0] && + ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) break; - else - idx++; + idx++; } } rcu_read_unlock(); -- cgit v1.1 From 398f382c0a5bd62f0f31871e844700e1b9fd1ad3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 28 Oct 2012 08:27:19 +0000 Subject: pktgen: clean up ktime_t helpers Some years ago, the ktime_t helper functions ktime_now() and ktime_lt() have been introduced. Instead of defining them inside pktgen.c, they should either use ktime_t library functions or, if not available, they should be defined in ktime.h, so that also others can benefit from them. ktime_compare() is introduced with a similar notion as in timespec_compare(). Signed-off-by: Daniel Borkmann Cc: Cong Wang Cc: Stephen Hemminger Cc: Thomas Gleixner Acked-by: Thomas Gleixner Signed-off-by: David S. Miller --- net/core/pktgen.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1d1c216..b29dacf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -419,20 +419,6 @@ struct pktgen_thread { #define REMOVE 1 #define FIND 0 -static inline ktime_t ktime_now(void) -{ - struct timespec ts; - ktime_get_ts(&ts); - - return timespec_to_ktime(ts); -} - -/* This works even if 32 bit because of careful byte order choice */ -static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2) -{ - return cmp1.tv64 < cmp2.tv64; -} - static const char version[] = "Packet Generator for packet performance testing. " "Version: " VERSION "\n"; @@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, "\n"); /* not really stopped, more like last-running-at */ - stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at; + stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at; idle = pkt_dev->idle_acc; do_div(idle, NSEC_PER_USEC); @@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) return; } - start_time = ktime_now(); + start_time = ktime_get(); if (remaining < 100000) { /* for small delays (<100us), just loop until limit is reached */ do { - end_time = ktime_now(); - } while (ktime_lt(end_time, spin_until)); + end_time = ktime_get(); + } while (ktime_compare(end_time, spin_until) < 0); } else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); @@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_cancel(&t.timer); } while (t.task && pkt_dev->running && !signal_pending(current)); __set_current_state(TASK_RUNNING); - end_time = ktime_now(); + end_time = ktime_get(); } pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); @@ -2912,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t) pktgen_clear_counters(pkt_dev); pkt_dev->running = 1; /* Cranke yeself! */ pkt_dev->skb = NULL; - pkt_dev->started_at = - pkt_dev->next_tx = ktime_now(); + pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); set_pkt_overhead(pkt_dev); @@ -3072,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; - pkt_dev->stopped_at = ktime_now(); + pkt_dev->stopped_at = ktime_get(); pkt_dev->running = 0; show_results(pkt_dev, nr_frags); @@ -3091,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) continue; if (best == NULL) best = pkt_dev; - else if (ktime_lt(pkt_dev->next_tx, best->next_tx)) + else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) best = pkt_dev; } if_unlock(t); @@ -3176,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static void pktgen_resched(struct pktgen_dev *pkt_dev) { - ktime_t idle_start = ktime_now(); + ktime_t idle_start = ktime_get(); schedule(); - pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) { - ktime_t idle_start = ktime_now(); + ktime_t idle_start = ktime_get(); while (atomic_read(&(pkt_dev->skb->users)) != 1) { if (signal_pending(current)) @@ -3194,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) else cpu_relax(); } - pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } static void pktgen_xmit(struct pktgen_dev *pkt_dev) @@ -3216,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) * "never transmit" */ if (unlikely(pkt_dev->delay == ULLONG_MAX)) { - pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); + pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX); return; } -- cgit v1.1 From c38e01b8b958cb6606bcc156d3d00c3ee99a13f8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 2 Nov 2012 16:32:36 +0000 Subject: net: fix bridge notify hook to manage flags correctly The bridge notify hook rtnl_bridge_notify() was not handling the case where the master flags was set or with both flags set. First flags are not being passed correctly and second the logic to parse them is broken. This patch passes the original flags value and fixes the logic. Reported-by: Ben Hutchings Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a0e3507..04a201a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2364,13 +2364,19 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) goto errout; } - if (!flags && master && master->netdev_ops->ndo_bridge_getlink) + if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && + master && master->netdev_ops->ndo_bridge_getlink) { err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); - else if (dev->netdev_ops->ndo_bridge_getlink) - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + if (err < 0) + goto errout; + } - if (err < 0) - goto errout; + if ((flags & BRIDGE_FLAGS_SELF) && + dev->netdev_ops->ndo_bridge_getlink) { + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + if (err < 0) + goto errout; + } rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; @@ -2389,7 +2395,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; - u16 flags = 0; + u16 oflags, flags = 0; + bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -2408,12 +2415,15 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; flags = nla_get_u16(attr); break; } } } + oflags = flags; + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { if (!dev->master || !dev->master->netdev_ops->ndo_bridge_setlink) { @@ -2438,11 +2448,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, flags &= ~BRIDGE_FLAGS_SELF; } - if (attr && nla_type(attr) == IFLA_BRIDGE_FLAGS) + if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); /* Generate event to notify upper layer of bridge change */ if (!err) - err = rtnl_bridge_notify(dev, flags); + err = rtnl_bridge_notify(dev, oflags); out: return err; } -- cgit v1.1 From 62532da9d5f47a7ced3b965aa73ffd5b1afbeb79 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:10 +0000 Subject: net: Add generic packet offload infrastructure. Create a new data structure to contain the GRO/GSO callbacks and add a new registration mechanism. Singed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/core/dev.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 83232a1..6884f87 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -176,8 +176,10 @@ #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) static DEFINE_SPINLOCK(ptype_lock); +static DEFINE_SPINLOCK(offload_lock); static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; static struct list_head ptype_all __read_mostly; /* Taps */ +static struct list_head offload_base __read_mostly; /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -470,6 +472,82 @@ void dev_remove_pack(struct packet_type *pt) } EXPORT_SYMBOL(dev_remove_pack); + +/** + * dev_add_offload - register offload handlers + * @po: protocol offload declaration + * + * Add protocol offload handlers to the networking stack. The passed + * &proto_offload is linked into kernel lists and may not be freed until + * it has been removed from the kernel lists. + * + * This call does not sleep therefore it can not + * guarantee all CPU's that are in middle of receiving packets + * will see the new offload handlers (until the next received packet). + */ +void dev_add_offload(struct packet_offload *po) +{ + struct list_head *head = &offload_base; + + spin_lock(&offload_lock); + list_add_rcu(&po->list, head); + spin_unlock(&offload_lock); +} +EXPORT_SYMBOL(dev_add_offload); + +/** + * __dev_remove_offload - remove offload handler + * @po: packet offload declaration + * + * Remove a protocol offload handler that was previously added to the + * kernel offload handlers by dev_add_offload(). The passed &offload_type + * is removed from the kernel lists and can be freed or reused once this + * function returns. + * + * The packet type might still be in use by receivers + * and must not be freed until after all the CPU's have gone + * through a quiescent state. + */ +void __dev_remove_offload(struct packet_offload *po) +{ + struct list_head *head = &offload_base; + struct packet_offload *po1; + + spin_lock(&ptype_lock); + + list_for_each_entry(po1, head, list) { + if (po == po1) { + list_del_rcu(&po->list); + goto out; + } + } + + pr_warn("dev_remove_offload: %p not found\n", po); +out: + spin_unlock(&ptype_lock); +} +EXPORT_SYMBOL(__dev_remove_offload); + +/** + * dev_remove_offload - remove packet offload handler + * @po: packet offload declaration + * + * Remove a packet offload handler that was previously added to the kernel + * offload handlers by dev_add_offload(). The passed &offload_type is + * removed from the kernel lists and can be freed or reused once this + * function returns. + * + * This call sleeps to guarantee that no CPU is looking at the packet + * type after return. + */ +void dev_remove_offload(struct packet_offload *po) +{ + __dev_remove_offload(po); + + synchronize_net(); +} +EXPORT_SYMBOL(dev_remove_offload); + /****************************************************************************** Device Boot-time Settings Routines @@ -6661,6 +6739,8 @@ static int __init net_dev_init(void) for (i = 0; i < PTYPE_HASH_SIZE; i++) INIT_LIST_HEAD(&ptype_base[i]); + INIT_LIST_HEAD(&offload_base); + if (register_pernet_subsys(&netdev_net_ops)) goto out; -- cgit v1.1 From 22061d8014455b01eb018bd6c35a1b3040ccc230 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:11 +0000 Subject: net: Switch to using the new packet offload infrustructure Convert to using the new GSO/GRO registration mechanism and new packet offload structure. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/core/dev.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 6884f87..cf843a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2072,7 +2072,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; int err; @@ -2101,9 +2101,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, } rcu_read_lock(); - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && !ptype->dev && ptype->gso_segment) { + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { err = ptype->gso_send_check(skb); segs = ERR_PTR(err); @@ -3522,9 +3521,9 @@ static void flush_backlog(void *arg) static int napi_gro_complete(struct sk_buff *skb) { - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + struct list_head *head = &offload_base; int err = -ENOENT; if (NAPI_GRO_CB(skb)->count == 1) { @@ -3534,7 +3533,7 @@ static int napi_gro_complete(struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || ptype->dev || !ptype->gro_complete) + if (ptype->type != type || !ptype->gro_complete) continue; err = ptype->gro_complete(skb); @@ -3584,9 +3583,9 @@ EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + struct list_head *head = &offload_base; int same_flow; int mac_len; enum gro_result ret; @@ -3599,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || ptype->dev || !ptype->gro_receive) + if (ptype->type != type || !ptype->gro_receive) continue; skb_set_network_header(skb, skb_gro_offset(skb)); -- cgit v1.1 From f191a1d17f227032c159e5499809f545402b6dc6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:23 +0000 Subject: net: Remove code duplication between offload structures Move the offload callbacks into its own structure. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/core/dev.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index cf843a2..cf105e8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2102,16 +2102,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { - if (ptype->type == type && ptype->gso_segment) { + if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - err = ptype->gso_send_check(skb); + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) break; __skb_push(skb, (skb->data - skb_network_header(skb))); } - segs = ptype->gso_segment(skb, features); + segs = ptype->callbacks.gso_segment(skb, features); break; } } @@ -3533,10 +3533,10 @@ static int napi_gro_complete(struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || !ptype->gro_complete) + if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->gro_complete(skb); + err = ptype->callbacks.gro_complete(skb); break; } rcu_read_unlock(); @@ -3598,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || !ptype->gro_receive) + if (ptype->type != type || !ptype->callbacks.gro_receive) continue; skb_set_network_header(skb, skb_gro_offset(skb)); @@ -3608,7 +3608,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - pp = ptype->gro_receive(&napi->gro_list, skb); + pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; } rcu_read_unlock(); -- cgit v1.1 From c53aa5058ad5ca8876a47d6639ad4d4f2c5ed584 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Nov 2012 08:08:23 +0000 Subject: net: use right lock in __dev_remove_offload offload_base is protected by offload_lock, not ptype_lock Signed-off-by: Eric Dumazet Cc: Vlad Yasevich Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index cf105e8..2705a2a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -513,7 +513,7 @@ void __dev_remove_offload(struct packet_offload *po) struct list_head *head = &offload_base; struct packet_offload *po1; - spin_lock(&ptype_lock); + spin_lock(&offload_lock); list_for_each_entry(po1, head, list) { if (po == po1) { @@ -524,7 +524,7 @@ void __dev_remove_offload(struct packet_offload *po) pr_warn("dev_remove_offload: %p not found\n", po); out: - spin_unlock(&ptype_lock); + spin_unlock(&offload_lock); } EXPORT_SYMBOL(__dev_remove_offload); -- cgit v1.1 From 2407dc25f741cf73853e0521ce9257531c9fc61d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:02:56 +0000 Subject: netns: Deduplicate and fix copy_net_ns when !CONFIG_NET_NS The copy of copy_net_ns used when the network stack is not built is broken as it does not return -EINVAL when attempting to create a new network namespace. We don't even have a previous network namespace. Since we need a copy of copy_net_ns in net/net_namespace.h that is available when the networking stack is not built at all move the correct version of copy_net_ns from net_namespace.c into net_namespace.h Leaving us with just 2 versions of copy_net_ns. One version for when we compile in network namespace suport and another stub for all other occasions. Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/net_namespace.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net/core') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 42f1e1c..2c1c590 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -347,13 +347,6 @@ struct net *get_net_ns_by_fd(int fd) } #else -struct net *copy_net_ns(unsigned long flags, struct net *old_net) -{ - if (flags & CLONE_NEWNET) - return ERR_PTR(-EINVAL); - return old_net; -} - struct net *get_net_ns_by_fd(int fd) { return ERR_PTR(-EINVAL); -- cgit v1.1 From d328b836823cd4a76611a45f52e208f8ce3d75d7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:02:57 +0000 Subject: userns: make each net (net_ns) belong to a user_ns The user namespace which creates a new network namespace owns that namespace and all resources created in it. This way we can target capability checks for privileged operations against network resources to the user_ns which created the network namespace in which the resource lives. Privilege to the user namespace which owns the network namespace, or any parent user namespace thereof, provides the same privilege to the network resource. This patch is reworked from a version originally by Serge E. Hallyn Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/net_namespace.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2c1c590..6456439 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -145,7 +146,7 @@ static void ops_free_list(const struct pernet_operations *ops, /* * setup_net runs the initializers for the network namespace object. */ -static __net_init int setup_net(struct net *net) +static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with net_mutex held */ const struct pernet_operations *ops, *saved_ops; @@ -155,6 +156,7 @@ static __net_init int setup_net(struct net *net) atomic_set(&net->count, 1); atomic_set(&net->passive, 1); net->dev_base_seq = 1; + net->user_ns = user_ns; #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -232,7 +234,8 @@ void net_drop_ns(void *p) net_free(ns); } -struct net *copy_net_ns(unsigned long flags, struct net *old_net) +struct net *copy_net_ns(unsigned long flags, + struct user_namespace *user_ns, struct net *old_net) { struct net *net; int rv; @@ -243,8 +246,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) net = net_alloc(); if (!net) return ERR_PTR(-ENOMEM); + + get_user_ns(user_ns); + mutex_lock(&net_mutex); - rv = setup_net(net); + rv = setup_net(net, user_ns); if (rv == 0) { rtnl_lock(); list_add_tail_rcu(&net->list, &net_namespace_list); @@ -252,6 +258,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } mutex_unlock(&net_mutex); if (rv < 0) { + put_user_ns(user_ns); net_drop_ns(net); return ERR_PTR(rv); } @@ -308,6 +315,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); + put_user_ns(net->user_ns); net_drop_ns(net); } } @@ -395,7 +403,7 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); mutex_lock(&net_mutex); - if (setup_net(&init_net)) + if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); rtnl_lock(); -- cgit v1.1 From 464dc801c76aa0db88e16e8f5f47c6879858b9b2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:02:59 +0000 Subject: net: Don't export sysctls to unprivileged users In preparation for supporting the creation of network namespaces by unprivileged users, modify all of the per net sysctl exports and refuse to allow them to unprivileged users. This makes it safe for unprivileged users in general to access per net sysctls, and allows sysctls to be exported to unprivileged users on an individual basis as they are deemed safe. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 ++++ net/core/sysctl_net_core.c | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2257148..f1c0c2e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2987,6 +2987,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; } + /* Don't export sysctls to unprivileged users */ + if (neigh_parms_net(p)->user_ns != &init_user_ns) + t->neigh_vars[0].procname = NULL; + snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", p_name, dev_name_source); t->sysctl_header = diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a7c3684..d1b0804 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -216,6 +216,11 @@ static __net_init int sysctl_core_net_init(struct net *net) goto err_dup; tbl[0].data = &net->core.sysctl_somaxconn; + + /* Don't export any sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) { + tbl[0].procname = NULL; + } } net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); -- cgit v1.1 From dfc47ef8639facd77210e74be831943c2fdd9c74 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:03:00 +0000 Subject: net: Push capable(CAP_NET_ADMIN) into the rtnl methods - In rtnetlink_rcv_msg convert the capable(CAP_NET_ADMIN) check to ns_capable(net->user-ns, CAP_NET_ADMIN). Allowing unprivileged users to make netlink calls to modify their local network namespace. - In the rtnetlink doit methods add capable(CAP_NET_ADMIN) so that calls that are not safe for unprivileged users are still protected. Later patches will remove the extra capable calls from methods that are safe for unprivilged users. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/fib_rules.c | 6 ++++++ net/core/neighbour.c | 9 +++++++++ net/core/rtnetlink.c | 17 ++++++++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 58a4ba2..bf5b5b8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -275,6 +275,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct nlattr *tb[FRA_MAX+1]; int err = -EINVAL, unresolved = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; @@ -424,6 +427,9 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct nlattr *tb[FRA_MAX+1]; int err = -EINVAL; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f1c0c2e..7adcdaf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1620,6 +1620,9 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *dev = NULL; int err = -EINVAL; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + ASSERT_RTNL(); if (nlmsg_len(nlh) < sizeof(*ndm)) goto out; @@ -1684,6 +1687,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *dev = NULL; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + ASSERT_RTNL(); err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) @@ -1962,6 +1968,9 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *tb[NDTA_MAX+1]; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, nl_neightbl_policy); if (err < 0) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a810f6a..a40c10b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1547,6 +1547,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *tb[IFLA_MAX+1]; char ifname[IFNAMSIZ]; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) goto errout; @@ -1590,6 +1593,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) int err; LIST_HEAD(list_kill); + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) return err; @@ -1720,6 +1726,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *linkinfo[IFLA_INFO_MAX+1]; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + #ifdef CONFIG_MODULES replay: #endif @@ -2057,6 +2066,9 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 *addr; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) return err; @@ -2123,6 +2135,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) int err = -EINVAL; __u8 *addr; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (nlmsg_len(nlh) < sizeof(*ndm)) return -EINVAL; @@ -2488,7 +2503,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sz_idx = type>>2; kind = type&3; - if (kind != 2 && !capable(CAP_NET_ADMIN)) + if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { -- cgit v1.1 From 00f70de09c418bfb028d03f046e39c1d301db7b2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:03:03 +0000 Subject: net: Allow userns root to force the scm creds If the user calling sendmsg has the appropriate privieleges in their user namespace allow them to set the uid, gid, and pid in the SCM_CREDENTIALS control message to any valid value. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/scm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/scm.c b/net/core/scm.c index ab57084..57fb1ee 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -51,11 +51,11 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || - uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) && + uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || - gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) { + gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) { return 0; } return -EPERM; -- cgit v1.1 From 5e1fccc0bfac4946932b36e4535c03957d35113d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:03:04 +0000 Subject: net: Allow userns root control of the core of the network stack. Allow an unpriviled user who has created a user namespace, and then created a network namespace to effectively use the new network namespace, by reducing capable(CAP_NET_ADMIN) and capable(CAP_NET_RAW) calls to be ns_capable(net->user_ns, CAP_NET_ADMIN), or capable(net->user_ns, CAP_NET_RAW) calls. Settings that merely control a single network device are allowed. Either the network device is a logical network device where restrictions make no difference or the network device is hardware NIC that has been explicity moved from the initial network namespace. In general policy and network stack state changes are allowed while resource control is left unchanged. Allow ethtool ioctls. Allow binding to network devices. Allow setting the socket mark. Allow setting the socket priority. Allow setting the network device alias via sysfs. Allow setting the mtu via sysfs. Allow changing the network device flags via sysfs. Allow setting the network device group via sysfs. Allow the following network device ioctls. SIOCGMIIPHY SIOCGMIIREG SIOCSIFNAME SIOCSIFFLAGS SIOCSIFMETRIC SIOCSIFMTU SIOCSIFHWADDR SIOCSIFSLAVE SIOCADDMULTI SIOCDELMULTI SIOCSIFHWBROADCAST SIOCSMIIREG SIOCBONDENSLAVE SIOCBONDRELEASE SIOCBONDSETHWADDR SIOCBONDCHANGEACTIVE SIOCBRADDIF SIOCBRDELIF SIOCSHWTSTAMP Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/dev.c | 17 +++++++++++++---- net/core/ethtool.c | 2 +- net/core/net-sysfs.c | 15 ++++++++++----- net/core/sock.c | 7 ++++--- 4 files changed, 28 insertions(+), 13 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 974199d..0afae8b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5279,7 +5279,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSIFNAME: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; dev_load(net, ifr.ifr_name); rtnl_lock(); @@ -5300,16 +5300,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) * - require strict serialization. * - do not return a value */ + case SIOCSIFMAP: + case SIOCSIFTXQLEN: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + /* + * These ioctl calls: + * - require local superuser power. + * - require strict serialization. + * - do not return a value + */ case SIOCSIFFLAGS: case SIOCSIFMETRIC: case SIOCSIFMTU: - case SIOCSIFMAP: case SIOCSIFHWADDR: case SIOCSIFSLAVE: case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFHWBROADCAST: - case SIOCSIFTXQLEN: case SIOCSMIIREG: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: @@ -5318,7 +5327,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCBRADDIF: case SIOCBRDELIF: case SIOCSHWTSTAMP: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* fall through */ case SIOCBONDSLAVEINFOQUERY: diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4d64cc2..a870543 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1460,7 +1460,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GEEE: break; default: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index bcf02f6..c66b8c2 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -73,11 +73,12 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len, int (*set)(struct net_device *, unsigned long)) { - struct net_device *net = to_net_dev(dev); + struct net_device *netdev = to_net_dev(dev); + struct net *net = dev_net(netdev); unsigned long new; int ret = -EINVAL; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = kstrtoul(buf, 0, &new); @@ -87,8 +88,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, if (!rtnl_trylock()) return restart_syscall(); - if (dev_isalive(net)) { - if ((ret = (*set)(net, new)) == 0) + if (dev_isalive(netdev)) { + if ((ret = (*set)(netdev, new)) == 0) ret = len; } rtnl_unlock(); @@ -264,6 +265,9 @@ static ssize_t store_tx_queue_len(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + return netdev_store(dev, attr, buf, len, change_tx_queue_len); } @@ -271,10 +275,11 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct net_device *netdev = to_net_dev(dev); + struct net *net = dev_net(netdev); size_t count = len; ssize_t ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* ignore trailing newline */ diff --git a/net/core/sock.c b/net/core/sock.c index 0628600..d4f7b58 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -515,7 +515,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) /* Sorry... */ ret = -EPERM; - if (!capable(CAP_NET_RAW)) + if (!ns_capable(net->user_ns, CAP_NET_RAW)) goto out; ret = -EINVAL; @@ -696,7 +696,8 @@ set_rcvbuf: break; case SO_PRIORITY: - if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) + if ((val >= 0 && val <= 6) || + ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) sk->sk_priority = val; else ret = -EPERM; @@ -813,7 +814,7 @@ set_rcvbuf: clear_bit(SOCK_PASSSEC, &sock->flags); break; case SO_MARK: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) ret = -EPERM; else sk->sk_mark = val; -- cgit v1.1 From b51642f6d77b131dc85d1d71029c3cbb5b07c262 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:03:11 +0000 Subject: net: Enable a userns root rtnl calls that are safe for unprivilged users - Only allow moving network devices to network namespaces you have CAP_NET_ADMIN privileges over. - Enable creating/deleting/modifying interfaces - Enable adding/deleting addresses - Enable adding/setting/deleting neighbour entries - Enable adding/removing routes - Enable adding/removing fib rules - Enable setting the forwarding state - Enable adding/removing ipv6 address labels - Enable setting bridge parameter Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/fib_rules.c | 6 ------ net/core/neighbour.c | 9 --------- net/core/rtnetlink.c | 13 ++++--------- 3 files changed, 4 insertions(+), 24 deletions(-) (limited to 'net/core') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index bf5b5b8..58a4ba2 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -275,9 +275,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct nlattr *tb[FRA_MAX+1]; int err = -EINVAL, unresolved = 0; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; @@ -427,9 +424,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct nlattr *tb[FRA_MAX+1]; int err = -EINVAL; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7adcdaf..f1c0c2e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1620,9 +1620,6 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *dev = NULL; int err = -EINVAL; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - ASSERT_RTNL(); if (nlmsg_len(nlh) < sizeof(*ndm)) goto out; @@ -1687,9 +1684,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *dev = NULL; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - ASSERT_RTNL(); err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) @@ -1968,9 +1962,6 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *tb[NDTA_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, nl_neightbl_policy); if (err < 0) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a40c10b..575a6ee 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1316,6 +1316,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, err = PTR_ERR(net); goto errout; } + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + err = -EPERM; + goto errout; + } err = dev_change_net_namespace(dev, net, ifname); put_net(net); if (err) @@ -1547,9 +1551,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *tb[IFLA_MAX+1]; char ifname[IFNAMSIZ]; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) goto errout; @@ -1593,9 +1594,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) int err; LIST_HEAD(list_kill); - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) return err; @@ -1726,9 +1724,6 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *linkinfo[IFLA_INFO_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - #ifdef CONFIG_MODULES replay: #endif -- cgit v1.1 From 1f743b07652f11100bee004e261b9931632beac1 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 12 Nov 2012 15:51:54 +0000 Subject: net: core: use this_cpu_ptr per-cpu helper flush_tasklet is a struct, not a pointer in percpu var. so use this_cpu_ptr to get the member pointer. Signed-off-by: Shan Wei Reviewed-by: Christoph Lameter Signed-off-by: David S. Miller --- net/core/flow.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/flow.c b/net/core/flow.c index e318c7e..b0901ee 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -327,11 +327,9 @@ static void flow_cache_flush_tasklet(unsigned long data) static void flow_cache_flush_per_cpu(void *data) { struct flow_flush_info *info = data; - int cpu; struct tasklet_struct *tasklet; - cpu = smp_processor_id(); - tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; + tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet); tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } -- cgit v1.1 From 01f1c6b994bc4e0cf048534e67702e08ae69a890 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Nov 2012 10:59:21 +0000 Subject: net: remove unnecessary wireless includes The wireless and wext includes in net-sysfs.c aren't needed, so remove them. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c66b8c2..1f8a13c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -18,11 +18,9 @@ #include #include #include -#include #include #include #include -#include #include "net-sysfs.h" -- cgit v1.1 From 388dfc2d2d9c43c251921a397d6fe5ef7dc34731 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 20 Nov 2012 00:57:04 +0000 Subject: net: Remove redundant null check before kfree in dev.c kfree on a null pointer is a no-op. Signed-off-by: Sachin Kamat Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 0afae8b..7304ea8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1153,10 +1153,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return -EINVAL; if (!len) { - if (dev->ifalias) { - kfree(dev->ifalias); - dev->ifalias = NULL; - } + kfree(dev->ifalias); + dev->ifalias = NULL; return 0; } -- cgit v1.1 From c91f6df2db4972d3cc983e6988b9abf1ad02f5f9 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Mon, 26 Nov 2012 05:21:08 +0000 Subject: sockopt: Change getsockopt() of SO_BINDTODEVICE to return an interface name Instead of having the getsockopt() of SO_BINDTODEVICE return an index, which will then require another call like if_indextoname() to get the actual interface name, have it return the name directly. This also matches the existing man page description on socket(7) which mentions the argument being an interface name. If the value has not been set, zero is returned and optlen will be set to zero to indicate there is no interface name present. Added a seqlock to protect this code path, and dev_ifname(), from someone changing the device name via dev_change_name(). v2: Added seqlock protection while copying device name. v3: Fixed word wrap in patch. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/core/dev.c | 21 +++++++++++++++++-- net/core/sock.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 79 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 7304ea8..2a5f5586 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -203,6 +203,8 @@ static struct list_head offload_base __read_mostly; DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +DEFINE_SEQLOCK(devnet_rename_seq); + static inline void dev_base_seq_inc(struct net *net) { while (++net->dev_base_seq == 0); @@ -1091,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname) if (dev->flags & IFF_UP) return -EBUSY; - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) + write_seqlock(&devnet_rename_seq); + + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { + write_sequnlock(&devnet_rename_seq); return 0; + } memcpy(oldname, dev->name, IFNAMSIZ); err = dev_get_valid_name(net, dev, newname); - if (err < 0) + if (err < 0) { + write_sequnlock(&devnet_rename_seq); return err; + } rollback: ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); + write_sequnlock(&devnet_rename_seq); return ret; } + write_sequnlock(&devnet_rename_seq); + write_lock_bh(&dev_base_lock); hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); @@ -1124,6 +1135,7 @@ rollback: /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err = ret; + write_seqlock(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); goto rollback; } else { @@ -4148,6 +4160,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; + unsigned seq; /* * Fetch the caller's info block. @@ -4156,6 +4169,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; +retry: + seq = read_seqbegin(&devnet_rename_seq); rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); if (!dev) { @@ -4165,6 +4180,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) strcpy(ifr.ifr_name, dev->name); rcu_read_unlock(); + if (read_seqretry(&devnet_rename_seq, seq)) + goto retry; if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; diff --git a/net/core/sock.c b/net/core/sock.c index d4f7b58..a692ef4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -505,7 +505,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); -static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) +static int sock_setbindtodevice(struct sock *sk, char __user *optval, + int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES @@ -562,6 +563,59 @@ out: return ret; } +static int sock_getbindtodevice(struct sock *sk, char __user *optval, + int __user *optlen, int len) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + struct net *net = sock_net(sk); + struct net_device *dev; + char devname[IFNAMSIZ]; + unsigned seq; + + if (sk->sk_bound_dev_if == 0) { + len = 0; + goto zero; + } + + ret = -EINVAL; + if (len < IFNAMSIZ) + goto out; + +retry: + seq = read_seqbegin(&devnet_rename_seq); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + ret = -ENODEV; + if (!dev) { + rcu_read_unlock(); + goto out; + } + + strcpy(devname, dev->name); + rcu_read_unlock(); + if (read_seqretry(&devnet_rename_seq, seq)) + goto retry; + + len = strlen(devname) + 1; + + ret = -EFAULT; + if (copy_to_user(optval, devname, len)) + goto out; + +zero: + ret = -EFAULT; + if (put_user(len, optlen)) + goto out; + + ret = 0; + +out: +#endif + + return ret; +} + static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) { if (valbool) @@ -589,7 +643,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, */ if (optname == SO_BINDTODEVICE) - return sock_bindtodevice(sk, optval, optlen); + return sock_setbindtodevice(sk, optval, optlen); if (optlen < sizeof(int)) return -EINVAL; @@ -1075,15 +1129,17 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); break; + case SO_BINDTODEVICE: - v.val = sk->sk_bound_dev_if; - break; + return sock_getbindtodevice(sk, optval, optlen, len); + case SO_GET_FILTER: len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); if (len < 0) return len; goto lenout; + default: return -ENOPROTOOPT; } -- cgit v1.1 From bb728820fe7c42fdb838ab2745fb5fe6b18b5ffa Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Wed, 28 Nov 2012 21:55:25 +0000 Subject: core: make GRO methods static. This patch changes three methods to be static and removes their EXPORT_SYMBOLs in core/dev.c and their external declaration in netdevice.h. The methods, dev_gro_receive(), napi_frags_finish() and napi_skb_finish(), which are in the GRO rx path, are not used outside core/dev.c. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 2a5f5586..2f94df2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3592,7 +3592,7 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old) } EXPORT_SYMBOL(napi_gro_flush); -enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_offload *ptype; @@ -3683,7 +3683,6 @@ normal: ret = GRO_NORMAL; goto pull; } -EXPORT_SYMBOL(dev_gro_receive); static inline gro_result_t __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) @@ -3710,7 +3709,7 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return dev_gro_receive(napi, skb); } -gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) +static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { case GRO_NORMAL: @@ -3736,7 +3735,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) return ret; } -EXPORT_SYMBOL(napi_skb_finish); static void skb_gro_reset_offset(struct sk_buff *skb) { @@ -3788,7 +3786,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); -gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, +static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret) { switch (ret) { @@ -3813,7 +3811,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, return ret; } -EXPORT_SYMBOL(napi_frags_finish); static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { -- cgit v1.1 From c07135633bee3f01a6454d15b6411f32cfbeb2fd Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Fri, 30 Nov 2012 01:08:47 +0000 Subject: rtnelink: remove unused parameter from rtnl_create_link(). This patch removes an unused parameter (src_net) from rtnl_create_link() method and from the method single invocation, in veth. This parameter was used in the past when calling ops->get_tx_queues(src_net, tb) in rtnl_create_link(). The get_tx_queues() member of rtnl_link_ops was replaced by two methods, get_num_tx_queues() and get_num_rx_queues(), which do not get any parameter. This was done in commit d40156aa5ecbd51fed932ed4813df82b56e5ff4d by Jiri Pirko ("rtnl: allow to specify different num for rx and tx queue count"). Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 575a6ee..1868625 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1642,7 +1642,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) } EXPORT_SYMBOL(rtnl_configure_link); -struct net_device *rtnl_create_link(struct net *src_net, struct net *net, +struct net_device *rtnl_create_link(struct net *net, char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) { int err; @@ -1840,7 +1840,7 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); - dev = rtnl_create_link(net, dest_net, ifname, ops, tb); + dev = rtnl_create_link(dest_net, ifname, ops, tb); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; -- cgit v1.1 From 4e66ae2ea371cf431283e2cb95480eb860432856 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Mon, 3 Dec 2012 16:17:12 +0000 Subject: net: dev_change_net_namespace: send a KOBJ_REMOVED/KOBJ_ADD When a new nic is created in namespace ns1, the kernel sends a KOBJ_ADD uevent to ns1. When the nic is moved to ns2, we only send a KOBJ_MOVE to ns2, and nothing to ns1. This patch changes that behavior so that when moving a nic from ns1 to ns2, we send a KOBJ_REMOVED to ns1 and KOBJ_ADD to ns2. (The KOBJ_MOVE is still sent to ns2). The effects of this can be seen when starting and stopping containers in an upstart based host. Lxc will create a pair of veth nics, the kernel sends KOBJ_ADD, and upstart starts network-instance jobs for each. When one nic is moved to the container, because no KOBJ_REMOVED event is received, the network-instance job for that veth never goes away. This was reported at https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/1065589 With this patch the networ-instance jobs properly go away. The other oddness solved here is that if a nic is passed into a running upstart-based container, without this patch no network-instance job is started in the container. But when the container creates a new nic itself (ip link add new type veth) then network-interface jobs are created. With this patch, behavior comes in line with a regular host. v2: also send KOBJ_ADD to new netns. There will then be a _MOVE event from the device_rename() call, but that should be innocuous. Signed-off-by: Serge Hallyn Acked-by: "Eric W. Biederman" Acked-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 2f94df2..0aea3fe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6418,6 +6418,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_uc_flush(dev); dev_mc_flush(dev); + /* Send a netdev-removed uevent to the old namespace */ + kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); + /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -6429,6 +6432,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev->iflink = dev->ifindex; } + /* Send a netdev-add uevent to the new namespace */ + kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + /* Fixup kobjects */ err = device_rename(&dev->dev, dev->name); WARN_ON(err); -- cgit v1.1 From ce46cc64d47a8afaf13c300b09a7f9c29f4979b6 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 4 Dec 2012 18:49:15 +0000 Subject: net: neighbour: prohibit negative value for unres_qlen_bytes parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit unres_qlen_bytes and unres_qlen are int type. But multiple relation(unres_qlen_bytes = unres_qlen * SKB_TRUESIZE(ETH_FRAME_LEN)) will cause type overflow when seting unres_qlen. e.g. $ echo 1027506 > /proc/sys/net/ipv4/neigh/eth1/unres_qlen $ cat /proc/sys/net/ipv4/neigh/eth1/unres_qlen 1182657265 $ cat /proc/sys/net/ipv4/neigh/eth1/unres_qlen_bytes -2147479756 The gutted value is not that we setting。 But user/administrator don't know this is caused by int type overflow. what's more, it is meaningless and even dangerous that unres_qlen_bytes is set with negative number. Because, for unresolved neighbour address, kernel will cache packets without limit in __neigh_event_send()(e.g. (u32)-1 = 2GB). Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/core/neighbour.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f1c0c2e..36fc692 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -62,6 +62,9 @@ static void __neigh_notify(struct neighbour *n, int type, int flags); static void neigh_update_notify(struct neighbour *neigh); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); +static int zero; +static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); + static struct neigh_table *neigh_tables; #ifdef CONFIG_PROC_FS static const struct file_operations neigh_stat_seq_fops; @@ -1787,8 +1790,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || /* approximative value for deprecated QUEUE_LEN (in packets) */ nla_put_u32(skb, NDTPA_QUEUE_LEN, - DIV_ROUND_UP(parms->queue_len_bytes, - SKB_TRUESIZE(ETH_FRAME_LEN))) || + parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) || nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || @@ -2777,9 +2779,13 @@ static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, int size, ret; ctl_table tmp = *ctl; + tmp.extra1 = &zero; + tmp.extra2 = &unres_qlen_max; tmp.data = &size; - size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN)); - ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); + + size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); return ret; @@ -2865,7 +2871,8 @@ static struct neigh_sysctl_table { .procname = "unres_qlen_bytes", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_PROXY_QLEN] = { .procname = "proxy_qlen", -- cgit v1.1 From b93196dc5af7729ff7cc50d3d322ab1a364aa14f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 6 Dec 2012 10:04:04 +0800 Subject: net: fix some compiler warning in net/core/neighbour.c net/core/neighbour.c:65:12: warning: 'zero' defined but not used [-Wunused-variable] net/core/neighbour.c:66:12: warning: 'unres_qlen_max' defined but not used [-Wunused-variable] These variables are only used when CONFIG_SYSCTL is defined, so move them under #ifdef CONFIG_SYSCTL. Reported-by: Fengguang Wu Signed-off-by: Cong Wang Acked-by: Shan Wei Signed-off-by: David S. Miller --- net/core/neighbour.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 36fc692..c815f28 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -62,9 +62,6 @@ static void __neigh_notify(struct neighbour *n, int type, int flags); static void neigh_update_notify(struct neighbour *neigh); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); -static int zero; -static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); - static struct neigh_table *neigh_tables; #ifdef CONFIG_PROC_FS static const struct file_operations neigh_stat_seq_fops; @@ -2772,6 +2769,8 @@ EXPORT_SYMBOL(neigh_app_ns); #endif /* CONFIG_ARPD */ #ifdef CONFIG_SYSCTL +static int zero; +static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) -- cgit v1.1 From e3d8fabee3b66ce158b2603f270479b84b6e4ba7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 3 Dec 2012 01:16:32 +0000 Subject: net: call notifiers for mtu change even if iface is not up Do the same thing as in set mac. Call notifiers every time. Signed-off-by: Jiri Pirko Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 0aea3fe..307142a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4971,7 +4971,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) else dev->mtu = new_mtu; - if (!err && dev->flags & IFF_UP) + if (!err) call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; } -- cgit v1.1 From 6a674e9c75b17e7a88ff15b3c2e269eed54f7cfb Mon Sep 17 00:00:00 2001 From: Joseph Gasparakis Date: Fri, 7 Dec 2012 14:14:14 +0000 Subject: net: Add support for hardware-offloaded encapsulation This patch adds support in the kernel for offloading in the NIC Tx and Rx checksumming for encapsulated packets (such as VXLAN and IP GRE). For Tx encapsulation offload, the driver will need to set the right bits in netdev->hw_enc_features. The protocol driver will have to set the skb->encapsulation bit and populate the inner headers, so the NIC driver will use those inner headers to calculate the csum in hardware. For Rx encapsulation offload, the driver will need to set again the skb->encapsulation flag and the skb->ip_csum to CHECKSUM_UNNECESSARY. In that case the protocol driver should push the decapsulated packet up to the stack, again with CHECKSUM_UNNECESSARY. In ether case, the protocol driver should set the skb->encapsulation flag back to zero. Finally the protocol driver should have NETIF_F_RXCSUM flag set in its features. Signed-off-by: Joseph Gasparakis Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/skbuff.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 880722e2..ccbabf5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -682,11 +682,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; + new->inner_transport_header = old->inner_transport_header; + new->inner_network_header = old->inner_transport_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; new->l4_rxhash = old->l4_rxhash; new->no_fcs = old->no_fcs; + new->encapsulation = old->encapsulation; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -892,6 +895,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header += offset; if (skb_mac_header_was_set(new)) new->mac_header += offset; + new->inner_transport_header += offset; + new->inner_network_header += offset; #endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; @@ -1089,6 +1094,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; + skb->inner_transport_header += off; + skb->inner_network_header += off; /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += nhead; @@ -1188,6 +1195,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, n->network_header += off; if (skb_mac_header_was_set(skb)) n->mac_header += off; + n->inner_transport_header += off; + n->inner_network_header += off; #endif return n; -- cgit v1.1 From fc70fb640b159f1d6bf5ad2321cd55e874c8d1b8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 7 Dec 2012 14:14:15 +0000 Subject: net: Handle encapsulated offloads before fragmentation or handing to lower dev This change allows the VXLAN to enable Tx checksum offloading even on devices that do not support encapsulated checksum offloads. The advantage to this is that it allows for the lower device to change due to routing table changes without impacting features on the VXLAN itself. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/dev.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 307142a..a4c4a1b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2324,6 +2324,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } + /* If encapsulation offload request, verify we are testing + * hardware encapsulation features instead of standard + * features for the netdev + */ + if (skb->encapsulation) + features &= dev->hw_enc_features; + if (netif_needs_gso(skb, features)) { if (unlikely(dev_gso_segment(skb, features))) goto out_kfree_skb; @@ -2339,8 +2346,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, * checksumming here. */ if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb_set_transport_header(skb, - skb_checksum_start_offset(skb)); + if (skb->encapsulation) + skb_set_inner_transport_header(skb, + skb_checksum_start_offset(skb)); + else + skb_set_transport_header(skb, + skb_checksum_start_offset(skb)); if (!(features & NETIF_F_ALL_CSUM) && skb_checksum_help(skb)) goto out_kfree_skb; -- cgit v1.1 From 4b5511ebc7e1cf94e4f13be19c2cf3e90edc3395 Mon Sep 17 00:00:00 2001 From: Abhijit Pawar Date: Sun, 9 Dec 2012 23:12:28 +0000 Subject: net: remove obsolete simple_strto This patch replace the obsolete simple_strto with kstrto Signed-off-by: Abhijit Pawar Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netpoll.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 77a0388..12c129f72 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -674,7 +674,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; *delim = 0; - np->local_port = simple_strtol(cur, NULL, 10); + if (kstrtou16(cur, 10, &np->local_port)) + goto parse_failed; cur = delim; } cur++; @@ -706,6 +707,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if (*cur == ' ' || *cur == '\t') np_info(np, "warning: whitespace is not allowed\n"); np->remote_port = simple_strtol(cur, NULL, 10); + if (kstrtou16(cur, 10, &np->remote_port)) + goto parse_failed; cur = delim; } cur++; -- cgit v1.1 From 89c5fa3369a47db0df904c45c1c26e64c0404430 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Dec 2012 13:28:16 +0000 Subject: net: gro: dev_gro_receive() cleanup __napi_gro_receive() is inlined from two call sites for no good reason. Lets move the prep stuff in a function of its own, called only if/when needed. This saves 300 bytes on x86 : # size net/core/dev.o.after net/core/dev.o.before text data bss dec hex filename 51968 1238 1040 54246 d3e6 net/core/dev.o.before 51664 1238 1040 53942 d2b6 net/core/dev.o.after Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index a4c4a1b..4783850 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3603,6 +3603,28 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old) } EXPORT_SYMBOL(napi_gro_flush); +static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) +{ + struct sk_buff *p; + unsigned int maclen = skb->dev->hard_header_len; + + for (p = napi->gro_list; p; p = p->next) { + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= p->vlan_tci ^ skb->vlan_tci; + if (maclen == ETH_HLEN) + diffs |= compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); + else if (!diffs) + diffs = memcmp(skb_mac_header(p), + skb_gro_mac_header(skb), + maclen); + NAPI_GRO_CB(p)->same_flow = !diffs; + NAPI_GRO_CB(p)->flush = 0; + } +} + static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -3619,6 +3641,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; + gro_list_prepare(napi, skb); + rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { if (ptype->type != type || !ptype->callbacks.gro_receive) @@ -3695,30 +3719,6 @@ normal: goto pull; } -static inline gro_result_t -__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) -{ - struct sk_buff *p; - unsigned int maclen = skb->dev->hard_header_len; - - for (p = napi->gro_list; p; p = p->next) { - unsigned long diffs; - - diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; - diffs |= p->vlan_tci ^ skb->vlan_tci; - if (maclen == ETH_HLEN) - diffs |= compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); - else if (!diffs) - diffs = memcmp(skb_mac_header(p), - skb_gro_mac_header(skb), - maclen); - NAPI_GRO_CB(p)->same_flow = !diffs; - NAPI_GRO_CB(p)->flush = 0; - } - - return dev_gro_receive(napi, skb); -} static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { @@ -3768,7 +3768,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { skb_gro_reset_offset(skb); - return napi_skb_finish(__napi_gro_receive(napi, skb), skb); + return napi_skb_finish(dev_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -3866,7 +3866,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) if (!skb) return GRO_DROP; - return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); + return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); } EXPORT_SYMBOL(napi_gro_frags); -- cgit v1.1 From a71258d79e3d05632e90c9f7db5ccf929d276529 Mon Sep 17 00:00:00 2001 From: Abhijit Pawar Date: Mon, 10 Dec 2012 19:30:52 +0000 Subject: net: remove obsolete simple_strto This patch removes the redundant occurences of simple_strto Signed-off-by: Abhijit Pawar Signed-off-by: David S. Miller --- net/core/netpoll.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/core') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 12c129f72..3151acf 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -706,7 +706,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) *delim = 0; if (*cur == ' ' || *cur == '\t') np_info(np, "warning: whitespace is not allowed\n"); - np->remote_port = simple_strtol(cur, NULL, 10); if (kstrtou16(cur, 10, &np->remote_port)) goto parse_failed; cur = delim; -- cgit v1.1 From 75be437230b06fca87908a787f70de0ce7fbab8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Dec 2012 08:38:29 +0000 Subject: net: gro: avoid double copy in skb_gro_receive() __copy_skb_header(nskb, p) already copied p->cb[], no need to copy it again. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ccbabf5..ac9e44a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3028,7 +3028,6 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) memcpy(skb_mac_header(nskb), skb_mac_header(p), p->data - skb_mac_header(p)); - *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; skb_shinfo(nskb)->gso_size = pinfo->gso_size; pinfo->gso_size = 0; -- cgit v1.1