diff options
author | Mark Brown <broonie@kernel.org> | 2015-08-21 15:26:37 -0700 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2015-08-21 15:26:37 -0700 |
commit | d033de5ceee8333e4fee3d59a956244d3736102a (patch) | |
tree | 5bdebcac18ed67890571632570c324e0c41a7223 /net | |
parent | 1a8e7fab70c8d7cad2e606e7b21d46e42e51c2fd (diff) | |
parent | f7644cbfcdf03528f0f450f3940c4985b2291f49 (diff) | |
download | op-kernel-dev-d033de5ceee8333e4fee3d59a956244d3736102a.zip op-kernel-dev-d033de5ceee8333e4fee3d59a956244d3736102a.tar.gz |
Merge tag 'v4.2-rc6' into asoc-topology
Linux 4.2-rc6
Diffstat (limited to 'net')
96 files changed, 988 insertions, 581 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 9dd49ca..6e70ddb 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -704,6 +704,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) mutex_unlock(&virtio_9p_lock); + vdev->config->reset(vdev); vdev->config->del_vqs(vdev); sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 1997538..3b78e84 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,6 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); + ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); ax25_stop_t3timer(ax25); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3d0f7d2..ad82324 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2312,6 +2312,10 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) return 1; chan = conn->smp; + if (!chan) { + BT_ERR("SMP security requested but not available"); + return 1; + } if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e97572b..fa7bfce 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -37,14 +37,30 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) { - if (!is_skb_forwardable(skb->dev, skb)) { - kfree_skb(skb); - } else { - skb_push(skb, ETH_HLEN); - br_drop_fake_rtable(skb); - dev_queue_xmit(skb); + if (!is_skb_forwardable(skb->dev, skb)) + goto drop; + + skb_push(skb, ETH_HLEN); + br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && + (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD))) { + int depth; + + if (!__vlan_get_protocol(skb, skb->protocol, &depth)) + goto drop; + + skb_set_network_header(skb, depth); } + dev_queue_xmit(skb); + + return 0; + +drop: + kfree_skb(skb); return 0; } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index e29ad70..c943219 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -323,6 +323,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; struct net_bridge_mdb_htable *mdb; + unsigned long now = jiffies; int err; mdb = mlock_dereference(br->mdb, br); @@ -347,8 +348,9 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); + if (state == MDB_TEMPORARY) + mod_timer(&p->timer, now + br->multicast_membership_interval); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; } @@ -371,6 +373,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -417,20 +420,14 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) { - if (timer_pending(&br->ip4_other_query.timer)) - return -EBUSY; - + if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - } else { - if (timer_pending(&br->ip6_other_query.timer)) - return -EBUSY; - + else ip.u.ip6 = entry->addr.u.ip6; #endif - } spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); @@ -448,6 +445,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (p->port->state == BR_STATE_DISABLED) goto unlock; + entry->state = p->state; rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 742a6c2..0b39dcc 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -39,6 +39,16 @@ static void br_multicast_start_querier(struct net_bridge *br, struct bridge_mcast_own_query *query); static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port); +static void br_ip4_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group, + __u16 vid); +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group, + __u16 vid); +#endif unsigned int br_mdb_rehash_seq; static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) @@ -1010,9 +1020,15 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, continue; } - err = br_ip4_multicast_add_group(br, port, group, vid); - if (err) - break; + if ((type == IGMPV3_CHANGE_TO_INCLUDE || + type == IGMPV3_MODE_IS_INCLUDE) && + ntohs(grec->grec_nsrcs) == 0) { + br_ip4_multicast_leave_group(br, port, group, vid); + } else { + err = br_ip4_multicast_add_group(br, port, group, vid); + if (err) + break; + } } return err; @@ -1071,10 +1087,17 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, continue; } - err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, - vid); - if (err) - break; + if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE || + grec->grec_type == MLD2_MODE_IS_INCLUDE) && + ntohs(*nsrcs) == 0) { + br_ip6_multicast_leave_group(br, port, &grec->grec_mca, + vid); + } else { + err = br_ip6_multicast_add_group(br, port, + &grec->grec_mca, vid); + if (!err) + break; + } } return err; @@ -1393,8 +1416,7 @@ br_multicast_leave_group(struct net_bridge *br, spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED) || - timer_pending(&other_query->timer)) + (port && port->state == BR_STATE_DISABLED)) goto out; mdb = mlock_dereference(br->mdb, br); @@ -1402,6 +1424,31 @@ br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { + struct net_bridge_port_group __rcu **pp; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port != port) + continue; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + br_mdb_notify(br->dev, port, group, RTM_DELMDB); + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + } + goto out; + } + + if (timer_pending(&other_query->timer)) + goto out; + if (br->multicast_querier) { __br_multicast_send_query(br, port, &mp->addr); @@ -1427,28 +1474,6 @@ br_multicast_leave_group(struct net_bridge *br, } } - if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { - struct net_bridge_port_group __rcu **pp; - - for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; - pp = &p->next) { - if (p->port != port) - continue; - - rcu_assign_pointer(*pp, p->next); - hlist_del_init(&p->mglist); - del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); - br_mdb_notify(br->dev, port, group, RTM_DELMDB); - - if (!mp->ports && !mp->mglist && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); - } - goto out; - } - now = jiffies; time = now + br->multicast_last_member_count * br->multicast_last_member_interval; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index d89f4fa..c8b9bcf 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -111,7 +111,7 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) /* largest possible L2 header, see br_nf_dev_queue_xmit() */ #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct brnf_frag_data { char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; @@ -694,6 +694,7 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) } #endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)) { @@ -712,6 +713,7 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, return ip_do_fragment(sk, skb, output); } +#endif static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { @@ -742,7 +744,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv4(skb)) - return NF_DROP; + goto drop; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -767,7 +769,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv6(skb)) - return NF_DROP; + goto drop; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -782,12 +784,16 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) if (v6ops) return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); - else - return -EMSGSIZE; + + kfree_skb(skb); + return -EMSGSIZE; } #endif nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); + drop: + kfree_skb(skb); + return 0; } /* PF_BRIDGE/POST_ROUTING ********************************************/ diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 6d12d26..13b7d1e 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -104,7 +104,7 @@ int br_validate_ipv6(struct sk_buff *skb) { const struct ipv6hdr *hdr; struct net_device *dev = skb->dev; - struct inet6_dev *idev = in6_dev_get(skb->dev); + struct inet6_dev *idev = __in6_dev_get(skb->dev); u32 pkt_len; u8 ip6h_len = sizeof(struct ipv6hdr); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6b67ed3..3da5525 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -457,6 +457,8 @@ static int br_afspec(struct net_bridge *br, if (nla_len(attr) != sizeof(struct bridge_vlan_info)) return -EINVAL; vinfo = nla_data(attr); + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) + return -EINVAL; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { if (vinfo_start) return -EINVAL; @@ -691,9 +693,17 @@ static int br_port_slave_changelink(struct net_device *brdev, struct nlattr *tb[], struct nlattr *data[]) { + struct net_bridge *br = netdev_priv(brdev); + int ret; + if (!data) return 0; - return br_setport(br_port_get_rtnl(dev), data); + + spin_lock_bh(&br->lock); + ret = br_setport(br_port_get_rtnl(dev), data); + spin_unlock_bh(&br->lock); + + return ret; } static int br_port_fill_slave_info(struct sk_buff *skb, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index b4b6dab..ed74ffa 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -209,8 +209,9 @@ void br_transmit_config(struct net_bridge_port *p) br_send_config_bpdu(p, &bpdu); p->topology_change_ack = 0; p->config_pending = 0; - mod_timer(&p->hold_timer, - round_jiffies(jiffies + BR_HOLD_TIME)); + if (p->br->stp_enabled == BR_KERNEL_STP) + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); } } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a2730e7..4ca449a 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -48,7 +48,8 @@ void br_stp_enable_bridge(struct net_bridge *br) struct net_bridge_port *p; spin_lock_bh(&br->lock); - mod_timer(&br->hello_timer, jiffies + br->hello_time); + if (br->stp_enabled == BR_KERNEL_STP) + mod_timer(&br->hello_timer, jiffies + br->hello_time); mod_timer(&br->gc_timer, jiffies + HZ/10); br_config_bpdu_generation(br); @@ -127,6 +128,7 @@ static void br_stp_start(struct net_bridge *br) int r; char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; + struct net_bridge_port *p; r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); @@ -140,6 +142,10 @@ static void br_stp_start(struct net_bridge *br) if (r == 0) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); + /* Stop hello and hold timers */ + del_timer(&br->hello_timer); + list_for_each_entry(p, &br->port_list, list) + del_timer(&p->hold_timer); } else { br->stp_enabled = BR_KERNEL_STP; br_debug(br, "using kernel STP\n"); @@ -156,12 +162,17 @@ static void br_stp_stop(struct net_bridge *br) int r; char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL }; char *envp[] = { NULL }; + struct net_bridge_port *p; if (br->stp_enabled == BR_USER_STP) { r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); br_info(br, "userspace STP stopped, return code %d\n", r); /* To start timers on any ports left in blocking */ + mod_timer(&br->hello_timer, jiffies + br->hello_time); + list_for_each_entry(p, &br->port_list, list) + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); spin_lock_bh(&br->lock); br_port_state_selection(br); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 7caf7fa..5f0f5af 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,9 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); + if (br->stp_enabled != BR_USER_STP) + mod_timer(&br->hello_timer, + round_jiffies(jiffies + br->hello_time)); } spin_unlock(&br->lock); } diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 3cc71b9..cc85891 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -121,12 +121,13 @@ static void caif_flow_ctrl(struct sock *sk, int mode) * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are * not dropped, but CAIF is sending flow off instead. */ -static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static void caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err; unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + bool queued = false; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned int)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { @@ -139,7 +140,8 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) err = sk_filter(sk, skb); if (err) - return err; + goto out; + if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) { set_rx_flow_off(cf_sk); net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); @@ -147,21 +149,16 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } skb->dev = NULL; skb_set_owner_r(skb, sk); - /* Cache the SKB length before we tack it onto the receive - * queue. Once it is added it no longer belongs to us and - * may be freed by other threads of control pulling packets - * from the queue. - */ spin_lock_irqsave(&list->lock, flags); - if (!sock_flag(sk, SOCK_DEAD)) + queued = !sock_flag(sk, SOCK_DEAD); + if (queued) __skb_queue_tail(list, skb); spin_unlock_irqrestore(&list->lock, flags); - - if (!sock_flag(sk, SOCK_DEAD)) +out: + if (queued) sk->sk_data_ready(sk); else kfree_skb(skb); - return 0; } /* Packet Receive Callback function called from CAIF Stack */ diff --git a/net/can/af_can.c b/net/can/af_can.c index 7933e62..166d436 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -89,6 +89,8 @@ struct timer_list can_stattimer; /* timer for statistics update */ struct s_stats can_stats; /* packet statistics */ struct s_pstats can_pstats; /* receive list statistics */ +static atomic_t skbcounter = ATOMIC_INIT(0); + /* * af_can socket functions */ @@ -310,12 +312,8 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) { - if (!(newskb->tstamp.tv64)) - __net_timestamp(newskb); - + if (newskb) netif_rx_ni(newskb); - } /* update statistics */ can_stats.tx_frames++; @@ -683,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) can_stats.rx_frames++; can_stats.rx_frames_delta++; + /* create non-zero unique skb identifier together with *skb */ + while (!(can_skb_prv(skb)->skbcnt)) + can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter); + rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ diff --git a/net/can/bcm.c b/net/can/bcm.c index b523453..a1ba687 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); @@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) } can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/raw.c b/net/can/raw.c index 31b9748..2e67b14 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1"); */ struct uniqframe { - ktime_t tstamp; + int skbcnt; const struct sk_buff *skb; unsigned int join_rx_count; }; @@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && - ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { + this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { if (ro->join_filters) { this_cpu_inc(ro->uniq->join_rx_count); /* drop frame until all enabled filters matched */ @@ -144,7 +144,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) } } else { this_cpu_ptr(ro->uniq)->skb = oskb; - this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; + this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; this_cpu_ptr(ro->uniq)->join_rx_count = 1; /* drop first frame to check all enabled filters? */ if (ro->join_filters && ro->count > 1) @@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index cb7db32..f30329f 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -9,6 +9,7 @@ #include <keys/ceph-type.h> #include <linux/module.h> #include <linux/mount.h> +#include <linux/nsproxy.h> #include <linux/parser.h> #include <linux/sched.h> #include <linux/seq_file.h> @@ -16,8 +17,6 @@ #include <linux/statfs.h> #include <linux/string.h> #include <linux/vmalloc.h> -#include <linux/nsproxy.h> -#include <net/net_namespace.h> #include <linux/ceph/ceph_features.h> @@ -131,6 +130,13 @@ int ceph_compare_options(struct ceph_options *new_opt, int i; int ret; + /* + * Don't bother comparing options if network namespaces don't + * match. + */ + if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net))) + return -1; + ret = memcmp(opt1, opt2, ofs); if (ret) return ret; @@ -335,9 +341,6 @@ ceph_parse_options(char *options, const char *dev_name, int err = -ENOMEM; substring_t argstr[MAX_OPT_ARGS]; - if (current->nsproxy->net_ns != &init_net) - return ERR_PTR(-EINVAL); - opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return ERR_PTR(-ENOMEM); @@ -608,6 +611,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, fail_monc: ceph_monc_stop(&client->monc); fail: + ceph_messenger_fini(&client->msgr); kfree(client); return ERR_PTR(err); } @@ -621,8 +625,8 @@ void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_osdc_stop(&client->osdc); - ceph_monc_stop(&client->monc); + ceph_messenger_fini(&client->msgr); ceph_debugfs_client_cleanup(client); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1679f47..e3be1d22 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -6,6 +6,7 @@ #include <linux/inet.h> #include <linux/kthread.h> #include <linux/net.h> +#include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> @@ -479,7 +480,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; @@ -1731,17 +1732,17 @@ static int verify_hello(struct ceph_connection *con) static bool addr_is_blank(struct sockaddr_storage *ss) { + struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; + struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; + switch (ss->ss_family) { case AF_INET: - return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; + return addr->s_addr == htonl(INADDR_ANY); case AF_INET6: - return - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; + return ipv6_addr_any(addr6); + default: + return true; } - return false; } static int addr_port(struct sockaddr_storage *ss) @@ -2944,11 +2945,18 @@ void ceph_messenger_init(struct ceph_messenger *msgr, msgr->tcp_nodelay = tcp_nodelay; atomic_set(&msgr->stopping, 0); + write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); dout("%s %p\n", __func__, msgr); } EXPORT_SYMBOL(ceph_messenger_init); +void ceph_messenger_fini(struct ceph_messenger *msgr) +{ + put_net(read_pnet(&msgr->net)); +} +EXPORT_SYMBOL(ceph_messenger_fini); + static void clear_standby(struct ceph_connection *con) { /* come back from STANDBY? */ diff --git a/net/core/datagram.c b/net/core/datagram.c index b80fb91..4967262 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -131,6 +131,35 @@ out_noerr: goto out; } +static int skb_set_peeked(struct sk_buff *skb) +{ + struct sk_buff *nskb; + + if (skb->peeked) + return 0; + + /* We have to unshare an skb before modifying it. */ + if (!skb_shared(skb)) + goto done; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + skb->prev->next = nskb; + skb->next->prev = nskb; + nskb->prev = skb->prev; + nskb->next = skb->next; + + consume_skb(skb); + skb = nskb; + +done: + skb->peeked = 1; + + return 0; +} + /** * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -165,7 +194,9 @@ out_noerr: struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, int *peeked, int *off, int *err) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct sk_buff *skb, *last; + unsigned long cpu_flags; long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -184,8 +215,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, * Look at current nfs client by the way... * However, this function was correct in any case. 8) */ - unsigned long cpu_flags; - struct sk_buff_head *queue = &sk->sk_receive_queue; int _off = *off; last = (struct sk_buff *)queue; @@ -199,7 +228,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, _off -= skb->len; continue; } - skb->peeked = 1; + + error = skb_set_peeked(skb); + if (error) + goto unlock_err; + atomic_inc(&skb->users); } else __skb_unlink(skb, queue); @@ -223,6 +256,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, return NULL; +unlock_err: + spin_unlock_irqrestore(&queue->lock, cpu_flags); no_packet: *err = error; return NULL; @@ -622,7 +657,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); } - skb->csum_valid = !sum; + if (!skb_shared(skb)) + skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); @@ -642,11 +678,13 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) netdev_rx_csum_fault(skb->dev); } - /* Save full packet checksum */ - skb->csum = csum; - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - skb->csum_valid = !sum; + if (!skb_shared(skb)) { + /* Save full packet checksum */ + skb->csum = csum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + skb->csum_valid = !sum; + } return sum; } diff --git a/net/core/dev.c b/net/core/dev.c index 6778a99..a8e4dd4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -677,10 +677,6 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - /* If dev->rtnl_link_ops is set, it's a virtual interface. */ - if (dev->rtnl_link_ops) - return 0; - return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); @@ -3452,6 +3448,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, local_irq_save(flags); rps_lock(sd); + if (!netif_running(skb->dev)) + goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (qlen) { @@ -3473,6 +3471,7 @@ enqueue: goto enqueue; } +drop: sd->dropped++; rps_unlock(sd); @@ -3775,8 +3774,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) pt_prev = NULL; - rcu_read_lock(); - another_round: skb->skb_iif = skb->dev->ifindex; @@ -3786,7 +3783,7 @@ another_round: skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) - goto unlock; + goto out; } #ifdef CONFIG_NET_CLS_ACT @@ -3816,10 +3813,10 @@ skip_taps: if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto unlock; + goto out; if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) - goto unlock; + goto out; } #endif #ifdef CONFIG_NET_CLS_ACT @@ -3837,7 +3834,7 @@ ncls: if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) - goto unlock; + goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); @@ -3849,7 +3846,7 @@ ncls: switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; - goto unlock; + goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3903,8 +3900,7 @@ drop: ret = NET_RX_DROP; } -unlock: - rcu_read_unlock(); +out: return ret; } @@ -3935,29 +3931,30 @@ static int __netif_receive_skb(struct sk_buff *skb) static int netif_receive_skb_internal(struct sk_buff *skb) { + int ret; + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; + rcu_read_lock(); + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; - - rcu_read_lock(); - - cpu = get_rps_cpu(skb->dev, skb, &rflow); + int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } - rcu_read_unlock(); } #endif - return __netif_receive_skb(skb); + ret = __netif_receive_skb(skb); + rcu_read_unlock(); + return ret; } /** @@ -4502,8 +4499,10 @@ static int process_backlog(struct napi_struct *napi, int quota) struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { + rcu_read_lock(); local_irq_enable(); __netif_receive_skb(skb); + rcu_read_unlock(); local_irq_disable(); input_queue_head_incr(sd); if (++work >= quota) { @@ -6139,6 +6138,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; + on_each_cpu(flush_backlog, dev, 1); } synchronize_net(); @@ -6409,7 +6409,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev) struct netdev_queue *tx; size_t sz = count * sizeof(*tx); - BUG_ON(count < 1 || count > 0xffff); + if (count < 1 || count > 0xffff) + return -EINVAL; tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!tx) { @@ -6773,8 +6774,6 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); - netdev_wait_allrefs(dev); /* paranoia */ diff --git a/net/core/dst.c b/net/core/dst.c index e956ce6..002144be 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -284,7 +284,9 @@ void dst_release(struct dst_entry *dst) int newrefcnt; newrefcnt = atomic_dec_return(&dst->__refcnt); - WARN_ON(newrefcnt < 0); + if (unlikely(newrefcnt < 0)) + net_warn_ratelimited("%s: dst:%p refcnt:%d\n", + __func__, dst, newrefcnt); if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) call_rcu(&dst->rcu_head, dst_destroy_rcu); } diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9dfb88a..92d886f 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,7 +66,7 @@ NOTES. - * avbps is scaled by 2^5, avpps is scaled by 2^10. + * avbps and avpps are scaled by 2^5. * both values are reported as 32 bit unsigned values. bps can overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor @@ -85,10 +85,10 @@ struct gen_estimator struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; + u32 last_packets; + unsigned long avpps; u64 last_bytes; u64 avbps; - u32 last_packets; - u32 avpps; struct rcu_head e_rcu; struct rb_node node; struct gnet_stats_basic_cpu __percpu *cpu_bstats; @@ -118,8 +118,8 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { struct gnet_stats_basic_packed b = {0}; + unsigned long rate; u64 brate; - u32 rate; spin_lock(e->stats_lock); read_lock(&est_lock); @@ -133,10 +133,11 @@ static void est_timer(unsigned long arg) e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log); e->rate_est->bps = (e->avbps+0xF)>>5; - rate = (b.packets - e->last_packets)<<(12 - idx); + rate = b.packets - e->last_packets; + rate <<= (7 - idx); e->last_packets = b.packets; e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); - e->rate_est->pps = (e->avpps+0x1FF)>>10; + e->rate_est->pps = (e->avpps + 0xF) >> 5; skip: read_unlock(&est_lock); spin_unlock(e->stats_lock); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 1f2a126..6441f47 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -23,7 +23,8 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state struct cgroup_cls_state *task_cls_state(struct task_struct *p) { - return css_cls_state(task_css(p, net_cls_cgrp_id)); + return css_cls_state(task_css_check(p, net_cls_cgrp_id, + rcu_read_lock_bh_held())); } EXPORT_SYMBOL_GPL(task_cls_state); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 05badbb..1ebdf1c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3571,13 +3571,6 @@ static int pktgen_thread_worker(void *arg) pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); - /* Wait for kthread_stop */ - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - } - __set_current_state(TASK_RUNNING); - return 0; } @@ -3769,6 +3762,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) } t->net = pn; + get_task_struct(p); wake_up_process(p); wait_for_completion(&t->start_done); @@ -3891,6 +3885,7 @@ static void __net_exit pg_net_exit(struct net *net) t = list_entry(q, struct pktgen_thread, th_list); list_del(&t->th_list); kthread_stop(t->tsk); + put_task_struct(t->tsk); kfree(t); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 01ced4a..dc004b1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1328,10 +1328,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, }; -static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { - [IFLA_VF_INFO] = { .type = NLA_NESTED }, -}; - static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, @@ -1488,96 +1484,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { - int rem, err = -EINVAL; - struct nlattr *vf; const struct net_device_ops *ops = dev->netdev_ops; + int err = -EINVAL; - nla_for_each_nested(vf, attr, rem) { - switch (nla_type(vf)) { - case IFLA_VF_MAC: { - struct ifla_vf_mac *ivm; - ivm = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, - ivm->mac); - break; - } - case IFLA_VF_VLAN: { - struct ifla_vf_vlan *ivv; - ivv = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - break; - } - case IFLA_VF_TX_RATE: { - struct ifla_vf_tx_rate *ivt; - struct ifla_vf_info ivf; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_get_vf_config) - err = ops->ndo_get_vf_config(dev, ivt->vf, - &ivf); - if (err) - break; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivf.min_tx_rate, - ivt->rate); - break; - } - case IFLA_VF_RATE: { - struct ifla_vf_rate *ivt; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivt->min_tx_rate, - ivt->max_tx_rate); - break; - } - case IFLA_VF_SPOOFCHK: { - struct ifla_vf_spoofchk *ivs; - ivs = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_spoofchk) - err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, - ivs->setting); - break; - } - case IFLA_VF_LINK_STATE: { - struct ifla_vf_link_state *ivl; - ivl = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_link_state) - err = ops->ndo_set_vf_link_state(dev, ivl->vf, - ivl->link_state); - break; - } - case IFLA_VF_RSS_QUERY_EN: { - struct ifla_vf_rss_query_en *ivrssq_en; + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); - ivrssq_en = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rss_query_en) - err = ops->ndo_set_vf_rss_query_en(dev, - ivrssq_en->vf, - ivrssq_en->setting); - break; - } - default: - err = -EINVAL; - break; - } - if (err) - break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, + ivv->qos); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); + struct ifla_vf_info ivf; + + err = -EOPNOTSUPP; + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); + if (err < 0) + return err; + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RATE]) { + struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_SPOOFCHK]) { + struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + if (err < 0) + return err; } + + if (tb[IFLA_VF_LINK_STATE]) { + struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RSS_QUERY_EN]) { + struct ifla_vf_rss_query_en *ivrssq_en; + + err = -EOPNOTSUPP; + ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, + ivrssq_en->setting); + if (err < 0) + return err; + } + return err; } @@ -1773,14 +1771,21 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) { + if (nla_type(attr) != IFLA_VF_INFO || + nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } - err = do_setvfinfo(dev, attr); + err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, + ifla_vf_policy); + if (err < 0) + goto errout; + err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; @@ -1799,10 +1804,13 @@ static int do_setlink(const struct sk_buff *skb, goto errout; nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) { - if (nla_type(attr) != IFLA_VF_PORT) - continue; - err = nla_parse_nested(port, IFLA_PORT_MAX, - attr, ifla_port_policy); + if (nla_type(attr) != IFLA_VF_PORT || + nla_len(attr) < NLA_HDRLEN) { + err = -EINVAL; + goto errout; + } + err = nla_parse_nested(port, IFLA_PORT_MAX, attr, + ifla_port_policy); if (err < 0) goto errout; if (!port[IFLA_PORT_VF]) { diff --git a/net/core/sock.c b/net/core/sock.c index 08f16db..193901d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1497,7 +1497,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); /* SANITY */ - get_net(sock_net(newsk)); + if (likely(newsk->sk_net_refcnt)) + get_net(sock_net(newsk)); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); @@ -1967,20 +1968,21 @@ static void __release_sock(struct sock *sk) * sk_wait_data - wait for data to arrive at sk_receive_queue * @sk: sock to wait on * @timeo: for how long + * @skb: last skb seen on sk_receive_queue * * Now socket state including sk->sk_err is changed only under lock, * hence we may omit checks after joining wait queue. * We check receive queue before schedule() only as optimization; * it is very likely that release_sock() added new data. */ -int sk_wait_data(struct sock *sk, long *timeo) +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) { int rc; DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); + rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); finish_wait(sk_sleep(sk), &wait); return rc; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 52a9401..b5cf13a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -886,7 +886,7 @@ verify_sock_status: break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (len > skb->len) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 392e29a..b445d49 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -630,7 +630,7 @@ static int dsa_of_probe(struct device *dev) continue; cd->sw_addr = be32_to_cpup(sw_addr); - if (cd->sw_addr > PHY_MAX_ADDR) + if (cd->sw_addr >= PHY_MAX_ADDR) continue; if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) @@ -642,6 +642,8 @@ static int dsa_of_probe(struct device *dev) continue; port_index = be32_to_cpup(port_reg); + if (port_index >= DSA_MAX_PORTS) + break; port_name = of_get_property(port, "label", NULL); if (!port_name) @@ -666,8 +668,6 @@ static int dsa_of_probe(struct device *dev) goto out_free_chip; } - if (port_index == DSA_MAX_PORTS) - break; } } diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index f46e4d1..214d44a 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -207,7 +207,7 @@ found: } else { fq->q.meat += skb->len; } - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { @@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, clone->data_len = clone->len; head->data_len -= clone->len; head->len -= clone->len; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } WARN_ON(head == NULL); @@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 933a928..6c8b1fb 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1017,14 +1017,16 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { - read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); - r->arp_flags = arp_state_to_flags(neigh); - read_unlock_bh(&neigh->lock); - r->arp_ha.sa_family = dev->type; - strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); + if (!(neigh->nud_state & NUD_NOARP)) { + read_lock_bh(&neigh->lock); + memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + r->arp_flags = arp_state_to_flags(neigh); + read_unlock_bh(&neigh->lock); + r->arp_ha.sa_family = dev->type; + strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); + err = 0; + } neigh_release(neigh); - err = 0; } return err; } diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 90c0e83..574fad9 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -20,7 +20,7 @@ #include <net/route.h> #include <net/tcp_states.h> -int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; @@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); - lock_sock(sk); - oif = sk->sk_bound_dev_if; saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { @@ -82,9 +80,19 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_set(sk, &rt->dst); err = 0; out: - release_sock(sk); return err; } +EXPORT_SYMBOL(__ip4_datagram_connect); + +int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip4_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL(ip4_datagram_connect); /* Because UDP xmit path can manipulate sk_dst_cache without holding diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7498716..2d9cb17 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -882,7 +882,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); - blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } return 0; } @@ -1740,6 +1739,8 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) + size += nla_total_size(4); return size; } @@ -1780,6 +1781,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && + nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -1819,6 +1824,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, + [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -2048,6 +2054,12 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, ifindex, cnf); } + if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && + new_value != old_value) { + ifindex = devinet_conf_ifindex(net, cnf); + inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + ifindex, cnf); + } } return ret; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c6211ed..9c02920 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -13,6 +13,7 @@ struct fib_alias { u8 fa_state; u8 fa_slen; u32 tb_id; + s16 fa_default; struct rcu_head rcu; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c7358ea..3a06586 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1202,23 +1202,40 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) } /* Must be invoked inside of an RCU protected region. */ -void fib_select_default(struct fib_result *res) +void fib_select_default(const struct flowi4 *flp, struct fib_result *res) { struct fib_info *fi = NULL, *last_resort = NULL; struct hlist_head *fa_head = res->fa_head; struct fib_table *tb = res->table; + u8 slen = 32 - res->prefixlen; int order = -1, last_idx = -1; - struct fib_alias *fa; + struct fib_alias *fa, *fa1 = NULL; + u32 last_prio = res->fi->fib_priority; + u8 last_tos = 0; hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; + if (fa->fa_slen != slen) + continue; + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fa->tb_id != tb->tb_id) + continue; + if (next_fi->fib_priority > last_prio && + fa->fa_tos == last_tos) { + if (last_tos) + continue; + break; + } + if (next_fi->fib_flags & RTNH_F_DEAD) + continue; + last_tos = fa->fa_tos; + last_prio = next_fi->fib_priority; + if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) continue; @@ -1228,10 +1245,11 @@ void fib_select_default(struct fib_result *res) if (!fi) { if (next_fi != res->fi) break; + fa1 = fa; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { + &last_idx, fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } fi = next_fi; @@ -1239,20 +1257,21 @@ void fib_select_default(struct fib_result *res) } if (order <= 0 || !fi) { - tb->tb_default = -1; + if (fa1) + fa1->fa_default = -1; goto out; } if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { + fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } if (last_idx >= 0) fib_result_assign(res, last_resort); - tb->tb_default = last_idx; + fa1->fa_default = last_idx; out: return; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 15d3261..37c4bb8 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1171,6 +1171,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; err = switchdev_fib_ipv4_add(key, plen, fi, new_fa->fa_tos, @@ -1222,6 +1223,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = 0; new_fa->fa_slen = slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; /* (Optionally) offload fib entry to switch hardware. */ err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, @@ -1791,8 +1793,6 @@ void fib_table_flush_external(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } } @@ -1862,8 +1862,6 @@ int fib_table_flush(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } @@ -1990,7 +1988,6 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) return NULL; tb->tb_id = id; - tb->tb_default = -1; tb->tb_num_default = 0; tb->tb_data = (alias ? alias->__data : tb->__data); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 9bc2667..c3b1f3a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -152,8 +152,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, inet6_sk(sk)->tclass) < 0) goto errout; - if (ipv6_only_sock(sk) && - nla_put_u8(skb, INET_DIAG_SKV6ONLY, 1)) + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) goto errout; } #endif diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5e346a0..d0a7c03 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) unsigned int evicted = 0; HLIST_HEAD(expired); -evict_again: spin_lock(&hb->chain_lock); hlist_for_each_entry_safe(fq, n, &hb->chain, list) { if (!inet_fragq_should_evict(fq)) continue; - if (!del_timer(&fq->timer)) { - /* q expiring right now thus increment its refcount so - * it won't be freed under us and wait until the timer - * has finished executing then destroy it - */ - atomic_inc(&fq->refcnt); - spin_unlock(&hb->chain_lock); - del_timer_sync(&fq->timer); - inet_frag_put(fq, f); - goto evict_again; - } + if (!del_timer(&fq->timer)) + continue; - fq->flags |= INET_FRAG_EVICTED; - hlist_del(&fq->list); - hlist_add_head(&fq->list, &expired); + hlist_add_head(&fq->list_evictor, &expired); ++evicted; } spin_unlock(&hb->chain_lock); - hlist_for_each_entry_safe(fq, n, &expired, list) + hlist_for_each_entry_safe(fq, n, &expired, list_evictor) f->frag_expire((unsigned long) fq); return evicted; @@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) int i; nf->low_thresh = 0; - local_bh_disable(); evict_again: + local_bh_disable(); seq = read_seqbegin(&f->rnd_seqlock); for (i = 0; i < INETFRAGS_HASHSZ ; i++) inet_evict_bucket(f, &f->hash[i]); - if (read_seqretry(&f->rnd_seqlock, seq)) - goto evict_again; - local_bh_enable(); + cond_resched(); + + if (read_seqretry(&f->rnd_seqlock, seq) || + percpu_counter_sum(&nf->mem)) + goto evict_again; percpu_counter_destroy(&nf->mem); } @@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket *hb; hb = get_frag_bucket_locked(fq, f); - if (!(fq->flags & INET_FRAG_EVICTED)) - hlist_del(&fq->list); + hlist_del(&fq->list); + fq->flags |= INET_FRAG_COMPLETE; spin_unlock(&hb->chain_lock); } @@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) if (!(fq->flags & INET_FRAG_COMPLETE)) { fq_unlink(fq, f); atomic_dec(&fq->refcnt); - fq->flags |= INET_FRAG_COMPLETE; } } EXPORT_SYMBOL(inet_frag_kill); @@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) fp = xp; } sum = sum_truesize + f->qsize; - sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); kmem_cache_free(f->frags_cachep, q); + + sub_frag_mem_limit(nf, sum); } EXPORT_SYMBOL(inet_frag_destroy); @@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - add_frag_mem_limit(q, f->qsize); + add_frag_mem_limit(nf, f->qsize); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 5f9b063..0cb9165 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -624,22 +624,21 @@ EXPORT_SYMBOL_GPL(inet_hashinfo_init); int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { + unsigned int locksz = sizeof(spinlock_t); unsigned int i, nblocks = 1; - if (sizeof(spinlock_t) != 0) { + if (locksz != 0) { /* allocate 2 cache lines or at least one spinlock per cpu */ - nblocks = max_t(unsigned int, - 2 * L1_CACHE_BYTES / sizeof(spinlock_t), - 1); + nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U); nblocks = roundup_pow_of_two(nblocks * num_possible_cpus()); /* no more locks than number of hash buckets */ nblocks = min(nblocks, hashinfo->ehash_mask + 1); - hashinfo->ehash_locks = kmalloc_array(nblocks, sizeof(spinlock_t), + hashinfo->ehash_locks = kmalloc_array(nblocks, locksz, GFP_KERNEL | __GFP_NOWARN); if (!hashinfo->ehash_locks) - hashinfo->ehash_locks = vmalloc(nblocks * sizeof(spinlock_t)); + hashinfo->ehash_locks = vmalloc(nblocks * locksz); if (!hashinfo->ehash_locks) return -ENOMEM; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a50dc6d..921138f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); - if (!(qp->q.flags & INET_FRAG_EVICTED)) { + if (!inet_frag_evicting(&qp->q)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp) kfree_skb(fp); fp = xp; } while (fp); - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; qp->q.len = 0; @@ -351,7 +351,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ihl = ip_hdrlen(skb); /* Determine the position of this fragment. */ - end = offset + skb->len - ihl; + end = offset + skb->len - skb_network_offset(skb) - ihl; err = -EINVAL; /* Is this the final fragment? */ @@ -381,7 +381,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) goto err; err = -ENOMEM; - if (!pskb_pull(skb, ihl)) + if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) goto err; err = pskb_trim_rcsum(skb, end - offset); @@ -455,7 +455,7 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - sub_frag_mem_limit(&qp->q, free_it->truesize); + sub_frag_mem_limit(qp->q.net, free_it->truesize); kfree_skb(free_it); } } @@ -479,7 +479,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - add_frag_mem_limit(&qp->q, skb->truesize); + add_frag_mem_limit(qp->q.net, skb->truesize); if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; @@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&qp->q, clone->truesize); + add_frag_mem_limit(qp->q.net, clone->truesize); } skb_push(head, head->data - skb_network_header(head)); @@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); head->next = NULL; head->dev = dev; @@ -641,6 +641,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph->frag_off = 0; } + ip_send_check(iph); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4c2c3ba..626d9e5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, EXPORT_SYMBOL(ip_tunnel_encap); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; @@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 95c9b6e..92305a1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -254,9 +254,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; - struct arpt_entry *e, *back; + struct arpt_entry *e, **jumpstack; const char *indev, *outdev; const void *table_base; + unsigned int cpu, stackidx = 0; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; @@ -270,15 +271,16 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + cpu = smp_processor_id(); /* * Ensure we load private-> members after we've fetched the base * pointer. */ smp_read_barrier_depends(); table_base = private->entries; + jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; e = get_entry(table_base, private->hook_entry[hook]); - back = get_entry(table_base, private->underflow[hook]); acpar.in = state->in; acpar.out = state->out; @@ -312,18 +314,23 @@ unsigned int arpt_do_table(struct sk_buff *skb, verdict = (unsigned int)(-v) - 1; break; } - e = back; - back = get_entry(table_base, back->comefrom); + if (stackidx == 0) { + e = get_entry(table_base, + private->underflow[hook]); + } else { + e = jumpstack[--stackidx]; + e = arpt_next_entry(e); + } continue; } if (table_base + v != arpt_next_entry(e)) { - /* Save old back ptr in next entry */ - struct arpt_entry *next = arpt_next_entry(e); - next->comefrom = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + if (stackidx >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[stackidx++] = e; } e = get_entry(table_base, v); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d0362a2..e681b85 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2176,7 +2176,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (!res.prefixlen && res.table->tb_num_default > 1 && res.type == RTN_UNICAST && !fl4->flowi4_oif) - fib_select_default(&res); + fib_select_default(fl4, &res); if (!fl4->saddr) fl4->saddr = FIB_RES_PREFSRC(net, res); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7f40567..45534a5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -780,7 +780,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -1575,7 +1575,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; - struct sk_buff *skb; + struct sk_buff *skb, *last; u32 urg_hole = 0; if (unlikely(flags & MSG_ERRQUEUE)) @@ -1635,7 +1635,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Next get a buffer. */ + last = skb_peek_tail(&sk->sk_receive_queue); skb_queue_walk(&sk->sk_receive_queue, skb) { + last = skb; /* Now that we have two receive queues this * shouldn't happen. */ @@ -1754,8 +1756,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Do not sleep, just process backlog. */ release_sock(sk); lock_sock(sk); - } else - sk_wait_data(sk, &timeo); + } else { + sk_wait_data(sk, &timeo, last); + } if (user_recv) { int chunk; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 684f095..728f5b3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1917,14 +1917,13 @@ void tcp_enter_loss(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - bool new_recovery = false; + bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { - new_recovery = true; tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 62d908e..b10a889 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } -int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) return -EAFNOSUPPORT; - err = ip4_datagram_connect(sk, uaddr, addr_len); + err = __ip4_datagram_connect(sk, uaddr, addr_len); goto ipv4_connected; } @@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_port = usin->sin6_port; - err = ip4_datagram_connect(sk, - (struct sockaddr *) &sin, - sizeof(sin)); + err = __ip4_datagram_connect(sk, + (struct sockaddr *) &sin, + sizeof(sin)); ipv4_connected: if (err) @@ -204,6 +204,16 @@ out: fl6_sock_release(flowlabel); return err; } + +int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip6_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL_GPL(ip6_datagram_connect); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f2e464e..57990c9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e893cd1..08b6204 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -292,8 +292,6 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, }, }; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0a05b35..c53331c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1650,6 +1650,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; @@ -1664,6 +1665,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, ndisc_send_unsol_na(dev); in6_dev_put(idev); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&nd_tbl, dev); + break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8..6d02498 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ found: fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - sub_frag_mem_limit(&fq->q, head->truesize); + sub_frag_mem_limit(fq->q.net, head->truesize); head->ignore_df = 1; head->next = NULL; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8ffa2c8..f1159bb 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - if (fq->q.flags & INET_FRAG_EVICTED) + if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); @@ -330,7 +330,7 @@ found: fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1a1122a..6090969 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -369,10 +369,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev; dst_destroy_metrics_generic(dst); - - if (rt->rt6i_pcpu) - free_percpu(rt->rt6i_pcpu); - + free_percpu(rt->rt6i_pcpu); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8fd9feb..8dab4e5 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -613,7 +613,7 @@ static int llc_wait_data(struct sock *sk, long timeo) if (signal_pending(current)) break; rc = 0; - if (sk_wait_data(sk, &timeo)) + if (sk_wait_data(sk, &timeo, NULL)) break; } return rc; @@ -802,7 +802,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, release_sock(sk); lock_sock(sk); } else - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n", diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 29236e8..c09c013 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -723,6 +723,7 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) debugfs_remove_recursive(sdata->vif.debugfs_dir); sdata->vif.debugfs_dir = NULL; + sdata->debugfs.subdir_stations = NULL; } void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ed1edac..553ac6d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1863,10 +1863,6 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata) ieee80211_teardown_sdata(sdata); } -/* - * Remove all interfaces, may only be called at hardware unregistration - * time because it doesn't do RCU-safe list removals. - */ void ieee80211_remove_interfaces(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata, *tmp; @@ -1875,14 +1871,21 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ASSERT_RTNL(); - /* - * Close all AP_VLAN interfaces first, as otherwise they - * might be closed while the AP interface they belong to - * is closed, causing unregister_netdevice_many() to crash. + /* Before destroying the interfaces, make sure they're all stopped so + * that the hardware is stopped. Otherwise, the driver might still be + * iterating the interfaces during the shutdown, e.g. from a worker + * or from RX processing or similar, and if it does so (using atomic + * iteration) while we're manipulating the list, the iteration will + * crash. + * + * After this, the hardware should be stopped and the driver should + * have stopped all of its activities, so that we can do RCU-unaware + * manipulations of the interface list below. */ - list_for_each_entry(sdata, &local->interfaces, list) - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - dev_close(sdata->dev); + cfg80211_shutdown_all_interfaces(local->hw.wiphy); + + WARN(local->open_count, "%s: open count remains %d\n", + wiphy_name(local->hw.wiphy), local->open_count); mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 5438d13..3b59099 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -306,7 +306,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action == WLAN_SP_MESH_PEERING_CONFIRM) { /* AID */ pos = skb_put(skb, 2); - put_unaligned_le16(plid, pos + 2); + put_unaligned_le16(plid, pos); } if (ieee80211_add_srates_ie(sdata, skb, true, band) || ieee80211_add_ext_srates_ie(sdata, skb, true, band) || @@ -1122,6 +1122,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, WLAN_SP_MESH_PEERING_CONFIRM) { baseaddr += 4; baselen += 4; + + if (baselen > len) + return; } ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems); mesh_process_plink_frame(sdata, mgmt, &elems); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 06b60980..b676b9f 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -76,6 +76,22 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) if (sdata->vif.type != NL80211_IFTYPE_STATION) continue; ieee80211_mgd_quiesce(sdata); + /* If suspended during TX in progress, and wowlan + * is enabled (connection will be active) there + * can be a race where the driver is put out + * of power-save due to TX and during suspend + * dynamic_ps_timer is cancelled and TX packet + * is flushed, leaving the driver in ACTIVE even + * after resuming until dynamic_ps_timer puts + * driver back in DOZE. + */ + if (sdata->u.mgd.associated && + sdata->u.mgd.powersave && + !(local->hw.conf.flags & IEEE80211_CONF_PS)) { + local->hw.conf.flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + } } err = drv_suspend(local, wowlan); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index ad31b2d..8db6e29 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -60,6 +60,7 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *ch; struct cfg80211_chan_def chandef; int i, subband_start; + struct wiphy *wiphy = sdata->local->hw.wiphy; for (i = start; i <= end; i += spacing) { if (!ch_cnt) @@ -70,9 +71,8 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, /* we will be active on the channel */ cfg80211_chandef_create(&chandef, ch, NL80211_CHAN_NO_HT); - if (cfg80211_reg_can_beacon(sdata->local->hw.wiphy, - &chandef, - sdata->wdev.iftype)) { + if (cfg80211_reg_can_beacon_relax(wiphy, &chandef, + sdata->wdev.iftype)) { ch_cnt++; /* * check if the next channel is also part of diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8410bb3..b823350 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1117,7 +1117,9 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, queued = true; info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; + info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS | + IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_STATUS_EOSP; __skb_queue_tail(&tid_tx->pending, skb); if (skb_queue_len(&tid_tx->pending) > STA_MAX_TX_BUFFER) purge_skb = __skb_dequeue(&tid_tx->pending); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5d2b806..38fbc19 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 285eae3..24c5542 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { ip_vs_start_estimator(svc->net, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; - if (sched->add_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->add_dest) sched->add_dest(svc, dest); } else { - if (sched->upd_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->upd_dest) sched->upd_dest(svc, dest); } } @@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, struct ip_vs_scheduler *sched; sched = rcu_dereference_protected(svc->scheduler, 1); - if (sched->del_dest) + if (sched && sched->del_dest) sched->del_dest(svc, dest); } } @@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ip_vs_use_count_inc(); /* Lookup the scheduler by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - ret = -ENOENT; - goto out_err; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + ret = -ENOENT; + goto out_err; + } } if (u->pe_name && *u->pe_name) { @@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) - goto out_err; - sched = NULL; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) + goto out_err; + sched = NULL; + } /* Bind the ct retriever */ RCU_INIT_POINTER(svc->pe, pe); @@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, static int ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { - struct ip_vs_scheduler *sched, *old_sched; + struct ip_vs_scheduler *sched = NULL, *old_sched; struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; /* * Lookup the scheduler, by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - return -ENOENT; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + return -ENOENT; + } } old_sched = sched; @@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = rcu_dereference_protected(svc->scheduler, 1); if (sched != old_sched) { + if (old_sched) { + ip_vs_unbind_scheduler(svc, old_sched); + RCU_INIT_POINTER(svc->scheduler, NULL); + /* Wait all svc->sched_data users */ + synchronize_rcu(); + } /* Bind the new scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) { - old_sched = sched; - goto out; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + ip_vs_scheduler_put(sched); + goto out; + } } - /* Unbind the old scheduler on success */ - ip_vs_unbind_scheduler(svc, old_sched); } /* @@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - sched->name); + sched_name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - sched->name, + sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, sched->name, + svc->fwmark, sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { struct ip_vs_scheduler *sched; struct ip_vs_kstats kstats; + char *sched_name; sched = rcu_dereference_protected(src->scheduler, 1); + sched_name = sched ? sched->name : "none"; dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; struct ip_vs_kstats kstats; + char *sched_name; nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) @@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } sched = rcu_dereference_protected(svc->scheduler, 1); + sched_name = sched ? sched->name : "none"; pe = rcu_dereference_protected(svc->pe, 1); - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 199760c..7e81416 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc, if (sched->done_service) sched->done_service(svc); - /* svc->scheduler can not be set to NULL */ + /* svc->scheduler can be set to NULL only by caller */ } @@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { - struct ip_vs_scheduler *sched; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; - sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - sched->name, svc->fwmark, svc->fwmark, msg); + sched_name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b08ba95..d99ad93 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp->control, pkts); + ip_vs_sync_conn(net, cp, pkts); } } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index bf66a86..258a0b0 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; - fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; @@ -505,6 +504,13 @@ err_put: return -1; err_unreach: + /* The ip6_link_failure function requires the dev field to be set + * in order to get the net (further for the sake of fwmark + * reflection). + */ + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + dst_link_failure(skb); return -1; } @@ -523,10 +529,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (ret == NF_ACCEPT) { nf_reset(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); } return ret; } +/* In the event of a remote destination, it's possible that we would have + * matches against an old socket (particularly a TIME-WAIT socket). This + * causes havoc down the line (ip_local_out et. al. expect regular sockets + * and invalid memory accesses will happen) so simply drop the association + * in this case. +*/ +static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) +{ + /* If dev is set, the packet came from the LOCAL_IN callback and + * not from a local TCP socket. + */ + if (skb->dev) + skb_orphan(skb); +} + /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, struct ip_vs_conn *cp, int local) @@ -538,12 +561,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 1); + + /* Remove the early_demux association unless it's bound for the + * exact same port and address on this host after translation. + */ + if (!local || cp->vport != cp->dport || + !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) + ip_vs_drop_early_demux_sk(skb); + if (!local) { skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; + return ret; } @@ -557,7 +591,10 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) ip_vs_notrack(skb); if (!local) { + ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else @@ -845,6 +882,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, struct ipv6hdr *old_ipv6h = NULL; #endif + ip_vs_drop_early_demux_sk(skb); + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 13fad86..651039a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -287,6 +287,46 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) spin_unlock(&pcpu->lock); } +/* Released via destroy_conntrack() */ +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) +{ + struct nf_conn *tmpl; + + tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL); + if (tmpl == NULL) + return NULL; + + tmpl->status = IPS_TEMPLATE; + write_pnet(&tmpl->ct_net, net); + +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif + atomic_set(&tmpl->ct_general.use, 0); + + return tmpl; +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kfree(tmpl); + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); + +static void nf_ct_tmpl_free(struct nf_conn *tmpl) +{ + nf_ct_ext_destroy(tmpl); + nf_ct_ext_free(tmpl); + kfree(tmpl); +} + static void destroy_conntrack(struct nf_conntrack *nfct) { @@ -298,6 +338,10 @@ destroy_conntrack(struct nf_conntrack *nfct) NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); + if (unlikely(nf_ct_is_template(ct))) { + nf_ct_tmpl_free(ct); + return; + } rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto && l4proto->destroy) @@ -540,28 +584,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); -/* deletion from this larval template list happens via nf_ct_put() */ -void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) -{ - struct ct_pcpu *pcpu; - - __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); - __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); - nf_conntrack_get(&tmpl->ct_general); - - /* add this conntrack to the (per cpu) tmpl list */ - local_bh_disable(); - tmpl->cpu = smp_processor_id(); - pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu); - - spin_lock(&pcpu->lock); - /* Overload tuple linked list to put us in template list. */ - hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &pcpu->tmpl); - spin_unlock_bh(&pcpu->lock); -} -EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); - /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) @@ -1751,7 +1773,6 @@ int nf_conntrack_init_net(struct net *net) spin_lock_init(&pcpu->lock); INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); - INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL); } net->ct.stat = alloc_percpu(struct ip_conntrack_stat); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 7a17070..b45a422 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } static inline int expect_matches(const struct nf_conntrack_expect *a, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d1c2394..6b8b0ab 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone, } err = nf_ct_expect_related_report(exp, portid, report); - if (err < 0) - goto err_exp; - - return 0; -err_exp: nf_ct_expect_put(exp); err_ct: nf_ct_put(ct); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index cd60d39..8a8b2ab 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -213,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook], + verdict = nf_iterate(entry->state.hook_list, skb, &entry->state, &elem); } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 789feea..71f1e9f 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -349,12 +349,10 @@ static void __net_exit synproxy_proc_exit(struct net *net) static int __net_init synproxy_net_init(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); - struct nf_conntrack_tuple t; struct nf_conn *ct; int err = -ENOMEM; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL); if (IS_ERR(ct)) { err = PTR_ERR(ct); goto err1; @@ -365,7 +363,8 @@ static int __net_init synproxy_net_init(struct net *net) if (!nfct_synproxy_ext_add(ct)) goto err2; - nf_conntrack_tmpl_insert(net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8b117c9..0c0e8ec 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -269,6 +269,12 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) } } +enum { + NFNL_BATCH_FAILURE = (1 << 0), + NFNL_BATCH_DONE = (1 << 1), + NFNL_BATCH_REPLAY = (1 << 2), +}; + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { @@ -276,13 +282,15 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; - bool success = true, done = false; static LIST_HEAD(err_list); + u32 status; int err; if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL); replay: + status = 0; + skb = netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); @@ -336,10 +344,10 @@ replay: if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ nfnl_err_reset(&err_list); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } else if (type == NFNL_MSG_BATCH_END) { - done = true; + status |= NFNL_BATCH_DONE; goto done; } else if (type < NLMSG_MIN_TYPE) { err = -EINVAL; @@ -382,11 +390,8 @@ replay: * original skb. */ if (err == -EAGAIN) { - nfnl_err_reset(&err_list); - ss->abort(oskb); - nfnl_unlock(subsys_id); - kfree_skb(skb); - goto replay; + status |= NFNL_BATCH_REPLAY; + goto next; } } ack: @@ -402,7 +407,7 @@ ack: */ nfnl_err_reset(&err_list); netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } /* We don't stop processing the batch on errors, thus, @@ -410,19 +415,26 @@ ack: * triggers. */ if (err) - success = false; + status |= NFNL_BATCH_FAILURE; } - +next: msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) msglen = skb->len; skb_pull(skb, msglen); } done: - if (success && done) + if (status & NFNL_BATCH_REPLAY) { + ss->abort(oskb); + nfnl_err_reset(&err_list); + nfnl_unlock(subsys_id); + kfree_skb(skb); + goto replay; + } else if (status == NFNL_BATCH_DONE) { ss->commit(oskb); - else + } else { ss->abort(oskb); + } nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 75747ae..c663003 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -184,7 +184,6 @@ out: static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { - struct nf_conntrack_tuple t; struct nf_conn *ct; int ret = -EOPNOTSUPP; @@ -202,8 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err1; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL); ret = PTR_ERR(ct); if (IS_ERR(ct)) goto err2; @@ -227,8 +225,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err3; } - - nf_conntrack_tmpl_insert(par->net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); out: info->ct = ct; return 0; diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index f407ebc1..29d2c31 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -126,6 +126,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) goto out; } + sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { ret = -ENOMEM; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index dea9253..d8e2e39 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -158,7 +158,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt) out: spin_unlock(&netlink_tap_lock); - if (found && nt->module) + if (found) module_put(nt->module); return found ? 0 : -ENODEV; @@ -357,25 +357,52 @@ err1: return NULL; } + +static void +__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, + unsigned int order) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct sk_buff_head *queue; + struct netlink_ring *ring; + + queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; + ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; + + spin_lock_bh(&queue->lock); + + ring->frame_max = req->nm_frame_nr - 1; + ring->head = 0; + ring->frame_size = req->nm_frame_size; + ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; + + swap(ring->pg_vec_len, req->nm_block_nr); + swap(ring->pg_vec_order, order); + swap(ring->pg_vec, pg_vec); + + __skb_queue_purge(queue); + spin_unlock_bh(&queue->lock); + + WARN_ON(atomic_read(&nlk->mapped)); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); +} + static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool closing, bool tx_ring) + bool tx_ring) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; - struct sk_buff_head *queue; void **pg_vec = NULL; unsigned int order = 0; - int err; ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - if (!closing) { - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - } + if (atomic_read(&nlk->mapped)) + return -EBUSY; + if (atomic_read(&ring->pending)) + return -EBUSY; if (req->nm_block_nr) { if (ring->pg_vec != NULL) @@ -407,31 +434,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, return -EINVAL; } - err = -EBUSY; mutex_lock(&nlk->pg_vec_lock); - if (closing || atomic_read(&nlk->mapped) == 0) { - err = 0; - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); + if (atomic_read(&nlk->mapped) == 0) { + __netlink_set_ring(sk, req, tx_ring, pg_vec, order); + mutex_unlock(&nlk->pg_vec_lock); + return 0; } + mutex_unlock(&nlk->pg_vec_lock); if (pg_vec) free_pg_vec(pg_vec, order, req->nm_block_nr); - return err; + + return -EBUSY; } static void netlink_mm_open(struct vm_area_struct *vma) @@ -900,10 +915,10 @@ static void netlink_sock_destruct(struct sock *sk) memset(&req, 0, sizeof(req)); if (nlk->rx_ring.pg_vec) - netlink_set_ring(sk, &req, true, false); + __netlink_set_ring(sk, &req, false, NULL, 0); memset(&req, 0, sizeof(req)); if (nlk->tx_ring.pg_vec) - netlink_set_ring(sk, &req, true, true); + __netlink_set_ring(sk, &req, true, NULL, 0); } #endif /* CONFIG_NETLINK_MMAP */ @@ -2223,7 +2238,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return -EINVAL; if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; - err = netlink_set_ring(sk, &req, false, + err = netlink_set_ring(sk, &req, optname == NETLINK_TX_RING); break; } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 4613df8..6552394 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -752,7 +752,7 @@ int ovs_flow_init(void) BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) - + (num_possible_nodes() + + (nr_node_ids * sizeof(struct flow_stats *)), 0, 0, NULL); if (flow_cache == NULL) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9e8741..ed458b3 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2403,7 +2403,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); - if (tp_len > dev->mtu + dev->hard_header_len) { + if (likely(tp_len >= 0) && + tp_len > dev->mtu + dev->hard_header_len) { struct ethhdr *ehdr; /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual @@ -2784,7 +2785,7 @@ static int packet_release(struct socket *sock) static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) { struct packet_sock *po = pkt_sk(sk); - const struct net_device *dev_curr; + struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; @@ -2808,15 +2809,13 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) po->num = proto; po->prot_hook.type = proto; - - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; packet_cached_dev_assign(po, dev); } + if (dev_curr) + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 273b8bf..657ba9f 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -759,8 +759,10 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, } ibmr = rds_ib_alloc_fmr(rds_ibdev); - if (IS_ERR(ibmr)) + if (IS_ERR(ibmr)) { + rds_ib_dev_put(rds_ibdev); return ibmr; + } ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); if (ret == 0) diff --git a/net/rds/transport.c b/net/rds/transport.c index 8b4a6cd..83498e1 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL_GPL(rds_trans_unregister); void rds_trans_put(struct rds_transport *trans) { - if (trans && trans->t_owner) + if (trans) module_put(trans->t_owner); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index af427a3..43ec926 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *a) } EXPORT_SYMBOL(tcf_hash_destroy); -int tcf_hash_release(struct tc_action *a, int bind) +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { struct tcf_common *p = a->priv; int ret = 0; @@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a, int bind) if (p) { if (bind) p->tcfc_bindcnt--; - else if (p->tcfc_bindcnt > 0) + else if (strict && p->tcfc_bindcnt > 0) return -EPERM; p->tcfc_refcnt--; @@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a, int bind) ret = 1; } } + return ret; } -EXPORT_SYMBOL(tcf_hash_release); +EXPORT_SYMBOL(__tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a) @@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { a->priv = p; - ret = tcf_hash_release(a, 0); + ret = __tcf_hash_release(a, false, true); if (ret == ACT_P_DELETED) { module_put(a->ops->owner); n_i++; @@ -408,7 +409,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = tcf_hash_release(a, bind); + ret = __tcf_hash_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1d56903..d0edeb7 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -27,9 +27,10 @@ struct tcf_bpf_cfg { struct bpf_prog *filter; struct sock_filter *bpf_ops; - char *bpf_name; + const char *bpf_name; u32 bpf_fd; u16 bpf_num_ops; + bool is_ebpf; }; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, @@ -207,6 +208,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_ops = bpf_ops; cfg->bpf_num_ops = bpf_num_ops; cfg->filter = fp; + cfg->is_ebpf = false; return 0; } @@ -241,18 +243,40 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_fd = bpf_fd; cfg->bpf_name = name; cfg->filter = fp; + cfg->is_ebpf = true; return 0; } +static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) +{ + if (cfg->is_ebpf) + bpf_prog_put(cfg->filter); + else + bpf_prog_destroy(cfg->filter); + + kfree(cfg->bpf_ops); + kfree(cfg->bpf_name); +} + +static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, + struct tcf_bpf_cfg *cfg) +{ + cfg->is_ebpf = tcf_bpf_is_ebpf(prog); + cfg->filter = prog->filter; + + cfg->bpf_ops = prog->bpf_ops; + cfg->bpf_name = prog->bpf_name; +} + static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *act, int replace, int bind) { struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; - struct tcf_bpf_cfg cfg; bool is_bpf, is_ebpf; int ret; @@ -301,6 +325,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog = to_bpf(act); spin_lock_bh(&prog->tcf_lock); + if (ret != ACT_P_CREATED) + tcf_bpf_prog_fill_cfg(prog, &old); + prog->bpf_ops = cfg.bpf_ops; prog->bpf_name = cfg.bpf_name; @@ -316,29 +343,22 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (ret == ACT_P_CREATED) tcf_hash_insert(act); + else + tcf_bpf_cfg_cleanup(&old); return ret; destroy_fp: - if (is_ebpf) - bpf_prog_put(cfg.filter); - else - bpf_prog_destroy(cfg.filter); - - kfree(cfg.bpf_ops); - kfree(cfg.bpf_name); - + tcf_bpf_cfg_cleanup(&cfg); return ret; } static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - const struct tcf_bpf *prog = act->priv; + struct tcf_bpf_cfg tmp; - if (tcf_bpf_is_ebpf(prog)) - bpf_prog_put(prog->filter); - else - bpf_prog_destroy(prog->filter); + tcf_bpf_prog_fill_cfg(act->priv, &tmp); + tcf_bpf_cfg_cleanup(&tmp); } static struct tc_action_ops act_bpf_ops __read_mostly = { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 17e6d66..ff8b466 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -68,13 +68,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; } else { - p = to_pedit(a); - tcf_hash_release(a, bind); if (bind) return 0; + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; - + p = to_pedit(a); if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c79ecfd..e5168f8 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -378,7 +378,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; if (oldprog) { - list_replace_rcu(&prog->link, &oldprog->link); + list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); call_rcu(&oldprog->rcu, __cls_bpf_delete_prog); } else { diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 76bc3a2..bb2a0f5 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -425,6 +425,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (!fnew) goto err2; + tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + fold = (struct flow_filter *)*arg; if (fold) { err = -EINVAL; @@ -486,7 +488,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, fnew->mask = ~0U; fnew->tp = tp; get_random_bytes(&fnew->hashrnd, 4); - tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); } fnew->perturb_timer.function = flow_perturbation; @@ -526,7 +527,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (*arg == 0) list_add_tail_rcu(&fnew->list, &head->filters); else - list_replace_rcu(&fnew->list, &fold->list); + list_replace_rcu(&fold->list, &fnew->list); *arg = (unsigned long)fnew; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9d37ccd..2f3d03f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -499,7 +499,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) fnew; if (fold) { - list_replace_rcu(&fnew->list, &fold->list); + list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); call_rcu(&fold->rcu, fl_destroy_filter); } else { diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 93d5742..6a783af 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -385,6 +385,19 @@ static void choke_reset(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); + while (q->head != q->tail) { + struct sk_buff *skb = q->tab[q->head]; + + q->head = (q->head + 1) & q->tab_mask; + if (!skb) + continue; + qdisc_qstats_backlog_dec(sch, skb); + --sch->q.qlen; + qdisc_drop(skb, sch); + } + + memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); + q->head = q->tail = 0; red_restart(&q->vars); } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index d75993f..21ca33c 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -155,14 +155,23 @@ static unsigned int fq_codel_drop(struct Qdisc *sch) skb = dequeue_head(flow); len = qdisc_pkt_len(skb); q->backlogs[idx] -= len; - kfree_skb(skb); sch->q.qlen--; qdisc_qstats_drop(sch); qdisc_qstats_backlog_dec(sch, skb); + kfree_skb(skb); flow->dropped++; return idx; } +static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch) +{ + unsigned int prev_backlog; + + prev_backlog = sch->qstats.backlog; + fq_codel_drop(sch); + return prev_backlog - sch->qstats.backlog; +} + static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); @@ -604,7 +613,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { .enqueue = fq_codel_enqueue, .dequeue = fq_codel_dequeue, .peek = qdisc_peek_dequeued, - .drop = fq_codel_drop, + .drop = fq_codel_qdisc_drop, .init = fq_codel_init, .reset = fq_codel_reset, .destroy = fq_codel_destroy, diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 89f8fcf..ade9445 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -216,6 +216,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = { .peek = qdisc_peek_head, .init = plug_init, .change = plug_change, + .reset = qdisc_reset_queue, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 7d14926..52f75a5 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -306,10 +306,10 @@ drop: len = qdisc_pkt_len(skb); slot->backlog -= len; sfq_dec(q, x); - kfree_skb(skb); sch->q.qlen--; qdisc_qstats_drop(sch); qdisc_qstats_backlog_dec(sch, skb); + kfree_skb(skb); return len; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1425ec2..17bef01 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2200,12 +2200,6 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen)) return -EFAULT; - if (sctp_sk(sk)->subscribe.sctp_data_io_event) - pr_warn_ratelimited(DEPRECATED "%s (pid %d) " - "Requested SCTP_SNDRCVINFO event.\n" - "Use SCTP_RCVINFO through SCTP_RECVRCVINFO option instead.\n", - current->comm, task_pid_nr(current)); - /* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, * if there is no data to be sent or retransmit, the stack will * immediately send up this notification. diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9825ff0..6255d14 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -240,8 +240,8 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); if (!req) goto not_found; - /* Note: this 'free' request adds it to xprt->bc_pa_list */ - xprt_free_bc_request(req); + list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); + xprt->bc_alloc_count++; } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); @@ -336,7 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); - xprt->bc_alloc_count--; + xprt_dec_alloc_count(xprt, 1); spin_unlock(&xprt->bc_pa_lock); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cbc6af9..23608eb 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1902,6 +1902,7 @@ call_transmit_status(struct rpc_task *task) switch (task->tk_status) { case -EAGAIN: + case -ENOBUFS: break; default: dprint_status(task); @@ -1928,7 +1929,6 @@ call_transmit_status(struct rpc_task *task) case -ECONNABORTED: case -EADDRINUSE: case -ENOTCONN: - case -ENOBUFS: case -EPIPE: rpc_task_force_reencode(task); } @@ -2057,12 +2057,13 @@ call_status(struct rpc_task *task) case -ECONNABORTED: rpc_force_rebind(clnt); case -EADDRINUSE: - case -ENOBUFS: rpc_delay(task, 3*HZ); case -EPIPE: case -ENOTCONN: task->tk_action = call_bind; break; + case -ENOBUFS: + rpc_delay(task, HZ>>2); case -EAGAIN: task->tk_action = call_transmit; break; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e193c2b..0030376 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -527,6 +527,10 @@ static int xs_local_send_request(struct rpc_task *task) true, &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - req->rq_bytes_sent, status); + + if (status == -EAGAIN && sock_writeable(transport->inet)) + status = -ENOBUFS; + if (likely(sent > 0) || status == 0) { req->rq_bytes_sent += sent; req->rq_xmit_bytes_sent += sent; @@ -539,6 +543,7 @@ static int xs_local_send_request(struct rpc_task *task) switch (status) { case -ENOBUFS: + break; case -EAGAIN: status = xs_nospace(task); break; @@ -589,6 +594,9 @@ static int xs_udp_send_request(struct rpc_task *task) if (status == -EPERM) goto process_status; + if (status == -EAGAIN && sock_writeable(transport->inet)) + status = -ENOBUFS; + if (sent > 0 || status == 0) { req->rq_xmit_bytes_sent += sent; if (sent >= req->rq_slen) @@ -669,9 +677,6 @@ static int xs_tcp_send_request(struct rpc_task *task) dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); - if (unlikely(sent == 0 && status < 0)) - break; - /* If we've sent the entire packet, immediately * reset the count of bytes sent. */ req->rq_bytes_sent += sent; @@ -681,18 +686,21 @@ static int xs_tcp_send_request(struct rpc_task *task) return 0; } - if (sent != 0) - continue; - status = -EAGAIN; - break; + if (status < 0) + break; + if (sent == 0) { + status = -EAGAIN; + break; + } } + if (status == -EAGAIN && sk_stream_is_writeable(transport->inet)) + status = -ENOBUFS; switch (status) { case -ENOTSOCK: status = -ENOTCONN; /* Should we call xs_close() here? */ break; - case -ENOBUFS: case -EAGAIN: status = xs_nospace(task); break; @@ -703,6 +711,7 @@ static int xs_tcp_send_request(struct rpc_task *task) case -ECONNREFUSED: case -ENOTCONN: case -EADDRINUSE: + case -ENOBUFS: case -EPIPE: clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 84f77a0..9f2add3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -171,8 +171,10 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) * released. */ - attr->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_attr_set(dev, attr); + if (err != -EOPNOTSUPP) { + attr->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_attr_set(dev, attr); + } return err; } @@ -249,8 +251,10 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) * released. */ - obj->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_obj_add(dev, obj); + if (err != -EOPNOTSUPP) { + obj->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_obj_add(dev, obj); + } return err; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 46b6ed5..3a7567f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2007,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 915b328..59cabc9 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -797,23 +797,18 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, return false; } -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype) +static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype, + bool check_no_ir) { bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | IEEE80211_CHAN_RADAR; - trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); + trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); - /* - * Under certain conditions suggested by some regulatory bodies a - * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag - * only if such relaxations are not enabled and the conditions are not - * met. - */ - if (!cfg80211_ir_permissive_chan(wiphy, iftype, chandef->chan)) + if (check_no_ir) prohibited_flags |= IEEE80211_CHAN_NO_IR; if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && @@ -827,8 +822,36 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, trace_cfg80211_return_bool(res); return res; } + +bool cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, true); +} EXPORT_SYMBOL(cfg80211_reg_can_beacon); +bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + bool check_no_ir; + + ASSERT_RTNL(); + + /* + * Under certain conditions suggested by some regulatory bodies a + * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag + * only if such relaxations are not enabled and the conditions are not + * met. + */ + check_no_ir = !cfg80211_ir_permissive_chan(wiphy, iftype, + chandef->chan); + + return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); +} +EXPORT_SYMBOL(cfg80211_reg_can_beacon_relax); + int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c264eff..76b4157 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2003,7 +2003,8 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) { + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef, + iftype)) { result = -EINVAL; break; } @@ -3403,8 +3404,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } else if (!nl80211_get_ap_channel(rdev, ¶ms)) return -EINVAL; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, - wdev->iftype)) + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; if (info->attrs[NL80211_ATTR_ACL_POLICY]) { @@ -6492,8 +6493,8 @@ skip_beacons: if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, - wdev->iftype)) + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; err = cfg80211_chandef_dfs_required(wdev->wiphy, @@ -10170,7 +10171,8 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb, return -EINVAL; /* we will be active on the TDLS link */ - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, wdev->iftype)) + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef, + wdev->iftype)) return -EINVAL; /* don't allow switching to DFS channels */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d359e06..aa2d754 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -544,15 +544,15 @@ static int call_crda(const char *alpha2) reg_regdb_query(alpha2); if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) { - pr_info("Exceeded CRDA call max attempts. Not calling CRDA\n"); + pr_debug("Exceeded CRDA call max attempts. Not calling CRDA\n"); return -EINVAL; } if (!is_world_regdom((char *) alpha2)) - pr_info("Calling CRDA for country: %c%c\n", + pr_debug("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); else - pr_info("Calling CRDA to update world regulatory domain\n"); + pr_debug("Calling CRDA to update world regulatory domain\n"); return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, env); } @@ -1589,7 +1589,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: - return cfg80211_reg_can_beacon(wiphy, &chandef, iftype); + return cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: return cfg80211_chandef_usable(wiphy, &chandef, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index af3617c..a808279 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2358,20 +2358,23 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify, TRACE_EVENT(cfg80211_reg_can_beacon, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype), - TP_ARGS(wiphy, chandef, iftype), + enum nl80211_iftype iftype, bool check_no_ir), + TP_ARGS(wiphy, chandef, iftype, check_no_ir), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY __field(enum nl80211_iftype, iftype) + __field(bool, check_no_ir) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->iftype = iftype; + __entry->check_no_ir = check_no_ir; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d", - WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype) + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d check_no_ir=%s", + WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype, + BOOL_TO_STR(__entry->check_no_ir)) ); TRACE_EVENT(cfg80211_chandef_dfs_required, |