diff options
Diffstat (limited to 'net')
67 files changed, 946 insertions, 278 deletions
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 476709b..4663c3d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1557,7 +1557,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); - if (!hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && + test_bit(HCI_UP, &hdev->flags)) { /* Execute vendor specific shutdown routine */ if (hdev->shutdown) hdev->shutdown(hdev); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 4096089..e29ad70 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -170,7 +170,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, struct br_port_msg *bpm; struct nlattr *nest, *nest2; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0e4ddb8..4b5c236 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -394,7 +394,7 @@ errout: * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask) + struct net_device *dev, u32 filter_mask, int nlflags) { struct net_bridge_port *port = br_port_get_rtnl(dev); @@ -402,7 +402,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) return 0; - return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags, filter_mask, dev); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 6ca0251..3362c29 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -828,7 +828,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port); int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask); + u32 filter_mask, int nlflags); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/core/dev.c b/net/core/dev.c index 1796cef5..2c1c67f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3079,7 +3079,7 @@ static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { - if (next_cpu != RPS_NO_CPU) { + if (next_cpu < nr_cpu_ids) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; @@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow * table entry), switch if one of the following holds: - * - Current CPU is unset (equal to RPS_NO_CPU). + * - Current CPU is unset (>= nr_cpu_ids). * - Current CPU is offline. * - The current CPU's queue tail has advanced beyond the * last packet that was enqueued using this table entry. @@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * have been dequeued, thus preserving in order delivery. */ if (unlikely(tcpu != next_cpu) && - (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || + (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; rflow = set_rps_cpu(dev, skb, rflow, next_cpu); } - if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { + if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; goto done; @@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, struct rps_dev_flow_table *flow_table; struct rps_dev_flow *rflow; bool expire = true; - int cpu; + unsigned int cpu; rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= flow_table->mask) { rflow = &flow_table->flows[flow_id]; cpu = ACCESS_ONCE(rflow->cpu); - if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - rflow->last_qtail) < (int)(10 * flow_table->mask))) @@ -5209,7 +5209,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) return -EBUSY; - if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) + if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 78fc04a..572af00 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -601,7 +601,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) } err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, - RTM_GETNSID, net, peer, -1); + RTM_NEWNSID, net, peer, -1); if (err < 0) goto err_out; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 358d52a..666e092 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2854,7 +2854,7 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask, int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask) + u32 flags, u32 mask, int nlflags) { struct nlmsghdr *nlh; struct ifinfomsg *ifm; @@ -2863,7 +2863,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; struct net_device *br_dev = netdev_master_upper_dev_get(dev); - nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags); if (nlh == NULL) return -EMSGSIZE; @@ -2969,7 +2969,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev, filter_mask) < 0) + skb, portid, seq, dev, filter_mask, + NLM_F_MULTI) < 0) break; idx++; } @@ -2977,7 +2978,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && ops->ndo_bridge_getlink(skb, portid, seq, dev, - filter_mask) < 0) + filter_mask, + NLM_F_MULTI) < 0) break; idx++; } @@ -3018,7 +3020,7 @@ static int rtnl_bridge_notify(struct net_device *dev) goto errout; } - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0); if (err < 0) goto errout; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d1967da..3cfff2a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -280,13 +280,14 @@ nodata: EXPORT_SYMBOL(__alloc_skb); /** - * build_skb - build a network buffer + * __build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of fragment, or 0 if head was kmalloced + * @frag_size: size of data, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated by kmalloc() only if - * @frag_size is 0, otherwise data should come from the page allocator. + * @frag_size is 0, otherwise data should come from the page allocator + * or vmalloc() * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : @@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb); * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ -struct sk_buff *build_skb(void *data, unsigned int frag_size) +struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct skb_shared_info *shinfo; struct sk_buff *skb; @@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); - skb->head_frag = frag_size != 0; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) return skb; } + +/* build_skb() is wrapper over __build_skb(), that specifically + * takes care of skb->head and skb->pfmemalloc + * This means that if @frag_size is not zero, then @data must be backed + * by a page fragment, not kmalloc() or vmalloc() + */ +struct sk_buff *build_skb(void *data, unsigned int frag_size) +{ + struct sk_buff *skb = __build_skb(data, frag_size); + + if (skb && frag_size) { + skb->head_frag = 1; + if (virt_to_head_page(data)->pfmemalloc) + skb->pfmemalloc = 1; + } + return skb; +} EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { @@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, gfp_t gfp = gfp_mask; if (order) { - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); nc->frag.size = PAGE_SIZE << (page ? order : 0); } diff --git a/net/core/sock.c b/net/core/sock.c index e891bcf..292f422 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1474,8 +1474,8 @@ void sk_release_kernel(struct sock *sk) return; sock_hold(sk); - sock_net_set(sk, get_net(&init_net)); sock_release(sk->sk_socket); + sock_net_set(sk, get_net(&init_net)); sock_put(sk); } EXPORT_SYMBOL(sk_release_kernel); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2b4f21d..ccf4c56 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -453,7 +453,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) iph->saddr, iph->daddr); if (req) { nsk = dccp_check_req(sk, skb, req); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9d05510..5165571 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -301,7 +301,8 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) &iph->daddr, inet6_iif(skb)); if (req) { nsk = dccp_check_req(sk, skb, req); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 5f56666..30addee 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -186,8 +186,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, if (child == NULL) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req); - inet_csk_reqsk_queue_removed(sk, req); + inet_csk_reqsk_queue_drop(sk, req); inet_csk_reqsk_queue_add(sk, req, child); out: return child; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 079a224..e6f6cc3 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -633,7 +633,7 @@ static int dsa_of_probe(struct device *dev) if (cd->sw_addr > PHY_MAX_ADDR) continue; - if (!of_property_read_u32(np, "eeprom-length", &eeprom_len)) + if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) cd->eeprom_len = eeprom_len; for_each_available_child_of_node(child, port) { diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 05dab29..4adfd4d 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -3,7 +3,9 @@ obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o obj-y += 6lowpan/ ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ - header_ops.o sysfs.o nl802154.o + header_ops.o sysfs.o nl802154.o trace.o ieee802154_socket-y := socket.o +CFLAGS_trace.o := -I$(src) + ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 1b9d25f6..346c666 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -175,6 +175,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) int rc = -ENOBUFS; struct net_device *dev; int type = __IEEE802154_DEV_INVALID; + unsigned char name_assign_type; pr_debug("%s\n", __func__); @@ -190,8 +191,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) if (devname[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] != '\0') return -EINVAL; /* phy name should be null-terminated */ + name_assign_type = NET_NAME_USER; } else { devname = "wpan%d"; + name_assign_type = NET_NAME_ENUM; } if (strlen(devname) >= IFNAMSIZ) @@ -221,7 +224,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) } dev = rdev_add_virtual_intf_deprecated(wpan_phy_to_rdev(phy), devname, - type); + name_assign_type, type); if (IS_ERR(dev)) { rc = PTR_ERR(dev); goto nla_put_failure; diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index a4daf91..f3c12f6 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -589,7 +589,7 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info) return rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL802154_ATTR_IFNAME]), - type, extended_addr); + NET_NAME_USER, type, extended_addr); } static int nl802154_del_interface(struct sk_buff *skb, struct genl_info *info) diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index 7c46732..7b5a9dd 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -4,13 +4,16 @@ #include <net/cfg802154.h> #include "core.h" +#include "trace.h" static inline struct net_device * rdev_add_virtual_intf_deprecated(struct cfg802154_registered_device *rdev, - const char *name, int type) + const char *name, + unsigned char name_assign_type, + int type) { return rdev->ops->add_virtual_intf_deprecated(&rdev->wpan_phy, name, - type); + name_assign_type, type); } static inline void @@ -22,75 +25,131 @@ rdev_del_virtual_intf_deprecated(struct cfg802154_registered_device *rdev, static inline int rdev_add_virtual_intf(struct cfg802154_registered_device *rdev, char *name, + unsigned char name_assign_type, enum nl802154_iftype type, __le64 extended_addr) { - return rdev->ops->add_virtual_intf(&rdev->wpan_phy, name, type, + int ret; + + trace_802154_rdev_add_virtual_intf(&rdev->wpan_phy, name, type, extended_addr); + ret = rdev->ops->add_virtual_intf(&rdev->wpan_phy, name, + name_assign_type, type, + extended_addr); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } static inline int rdev_del_virtual_intf(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev) { - return rdev->ops->del_virtual_intf(&rdev->wpan_phy, wpan_dev); + int ret; + + trace_802154_rdev_del_virtual_intf(&rdev->wpan_phy, wpan_dev); + ret = rdev->ops->del_virtual_intf(&rdev->wpan_phy, wpan_dev); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } static inline int rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel) { - return rdev->ops->set_channel(&rdev->wpan_phy, page, channel); + int ret; + + trace_802154_rdev_set_channel(&rdev->wpan_phy, page, channel); + ret = rdev->ops->set_channel(&rdev->wpan_phy, page, channel); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } static inline int rdev_set_cca_mode(struct cfg802154_registered_device *rdev, const struct wpan_phy_cca *cca) { - return rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); + int ret; + + trace_802154_rdev_set_cca_mode(&rdev->wpan_phy, cca); + ret = rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } static inline int rdev_set_pan_id(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 pan_id) { - return rdev->ops->set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id); + int ret; + + trace_802154_rdev_set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id); + ret = rdev->ops->set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } static inline int rdev_set_short_addr(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 short_addr) { - return rdev->ops->set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr); + int ret; + + trace_802154_rdev_set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr); + ret = rdev->ops->set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } static inline int rdev_set_backoff_exponent(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, u8 min_be, u8 max_be) { - return rdev->ops->set_backoff_exponent(&rdev->wpan_phy, wpan_dev, + int ret; + + trace_802154_rdev_set_backoff_exponent(&rdev->wpan_phy, wpan_dev, min_be, max_be); + ret = rdev->ops->set_backoff_exponent(&rdev->wpan_phy, wpan_dev, + min_be, max_be); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } static inline int rdev_set_max_csma_backoffs(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, u8 max_csma_backoffs) { - return rdev->ops->set_max_csma_backoffs(&rdev->wpan_phy, wpan_dev, - max_csma_backoffs); + int ret; + + trace_802154_rdev_set_csma_backoffs(&rdev->wpan_phy, wpan_dev, + max_csma_backoffs); + ret = rdev->ops->set_max_csma_backoffs(&rdev->wpan_phy, wpan_dev, + max_csma_backoffs); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } static inline int rdev_set_max_frame_retries(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, s8 max_frame_retries) { - return rdev->ops->set_max_frame_retries(&rdev->wpan_phy, wpan_dev, + int ret; + + trace_802154_rdev_set_max_frame_retries(&rdev->wpan_phy, wpan_dev, max_frame_retries); + ret = rdev->ops->set_max_frame_retries(&rdev->wpan_phy, wpan_dev, + max_frame_retries); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } static inline int rdev_set_lbt_mode(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, bool mode) { - return rdev->ops->set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode); + int ret; + + trace_802154_rdev_set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode); + ret = rdev->ops->set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; } #endif /* __CFG802154_RDEV_OPS */ diff --git a/net/ieee802154/trace.c b/net/ieee802154/trace.c new file mode 100644 index 0000000..95f997f --- /dev/null +++ b/net/ieee802154/trace.c @@ -0,0 +1,7 @@ +#include <linux/module.h> + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" + +#endif diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h new file mode 100644 index 0000000..5ac25eb --- /dev/null +++ b/net/ieee802154/trace.h @@ -0,0 +1,247 @@ +/* Based on net/wireless/tracing.h */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cfg802154 + +#if !defined(__RDEV_CFG802154_OPS_TRACE) || defined(TRACE_HEADER_MULTI_READ) +#define __RDEV_CFG802154_OPS_TRACE + +#include <linux/tracepoint.h> + +#include <net/cfg802154.h> + +#define MAXNAME 32 +#define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME) +#define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \ + wpan_phy_name(wpan_phy), \ + MAXNAME) +#define WPAN_PHY_PR_FMT "%s" +#define WPAN_PHY_PR_ARG __entry->wpan_phy_name + +#define WPAN_DEV_ENTRY __field(u32, identifier) +#define WPAN_DEV_ASSIGN (__entry->identifier) = (!IS_ERR_OR_NULL(wpan_dev) \ + ? wpan_dev->identifier : 0) +#define WPAN_DEV_PR_FMT "wpan_dev(%u)" +#define WPAN_DEV_PR_ARG (__entry->identifier) + +#define WPAN_CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \ + __field(enum nl802154_cca_opts, cca_opt) +#define WPAN_CCA_ASSIGN \ + do { \ + (__entry->cca_mode) = cca->mode; \ + (__entry->cca_opt) = cca->opt; \ + } while (0) +#define WPAN_CCA_PR_FMT "cca_mode: %d, cca_opt: %d" +#define WPAN_CCA_PR_ARG __entry->cca_mode, __entry->cca_opt + +#define BOOL_TO_STR(bo) (bo) ? "true" : "false" + +/************************************************************* + * rdev->ops traces * + *************************************************************/ + +TRACE_EVENT(802154_rdev_add_virtual_intf, + TP_PROTO(struct wpan_phy *wpan_phy, char *name, + enum nl802154_iftype type, __le64 extended_addr), + TP_ARGS(wpan_phy, name, type, extended_addr), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + __string(vir_intf_name, name ? name : "<noname>") + __field(enum nl802154_iftype, type) + __field(__le64, extended_addr) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + __assign_str(vir_intf_name, name ? name : "<noname>"); + __entry->type = type; + __entry->extended_addr = extended_addr; + ), + TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, ea %llx", + WPAN_PHY_PR_ARG, __get_str(vir_intf_name), __entry->type, + __le64_to_cpu(__entry->extended_addr)) +); + +TRACE_EVENT(802154_rdev_del_virtual_intf, + TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), + TP_ARGS(wpan_phy, wpan_dev), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + ), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT, WPAN_PHY_PR_ARG, + WPAN_DEV_PR_ARG) +); + +TRACE_EVENT(802154_rdev_set_channel, + TP_PROTO(struct wpan_phy *wpan_phy, u8 page, u8 channel), + TP_ARGS(wpan_phy, page, channel), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + __field(u8, page) + __field(u8, channel) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + __entry->page = page; + __entry->channel = channel; + ), + TP_printk(WPAN_PHY_PR_FMT ", page: %d, channel: %d", WPAN_PHY_PR_ARG, + __entry->page, __entry->channel) +); + +TRACE_EVENT(802154_rdev_set_cca_mode, + TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca), + TP_ARGS(wpan_phy, cca), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_CCA_ENTRY + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_CCA_ASSIGN; + ), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_CCA_PR_FMT, WPAN_PHY_PR_ARG, + WPAN_CCA_PR_ARG) +); + +DECLARE_EVENT_CLASS(802154_le16_template, + TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + __le16 le16arg), + TP_ARGS(wpan_phy, wpan_dev, le16arg), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(__le16, le16arg) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->le16arg = le16arg; + ), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", pan id: 0x%04x", + WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, + __le16_to_cpu(__entry->le16arg)) +); + +DEFINE_EVENT(802154_le16_template, 802154_rdev_set_pan_id, + TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + __le16 le16arg), + TP_ARGS(wpan_phy, wpan_dev, le16arg) +); + +DEFINE_EVENT_PRINT(802154_le16_template, 802154_rdev_set_short_addr, + TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + __le16 le16arg), + TP_ARGS(wpan_phy, wpan_dev, le16arg), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", sa: 0x%04x", + WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, + __le16_to_cpu(__entry->le16arg)) +); + +TRACE_EVENT(802154_rdev_set_backoff_exponent, + TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + u8 min_be, u8 max_be), + TP_ARGS(wpan_phy, wpan_dev, min_be, max_be), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(u8, min_be) + __field(u8, max_be) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->min_be = min_be; + __entry->max_be = max_be; + ), + + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT + ", min be: %d, max_be: %d", WPAN_PHY_PR_ARG, + WPAN_DEV_PR_ARG, __entry->min_be, __entry->max_be) +); + +TRACE_EVENT(802154_rdev_set_csma_backoffs, + TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + u8 max_csma_backoffs), + TP_ARGS(wpan_phy, wpan_dev, max_csma_backoffs), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(u8, max_csma_backoffs) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->max_csma_backoffs = max_csma_backoffs; + ), + + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT + ", max csma backoffs: %d", WPAN_PHY_PR_ARG, + WPAN_DEV_PR_ARG, __entry->max_csma_backoffs) +); + +TRACE_EVENT(802154_rdev_set_max_frame_retries, + TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + s8 max_frame_retries), + TP_ARGS(wpan_phy, wpan_dev, max_frame_retries), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(s8, max_frame_retries) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->max_frame_retries = max_frame_retries; + ), + + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT + ", max frame retries: %d", WPAN_PHY_PR_ARG, + WPAN_DEV_PR_ARG, __entry->max_frame_retries) +); + +TRACE_EVENT(802154_rdev_set_lbt_mode, + TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + bool mode), + TP_ARGS(wpan_phy, wpan_dev, mode), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(bool, mode) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->mode = mode; + ), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT + ", lbt mode: %s", WPAN_PHY_PR_ARG, + WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->mode)) +); + +TRACE_EVENT(802154_rdev_return_int, + TP_PROTO(struct wpan_phy *wpan_phy, int ret), + TP_ARGS(wpan_phy, ret), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + __field(int, ret) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + __entry->ret = ret; + ), + TP_printk(WPAN_PHY_PR_FMT ", returned: %d", WPAN_PHY_PR_ARG, + __entry->ret) +); + +#endif /* !__RDEV_CFG802154_OPS_TRACE || TRACE_HEADER_MULTI_READ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace +#include <trace/define_trace.h> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5c3dd62..8976ca4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -564,6 +564,40 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); +/* return true if req was found in the syn_table[] */ +static bool reqsk_queue_unlink(struct request_sock_queue *queue, + struct request_sock *req) +{ + struct listen_sock *lopt = queue->listen_opt; + struct request_sock **prev; + bool found = false; + + spin_lock(&queue->syn_wait_lock); + + for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; + prev = &(*prev)->dl_next) { + if (*prev == req) { + *prev = req->dl_next; + found = true; + break; + } + } + + spin_unlock(&queue->syn_wait_lock); + if (del_timer(&req->rsk_timer)) + reqsk_put(req); + return found; +} + +void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +{ + if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) { + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); + reqsk_put(req); + } +} +EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); + static void reqsk_timer_handler(unsigned long data) { struct request_sock *req = (struct request_sock *)data; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index bb77ebd..4d32262 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -224,14 +224,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, handler->idiag_get_info(sk, r, info); if (sk->sk_state < TCP_TIME_WAIT) { - int err = 0; + union tcp_cc_info info; + size_t sz = 0; + int attr; rcu_read_lock(); ca_ops = READ_ONCE(icsk->icsk_ca_ops); if (ca_ops && ca_ops->get_info) - err = ca_ops->get_info(sk, ext, skb); + sz = ca_ops->get_info(sk, ext, &attr, &info); rcu_read_unlock(); - if (err < 0) + if (sz && nla_put(skb, attr, sz, &info) < 0) goto errout; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index a93f260..05ff44b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -158,6 +158,7 @@ void ping_unhash(struct sock *sk) if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); + sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a78540f..bff62fc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -962,10 +962,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (dst_metric_locked(dst, RTAX_MTU)) return; - if (dst->dev->mtu < mtu) - return; - - if (rt->rt_pmtu && rt->rt_pmtu < mtu) + if (ipv4_mtu(dst) < mtu) return; if (mtu < ip_rt_min_pmtu) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8c5cd9e..46efa03 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -252,6 +252,7 @@ #include <linux/types.h> #include <linux/fcntl.h> #include <linux/poll.h> +#include <linux/inet_diag.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/skbuff.h> @@ -2592,7 +2593,7 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); #endif /* Return information about state of tcp endpoint in API format. */ -void tcp_get_info(const struct sock *sk, struct tcp_info *info) +void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2663,6 +2664,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) rate = READ_ONCE(sk->sk_max_pacing_rate); info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; + + spin_lock_bh(&sk->sk_lock.slock); + info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -2734,6 +2740,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; } + case TCP_CC_INFO: { + const struct tcp_congestion_ops *ca_ops; + union tcp_cc_info info; + size_t sz = 0; + int attr; + + if (get_user(len, optlen)) + return -EFAULT; + + ca_ops = icsk->icsk_ca_ops; + if (ca_ops && ca_ops->get_info) + sz = ca_ops->get_info(sk, ~0U, &attr, &info); + + len = min_t(unsigned int, len, sz); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &info, len)) + return -EFAULT; + return 0; + } case TCP_QUICKACK: val = !icsk->icsk_ack.pingpong; break; diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 4376016..4c41c12 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -277,7 +277,8 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) } } -static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) +static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) { const struct dctcp *ca = inet_csk_ca(sk); @@ -286,18 +287,17 @@ static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) */ if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) || ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcp_dctcp_info info; - - memset(&info, 0, sizeof(info)); + memset(info, 0, sizeof(struct tcp_dctcp_info)); if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) { - info.dctcp_enabled = 1; - info.dctcp_ce_state = (u16) ca->ce_state; - info.dctcp_alpha = ca->dctcp_alpha; - info.dctcp_ab_ecn = ca->acked_bytes_ecn; - info.dctcp_ab_tot = ca->acked_bytes_total; + info->dctcp.dctcp_enabled = 1; + info->dctcp.dctcp_ce_state = (u16) ca->ce_state; + info->dctcp.dctcp_alpha = ca->dctcp_alpha; + info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; + info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; } - return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info); + *attr = INET_DIAG_DCTCPINFO; + return sizeof(*info); } return 0; } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index e3d87ac..3c673d5 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -206,6 +206,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, skb_set_owner_r(skb2, child); __skb_queue_tail(&child->sk_receive_queue, skb2); tp->syn_data_acked = 1; + tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1; } else { end_seq = TCP_SKB_CB(skb)->seq + 1; } diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 67476f0..f71002e 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -300,24 +300,25 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) } /* Extract info for Tcp socket info provided via netlink. */ -static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb) +static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) { const struct illinois *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcpvegas_info info = { - .tcpv_enabled = 1, - .tcpv_rttcnt = ca->cnt_rtt, - .tcpv_minrtt = ca->base_rtt, - }; + info->vegas.tcpv_enabled = 1; + info->vegas.tcpv_rttcnt = ca->cnt_rtt; + info->vegas.tcpv_minrtt = ca->base_rtt; + info->vegas.tcpv_rtt = 0; - if (info.tcpv_rttcnt > 0) { + if (info->vegas.tcpv_rttcnt > 0) { u64 t = ca->sum_rtt; - do_div(t, info.tcpv_rttcnt); - info.tcpv_rtt = t; + do_div(t, info->vegas.tcpv_rttcnt); + info->vegas.tcpv_rtt = t; } - return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + *attr = INET_DIAG_VEGASINFO; + return sizeof(struct tcpvegas_info); } return 0; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3a4d9b34..bc790ea 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1820,14 +1820,12 @@ advance_sp: for (j = 0; j < used_sacks; j++) tp->recv_sack_cache[i++] = sp[j]; - tcp_mark_lost_retrans(sk); - - tcp_verify_left_out(tp); - if ((state.reord < tp->fackets_out) && ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); + tcp_mark_lost_retrans(sk); + tcp_verify_left_out(tp); out: #if FASTRETRANS_DEBUG > 0 @@ -3280,6 +3278,24 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp, (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd); } +/* If we update tp->snd_una, also update tp->bytes_acked */ +static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) +{ + u32 delta = ack - tp->snd_una; + + tp->bytes_acked += delta; + tp->snd_una = ack; +} + +/* If we update tp->rcv_nxt, also update tp->bytes_received */ +static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) +{ + u32 delta = seq - tp->rcv_nxt; + + tp->bytes_received += delta; + tp->rcv_nxt = seq; +} + /* Update our send window. * * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 @@ -3315,7 +3331,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 } } - tp->snd_una = ack; + tcp_snd_una_update(tp, ack); return flag; } @@ -3497,7 +3513,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * Note, we use the fact that SND.UNA>=SND.WL2. */ tcp_update_wl(tp, ack_seq); - tp->snd_una = ack; + tcp_snd_una_update(tp, ack); flag |= FLAG_WIN_UPDATE; tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); @@ -4236,7 +4252,7 @@ static void tcp_ofo_queue(struct sock *sk) tail = skb_peek_tail(&sk->sk_receive_queue); eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (!eaten) __skb_queue_tail(&sk->sk_receive_queue, skb); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) @@ -4404,7 +4420,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int __skb_pull(skb, hdrlen); eaten = (tail && tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; - tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); @@ -4497,7 +4513,7 @@ queue_and_out: eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); } - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (skb->len) tcp_event_data_recv(sk, skb); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) @@ -5245,7 +5261,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); __skb_pull(skb, tcp_header_len); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); eaten = 1; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3571f2b..fc1c658 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1348,7 +1348,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); if (req) { nsk = tcp_check_req(sk, skb, req, false); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d6311..e5d7649 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -755,10 +755,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req); - inet_csk_reqsk_queue_removed(sk, req); - + inet_csk_reqsk_queue_drop(sk, req); inet_csk_reqsk_queue_add(sk, req, child); + /* Warning: caller must not call reqsk_put(req); + * child stole last reference on it. + */ return child; listen_overflow: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8c8d7e0..a369e8a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2812,39 +2812,65 @@ begin_fwd: } } -/* Send a fin. The caller locks the socket for us. This cannot be - * allowed to fail queueing a FIN frame under any circumstances. +/* We allow to exceed memory limits for FIN packets to expedite + * connection tear down and (memory) recovery. + * Otherwise tcp_send_fin() could be tempted to either delay FIN + * or even be forced to close flow without any FIN. + */ +static void sk_forced_wmem_schedule(struct sock *sk, int size) +{ + int amt, status; + + if (size <= sk->sk_forward_alloc) + return; + amt = sk_mem_pages(size); + sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + sk_memory_allocated_add(sk, amt, &status); +} + +/* Send a FIN. The caller locks the socket for us. + * We should try to send a FIN packet really hard, but eventually give up. */ void tcp_send_fin(struct sock *sk) { + struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = tcp_write_queue_tail(sk); - int mss_now; - /* Optimization, tack on the FIN if we have a queue of - * unsent frames. But be careful about outgoing SACKS - * and IP options. + /* Optimization, tack on the FIN if we have one skb in write queue and + * this skb was not yet sent, or we are under memory pressure. + * Note: in the latter case, FIN packet will be sent after a timeout, + * as TCP stack thinks it has already been transmitted. */ - mss_now = tcp_current_mss(sk); - - if (tcp_send_head(sk)) { - TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; - TCP_SKB_CB(skb)->end_seq++; + if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) { +coalesce: + TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; + TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; + if (!tcp_send_head(sk)) { + /* This means tskb was already sent. + * Pretend we included the FIN on previous transmit. + * We need to set tp->snd_nxt to the value it would have + * if FIN had been sent. This is because retransmit path + * does not change tp->snd_nxt. + */ + tp->snd_nxt++; + return; + } } else { - /* Socket is locked, keep trying until memory is available. */ - for (;;) { - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); - if (skb) - break; - yield(); + skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + if (unlikely(!skb)) { + if (tskb) + goto coalesce; + return; } + skb_reserve(skb, MAX_TCP_HEADER); + sk_forced_wmem_schedule(sk, skb->truesize); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); tcp_queue_skb(sk, skb); } - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); + __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); } /* We get here when a process closes a file descriptor (either due to diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c71a1b8..a6cea1d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -286,18 +286,19 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) } /* Extract info for Tcp socket info provided via netlink. */ -int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) +size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) { const struct vegas *ca = inet_csk_ca(sk); + if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcpvegas_info info = { - .tcpv_enabled = ca->doing_vegas_now, - .tcpv_rttcnt = ca->cntRTT, - .tcpv_rtt = ca->baseRTT, - .tcpv_minrtt = ca->minRTT, - }; - - return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + info->vegas.tcpv_enabled = ca->doing_vegas_now, + info->vegas.tcpv_rttcnt = ca->cntRTT, + info->vegas.tcpv_rtt = ca->baseRTT, + info->vegas.tcpv_minrtt = ca->minRTT, + + *attr = INET_DIAG_VEGASINFO; + return sizeof(struct tcpvegas_info); } return 0; } diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index e8a6b33..ef9da53 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -19,6 +19,7 @@ void tcp_vegas_init(struct sock *sk); void tcp_vegas_state(struct sock *sk, u8 ca_state); void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); -int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); +size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info); #endif /* __TCP_VEGAS_H */ diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index b3c57cc..c10732e 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -256,18 +256,19 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) } /* Extract info for Tcp socket info provided via netlink. */ -static int tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) +static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) { const struct westwood *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcpvegas_info info = { - .tcpv_enabled = 1, - .tcpv_rtt = jiffies_to_usecs(ca->rtt), - .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), - }; + info->vegas.tcpv_enabled = 1; + info->vegas.tcpv_rttcnt = 0; + info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt), + info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), - return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + *attr = INET_DIAG_VEGASINFO; + return sizeof(struct tcpvegas_info); } return 0; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b5e6cc1..a38d3ac 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1246,7 +1246,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; - int i; tunnel = netdev_priv(dev); @@ -1260,16 +1259,10 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6gre_tunnel_stats; - ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6gre_tunnel_stats->syncp); - } - return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7fde1f2..c217775 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -886,22 +886,45 @@ static int ip6_dst_lookup_tail(struct sock *sk, #endif int err; - if (!*dst) - *dst = ip6_route_output(net, sk, fl6); - - err = (*dst)->error; - if (err) - goto out_err_release; + /* The correct way to handle this would be to do + * ip6_route_get_saddr, and then ip6_route_output; however, + * the route-specific preferred source forces the + * ip6_route_output call _before_ ip6_route_get_saddr. + * + * In source specific routing (no src=any default route), + * ip6_route_output will fail given src=any saddr, though, so + * that's why we try it again later. + */ + if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) { + struct rt6_info *rt; + bool had_dst = *dst != NULL; - if (ipv6_addr_any(&fl6->saddr)) { - struct rt6_info *rt = (struct rt6_info *) *dst; + if (!had_dst) + *dst = ip6_route_output(net, sk, fl6); + rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; err = ip6_route_get_saddr(net, rt, &fl6->daddr, sk ? inet6_sk(sk)->srcprefs : 0, &fl6->saddr); if (err) goto out_err_release; + + /* If we had an erroneous initial result, pretend it + * never existed and let the SA-enabled version take + * over. + */ + if (!had_dst && (*dst)->error) { + dst_release(*dst); + *dst = NULL; + } } + if (!*dst) + *dst = ip6_route_output(net, sk, fl6); + + err = (*dst)->error; + if (err) + goto out_err_release; + #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* * Here if the dst entry we've looked up diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5c48293..d358888 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2245,9 +2245,10 @@ int ip6_route_get_saddr(struct net *net, unsigned int prefs, struct in6_addr *saddr) { - struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt); + struct inet6_dev *idev = + rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; int err = 0; - if (rt->rt6i_prefsrc.plen) + if (rt && rt->rt6i_prefsrc.plen) *saddr = rt->rt6i_prefsrc.addr; else err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ad51df8..b6575d6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -946,7 +946,8 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) { nsk = tcp_check_req(sk, skb, req, false); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b4ac596..bab5c63 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -819,13 +819,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * (because if we remove a STA after ops->remove_interface() * the driver will have removed the vif info already!) * - * This is relevant only in WDS mode, in all other modes we've - * already removed all stations when disconnecting or similar, - * so warn otherwise. + * In WDS mode a station must exist here and be flushed, for + * AP_VLANs stations may exist since there's nothing else that + * would have removed them, but in other modes there shouldn't + * be any stations. */ flushed = sta_info_flush(sdata); - WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || - (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); + WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + ((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || + (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1))); /* don't count this interface for promisc/allmulti while it is down */ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 12971b7..2880f2a 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -66,6 +66,7 @@ static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ + .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, sta.addr), .key_len = ETH_ALEN, @@ -157,8 +158,24 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + struct rhash_head *tmp; + const struct bucket_table *tbl; + + rcu_read_lock(); + tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - return rhashtable_lookup_fast(&local->sta_hash, addr, sta_rht_params); + for_each_sta_info(local, tbl, addr, sta, tmp) { + if (sta->sdata == sdata) { + rcu_read_unlock(); + /* this is safe as the caller must already hold + * another rcu read section or the mutex + */ + return sta; + } + } + rcu_read_unlock(); + return NULL; } /* diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index 5d9f68c..70be9c7 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -22,13 +22,14 @@ static struct net_device * ieee802154_add_iface_deprecated(struct wpan_phy *wpan_phy, - const char *name, int type) + const char *name, + unsigned char name_assign_type, int type) { struct ieee802154_local *local = wpan_phy_priv(wpan_phy); struct net_device *dev; rtnl_lock(); - dev = ieee802154_if_add(local, name, type, + dev = ieee802154_if_add(local, name, name_assign_type, type, cpu_to_le64(0x0000000000000000ULL)); rtnl_unlock(); @@ -45,12 +46,14 @@ static void ieee802154_del_iface_deprecated(struct wpan_phy *wpan_phy, static int ieee802154_add_iface(struct wpan_phy *phy, const char *name, + unsigned char name_assign_type, enum nl802154_iftype type, __le64 extended_addr) { struct ieee802154_local *local = wpan_phy_priv(phy); struct net_device *err; - err = ieee802154_if_add(local, name, type, extended_addr); + err = ieee802154_if_add(local, name, name_assign_type, type, + extended_addr); return PTR_ERR_OR_ZERO(err); } diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index bebd70f..127ba18 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -182,7 +182,8 @@ void ieee802154_iface_exit(void); void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata); struct net_device * ieee802154_if_add(struct ieee802154_local *local, const char *name, - enum nl802154_iftype type, __le64 extended_addr); + unsigned char name_assign_type, enum nl802154_iftype type, + __le64 extended_addr); void ieee802154_remove_interfaces(struct ieee802154_local *local); #endif /* __IEEE802154_I_H */ diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 38b56f9..91b75ab 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -522,7 +522,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, struct net_device * ieee802154_if_add(struct ieee802154_local *local, const char *name, - enum nl802154_iftype type, __le64 extended_addr) + unsigned char name_assign_type, enum nl802154_iftype type, + __le64 extended_addr) { struct net_device *ndev = NULL; struct ieee802154_sub_if_data *sdata = NULL; @@ -531,7 +532,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, ASSERT_RTNL(); ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, name, - NET_NAME_UNKNOWN, ieee802154_if_setup); + name_assign_type, ieee802154_if_setup); if (!ndev) return ERR_PTR(-ENOMEM); diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index dcf7395..5b2be12 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -134,7 +134,7 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template) for (i = 0; i < ARRAY_SIZE(key->tfm); i++) { key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); - if (!key->tfm[i]) + if (IS_ERR(key->tfm[i])) goto err_tfm; if (crypto_aead_setkey(key->tfm[i], template->key, IEEE802154_LLSEC_KEY_SIZE)) @@ -144,7 +144,7 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template) } key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); - if (!key->tfm0) + if (IS_ERR(key->tfm0)) goto err_tfm; if (crypto_blkcipher_setkey(key->tfm0, template->key, diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 8500378..08cb32d 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -161,18 +161,21 @@ int ieee802154_register_hw(struct ieee802154_hw *hw) rtnl_lock(); - dev = ieee802154_if_add(local, "wpan%d", NL802154_IFTYPE_NODE, + dev = ieee802154_if_add(local, "wpan%d", NET_NAME_ENUM, + NL802154_IFTYPE_NODE, cpu_to_le64(0x0000000000000000ULL)); if (IS_ERR(dev)) { rtnl_unlock(); rc = PTR_ERR(dev); - goto out_wq; + goto out_phy; } rtnl_unlock(); return 0; +out_phy: + wpan_phy_unregister(local->phy); out_wq: destroy_workqueue(local->workqueue); out: diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index db8a2ea..7b3f732 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -53,6 +53,11 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) return rt; } +static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->mpls_ptr); +} + static bool mpls_output_possible(const struct net_device *dev) { return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); @@ -136,6 +141,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct mpls_route *rt; struct mpls_entry_decoded dec; struct net_device *out_dev; + struct mpls_dev *mdev; unsigned int hh_len; unsigned int new_header_size; unsigned int mtu; @@ -143,6 +149,10 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Careful this entire function runs inside of an rcu critical section */ + mdev = mpls_dev_get(dev); + if (!mdev || !mdev->input_enabled) + goto drop; + if (skb->pkt_type != PACKET_HOST) goto drop; @@ -352,9 +362,9 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (!dev) goto errout; - /* For now just support ethernet devices */ + /* Ensure this is a supported device */ err = -EINVAL; - if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) + if (!mpls_dev_get(dev)) goto errout; err = -EINVAL; @@ -428,10 +438,89 @@ errout: return err; } +#define MPLS_PERDEV_SYSCTL_OFFSET(field) \ + (&((struct mpls_dev *)0)->field) + +static const struct ctl_table mpls_dev_table[] = { + { + .procname = "input", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), + }, + { } +}; + +static int mpls_dev_sysctl_register(struct net_device *dev, + struct mpls_dev *mdev) +{ + char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; + struct ctl_table *table; + int i; + + table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL); + if (!table) + goto out; + + /* Table data contains only offsets relative to the base of + * the mdev at this point, so make them absolute. + */ + for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) + table[i].data = (char *)mdev + (uintptr_t)table[i].data; + + snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); + + mdev->sysctl = register_net_sysctl(dev_net(dev), path, table); + if (!mdev->sysctl) + goto free; + + return 0; + +free: + kfree(table); +out: + return -ENOBUFS; +} + +static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev) +{ + struct ctl_table *table; + + table = mdev->sysctl->ctl_table_arg; + unregister_net_sysctl_table(mdev->sysctl); + kfree(table); +} + +static struct mpls_dev *mpls_add_dev(struct net_device *dev) +{ + struct mpls_dev *mdev; + int err = -ENOMEM; + + ASSERT_RTNL(); + + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(err); + + err = mpls_dev_sysctl_register(dev, mdev); + if (err) + goto free; + + rcu_assign_pointer(dev->mpls_ptr, mdev); + + return mdev; + +free: + kfree(mdev); + return ERR_PTR(err); +} + static void mpls_ifdown(struct net_device *dev) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); + struct mpls_dev *mdev; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -443,14 +532,35 @@ static void mpls_ifdown(struct net_device *dev) continue; rt->rt_dev = NULL; } + + mdev = mpls_dev_get(dev); + if (!mdev) + return; + + mpls_dev_sysctl_unregister(mdev); + + RCU_INIT_POINTER(dev->mpls_ptr, NULL); + + kfree(mdev); } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mpls_dev *mdev; switch(event) { + case NETDEV_REGISTER: + /* For now just support ethernet devices */ + if ((dev->type == ARPHRD_ETHER) || + (dev->type == ARPHRD_LOOPBACK)) { + mdev = mpls_add_dev(dev); + if (IS_ERR(mdev)) + return notifier_from_errno(PTR_ERR(mdev)); + } + break; + case NETDEV_UNREGISTER: mpls_ifdown(dev); break; @@ -536,6 +646,15 @@ int nla_get_labels(const struct nlattr *nla, if ((dec.bos != bos) || dec.ttl || dec.tc) return -EINVAL; + switch (dec.label) { + case MPLS_LABEL_IMPLNULL: + /* RFC3032: This is a label that an LSR may + * assign and distribute, but which never + * actually appears in the encapsulation. + */ + return -EINVAL; + } + label[i] = dec.label; } *labels = nla_labels; @@ -816,7 +935,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) } /* In case the predefined labels need to be populated */ - if (limit > LABEL_IPV4_EXPLICIT_NULL) { + if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; rt0 = mpls_rt_alloc(lo->addr_len); if (!rt0) @@ -826,7 +945,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) rt0->rt_via_table = NEIGH_LINK_TABLE; memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); } - if (limit > LABEL_IPV6_EXPLICIT_NULL) { + if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; rt2 = mpls_rt_alloc(lo->addr_len); if (!rt2) @@ -854,15 +973,15 @@ static int resize_platform_label_table(struct net *net, size_t limit) memcpy(labels, old, cp_size); /* If needed set the predefined labels */ - if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) && - (limit > LABEL_IPV6_EXPLICIT_NULL)) { - RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2); + if ((old_limit <= MPLS_LABEL_IPV6NULL) && + (limit > MPLS_LABEL_IPV6NULL)) { + RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2); rt2 = NULL; } - if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) && - (limit > LABEL_IPV4_EXPLICIT_NULL)) { - RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0); + if ((old_limit <= MPLS_LABEL_IPV4NULL) && + (limit > MPLS_LABEL_IPV4NULL)) { + RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0); rt0 = NULL; } @@ -912,7 +1031,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write, return ret; } -static struct ctl_table mpls_table[] = { +static const struct ctl_table mpls_table[] = { { .procname = "platform_labels", .data = NULL, diff --git a/net/mpls/internal.h b/net/mpls/internal.h index fb6de920..b064c34 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -1,16 +1,6 @@ #ifndef MPLS_INTERNAL_H #define MPLS_INTERNAL_H -#define LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */ -#define LABEL_ROUTER_ALERT_LABEL 1 /* RFC3032 */ -#define LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */ -#define LABEL_IMPLICIT_NULL 3 /* RFC3032 */ -#define LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */ -#define LABEL_GAL 13 /* RFC5586 */ -#define LABEL_OAM_ALERT 14 /* RFC3429 */ -#define LABEL_EXTENSION 15 /* RFC7274 */ - - struct mpls_shim_hdr { __be32 label_stack_entry; }; @@ -22,6 +12,12 @@ struct mpls_entry_decoded { u8 bos; }; +struct mpls_dev { + int input_enabled; + + struct ctl_table_header *sysctl; +}; + struct sk_buff; static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 78af83b..ad9d11f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4340,7 +4340,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: - desc->len = sizeof(data->verdict); break; case NFT_JUMP: case NFT_GOTO: @@ -4355,10 +4354,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, chain->use++; data->verdict.chain = chain; - desc->len = sizeof(data); break; } + desc->len = sizeof(data->verdict); desc->type = NFT_DATA_VERDICT; return 0; } diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 57d3e1a..0522fc9 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -63,6 +63,8 @@ int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) goto nla_put_failure; break; + default: + break; } return 0; diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 62cabee..635dbba 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -108,6 +108,8 @@ static int nft_reject_inet_dump(struct sk_buff *skb, if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) goto nla_put_failure; break; + default: + break; } return 0; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 19909d0..daa0b81 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1629,13 +1629,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size, if (data == NULL) return NULL; - skb = build_skb(data, size); + skb = __build_skb(data, size); if (skb == NULL) vfree(data); - else { - skb->head_frag = 0; + else skb->destructor = netlink_skb_destructor; - } return skb; } @@ -3141,7 +3139,6 @@ static const struct rhashtable_params netlink_rhashtable_params = { .key_len = netlink_compare_arg_len, .obj_hashfn = netlink_hash, .obj_cmpfn = netlink_compare, - .max_size = 65536, .automatic_shrinking = true, }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5102c3c..b5989c6 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2311,11 +2311,14 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(&po->sk, hlen + tlen + sizeof(struct sockaddr_ll), - 0, &err); + !need_wait, &err); - if (unlikely(skb == NULL)) + if (unlikely(skb == NULL)) { + /* we assume the socket was initially writeable ... */ + if (likely(len_sum > 0)) + err = len_sum; goto out_status; - + } tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); if (tp_len > dev->mtu + dev->hard_header_len) { diff --git a/net/rds/connection.c b/net/rds/connection.c index 14f04139..da6da57 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -126,7 +126,10 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, struct rds_transport *loop_trans; unsigned long flags; int ret; + struct rds_transport *otrans = trans; + if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP) + goto new_conn; rcu_read_lock(); conn = rds_conn_lookup(head, laddr, faddr, trans); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && @@ -142,6 +145,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, if (conn) goto out; +new_conn: conn = kmem_cache_zalloc(rds_conn_slab, gfp); if (!conn) { conn = ERR_PTR(-ENOMEM); @@ -230,13 +234,22 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, /* Creating normal conn */ struct rds_connection *found; - found = rds_conn_lookup(head, laddr, faddr, trans); + if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP) + found = NULL; + else + found = rds_conn_lookup(head, laddr, faddr, trans); if (found) { trans->conn_free(conn->c_transport_data); kmem_cache_free(rds_conn_slab, conn); conn = found; } else { - hlist_add_head_rcu(&conn->c_hash_node, head); + if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) || + (otrans->t_type != RDS_TRANS_TCP)) { + /* Only the active side should be added to + * reconnect list for TCP. + */ + hlist_add_head_rcu(&conn->c_hash_node, head); + } rds_cong_add_conn(conn); rds_conn_count++; } diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 31b74f5..8a09ee7 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -183,8 +183,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even /* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ - if (dp && dp->dp_ack_seq) - rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); + if (dp) { + /* dp structure start is not guaranteed to be 8 bytes aligned. + * Since dp_ack_seq is 64-bit extended load operations can be + * used so go through get_unaligned to avoid unaligned errors. + */ + __be64 dp_ack_seq = get_unaligned(&dp->dp_ack_seq); + + if (dp_ack_seq) + rds_send_drop_acked(conn, be64_to_cpu(dp_ack_seq), + NULL); + } rds_connect_complete(conn); } diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index f9f564a..973109c7 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -62,6 +62,7 @@ void rds_tcp_state_change(struct sock *sk) case TCP_ESTABLISHED: rds_connect_complete(conn); break; + case TCP_CLOSE_WAIT: case TCP_CLOSE: rds_conn_drop(conn); default: diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 23ab4dcd..0da49e3 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -45,12 +45,45 @@ static void rds_tcp_accept_worker(struct work_struct *work); static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker); static struct socket *rds_tcp_listen_sock; +static int rds_tcp_keepalive(struct socket *sock) +{ + /* values below based on xs_udp_default_timeout */ + int keepidle = 5; /* send a probe 'keepidle' secs after last data */ + int keepcnt = 5; /* number of unack'ed probes before declaring dead */ + int keepalive = 1; + int ret = 0; + + ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&keepalive, sizeof(keepalive)); + if (ret < 0) + goto bail; + + ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, + (char *)&keepcnt, sizeof(keepcnt)); + if (ret < 0) + goto bail; + + ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, + (char *)&keepidle, sizeof(keepidle)); + if (ret < 0) + goto bail; + + /* KEEPINTVL is the interval between successive probes. We follow + * the model in xs_tcp_finish_connecting() and re-use keepidle. + */ + ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, + (char *)&keepidle, sizeof(keepidle)); +bail: + return ret; +} + static int rds_tcp_accept_one(struct socket *sock) { struct socket *new_sock = NULL; struct rds_connection *conn; int ret; struct inet_sock *inet; + struct rds_tcp_connection *rs_tcp; ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, &new_sock); @@ -63,6 +96,10 @@ static int rds_tcp_accept_one(struct socket *sock) if (ret < 0) goto out; + ret = rds_tcp_keepalive(new_sock); + if (ret < 0) + goto out; + rds_tcp_tune(new_sock); inet = inet_sk(new_sock->sk); @@ -77,6 +114,15 @@ static int rds_tcp_accept_one(struct socket *sock) ret = PTR_ERR(conn); goto out; } + /* An incoming SYN request came in, and TCP just accepted it. + * We always create a new conn for listen side of TCP, and do not + * add it to the c_hash_list. + * + * If the client reboots, this conn will need to be cleaned up. + * rds_tcp_state_change() will do that cleanup + */ + rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; + WARN_ON(!rs_tcp || rs_tcp->t_sock); /* * see the comment above rds_queue_delayed_reconnect() diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8e47251..295d14b 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -63,7 +63,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, skb->mark = c->mark; /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; - nf_ct_put(c); goto out; } @@ -82,7 +81,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, nf_ct_put(c); out: - skb->nfct = NULL; spin_unlock(&ca->tcf_lock); return ca->tcf_action; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8b0470e..b6ef9a0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -308,12 +308,11 @@ replay: case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); if (err == 0) { - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); - if (tcf_destroy(tp, false)) { - struct tcf_proto *next = rtnl_dereference(tp->next); + struct tcf_proto *next = rtnl_dereference(tp->next); + tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); - } } goto errout; case RTM_GETTFILTER: diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index de28f8e..7a0bdb1 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -164,7 +164,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = DEFAULT_CODEL_LIMIT; - codel_params_init(&q->params); + codel_params_init(&q->params, sch); codel_vars_init(&q->vars); codel_stats_init(&q->stats); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 1e52dec..c244c45b 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -391,7 +391,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) q->perturbation = prandom_u32(); INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); - codel_params_init(&q->cparams); + codel_params_init(&q->cparams, sch); codel_stats_init(&q->cstats); q->cparams.ecn = true; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index a4ca451..634529e 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -229,7 +229,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) break; } - if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { + if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) { q->backlog += qdisc_pkt_len(skb); return qdisc_enqueue_tail(skb, sch); } @@ -553,7 +553,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) opt.limit = q->limit; opt.DP = q->DP; - opt.backlog = q->backlog; + opt.backlog = gred_backlog(table, q, sch); opt.prio = q->prio; opt.qth_min = q->parms.qth_min >> q->parms.Wlog; opt.qth_max = q->parms.qth_max >> q->parms.Wlog; diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 1ec19f6..eeeba5a 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -793,20 +793,26 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, { u32 value_follows; int err; + struct page *scratch; + + scratch = alloc_page(GFP_KERNEL); + if (!scratch) + return -ENOMEM; + xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE); /* res->status */ err = gssx_dec_status(xdr, &res->status); if (err) - return err; + goto out_free; /* res->context_handle */ err = gssx_dec_bool(xdr, &value_follows); if (err) - return err; + goto out_free; if (value_follows) { err = gssx_dec_ctx(xdr, res->context_handle); if (err) - return err; + goto out_free; } else { res->context_handle = NULL; } @@ -814,11 +820,11 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, /* res->output_token */ err = gssx_dec_bool(xdr, &value_follows); if (err) - return err; + goto out_free; if (value_follows) { err = gssx_dec_buffer(xdr, res->output_token); if (err) - return err; + goto out_free; } else { res->output_token = NULL; } @@ -826,14 +832,17 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, /* res->delegated_cred_handle */ err = gssx_dec_bool(xdr, &value_follows); if (err) - return err; + goto out_free; if (value_follows) { /* we do not support upcall servers sending this data. */ - return -EINVAL; + err = -EINVAL; + goto out_free; } /* res->options */ err = gssx_dec_option_array(xdr, &res->options); +out_free: + __free_page(scratch); return err; } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3613e72..70e3dac 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -591,14 +591,14 @@ void tipc_bearer_stop(struct net *net) /* Caller should hold rtnl_lock to protect the bearer */ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, - struct tipc_bearer *bearer) + struct tipc_bearer *bearer, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_BEARER_GET); + nlflags, TIPC_NL_BEARER_GET); if (!hdr) return -EMSGSIZE; @@ -657,7 +657,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!bearer) continue; - err = __tipc_nl_add_bearer(&msg, bearer); + err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI); if (err) break; } @@ -705,7 +705,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_bearer(&msg, bearer); + err = __tipc_nl_add_bearer(&msg, bearer, 0); if (err) goto err_out; rtnl_unlock(); @@ -857,14 +857,14 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) } static int __tipc_nl_add_media(struct tipc_nl_msg *msg, - struct tipc_media *media) + struct tipc_media *media, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_MEDIA_GET); + nlflags, TIPC_NL_MEDIA_GET); if (!hdr) return -EMSGSIZE; @@ -916,7 +916,8 @@ int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); for (; media_info_array[i] != NULL; i++) { - err = __tipc_nl_add_media(&msg, media_info_array[i]); + err = __tipc_nl_add_media(&msg, media_info_array[i], + NLM_F_MULTI); if (err) break; } @@ -963,7 +964,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_media(&msg, media); + err = __tipc_nl_add_media(&msg, media, 0); if (err) goto err_out; rtnl_unlock(); diff --git a/net/tipc/link.c b/net/tipc/link.c index a6b30df..43a515d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1145,11 +1145,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) } /* Synchronize with parallel link if applicable */ if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { - link_handle_out_of_seq_msg(l_ptr, skb); - if (link_synch(l_ptr)) - link_retrieve_defq(l_ptr, &head); - skb = NULL; - goto unlock; + if (!link_synch(l_ptr)) + goto unlock; } l_ptr->next_in_no++; if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) @@ -2013,7 +2010,7 @@ msg_full: /* Caller should hold appropriate locks to protect the link */ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, - struct tipc_link *link) + struct tipc_link *link, int nlflags) { int err; void *hdr; @@ -2022,7 +2019,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_LINK_GET); + nlflags, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; @@ -2095,7 +2092,7 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, if (!node->links[i]) continue; - err = __tipc_nl_add_link(net, msg, node->links[i]); + err = __tipc_nl_add_link(net, msg, node->links[i], NLM_F_MULTI); if (err) return err; } @@ -2143,7 +2140,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); tipc_node_unlock(node); - tipc_node_put(node); if (err) goto out; @@ -2210,7 +2206,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_link(net, &msg, link); + err = __tipc_nl_add_link(net, &msg, link, 0); if (err) goto err_out; diff --git a/net/tipc/server.c b/net/tipc/server.c index ab6183c..77ff03e 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -102,7 +102,7 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - sk_release_kernel(sk); + sock_release(sock); con->sock = NULL; } @@ -321,12 +321,9 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock); + ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1); if (ret < 0) return NULL; - - sk_change_net(sock->sk, s->net); - ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, (char *)&s->imp, sizeof(s->imp)); if (ret < 0) @@ -376,7 +373,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) create_err: kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); return NULL; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ee90d74..9074b5c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1764,13 +1764,14 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { u32 dnode, dport = 0; - int err = -TIPC_ERR_NO_PORT; + int err; struct sk_buff *skb; struct tipc_sock *tsk; struct tipc_net *tn; struct sock *sk; while (skb_queue_len(inputq)) { + err = -TIPC_ERR_NO_PORT; skb = NULL; dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 99f7012..a73a226 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -95,39 +95,36 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; - struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; struct inode *inode = file_inode(filp); - /* - * Socket ? - */ + /* Socket ? */ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { struct socket *sock = SOCKET_I(inode); struct sock *s = sock->sk; - /* - * PF_UNIX ? - */ + /* PF_UNIX ? */ if (s && sock->ops && sock->ops->family == PF_UNIX) u_sock = s; } return u_sock; } -/* - * Keep the number of times in flight count for the file - * descriptor if it is for an AF_UNIX socket. +/* Keep the number of times in flight count for the file + * descriptor if it is for an AF_UNIX socket. */ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + if (s) { struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); + if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -142,10 +139,13 @@ void unix_inflight(struct file *fp) void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + if (s) { struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); + if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; @@ -161,32 +161,27 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), spin_lock(&x->sk_receive_queue.lock); skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { - /* - * Do we have file descriptors ? - */ + /* Do we have file descriptors ? */ if (UNIXCB(skb).fp) { bool hit = false; - /* - * Process the descriptors of this socket - */ + /* Process the descriptors of this socket */ int nfd = UNIXCB(skb).fp->count; struct file **fp = UNIXCB(skb).fp->fp; + while (nfd--) { - /* - * Get the socket the fd matches - * if it indeed does so - */ + /* Get the socket the fd matches if it indeed does so */ struct sock *sk = unix_get_socket(*fp++); + if (sk) { struct unix_sock *u = unix_sk(sk); - /* - * Ignore non-candidates, they could + /* Ignore non-candidates, they could * have been added to the queues after * starting the garbage collection */ if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { hit = true; + func(u); } } @@ -203,24 +198,22 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), static void scan_children(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff_head *hitlist) { - if (x->sk_state != TCP_LISTEN) + if (x->sk_state != TCP_LISTEN) { scan_inflight(x, func, hitlist); - else { + } else { struct sk_buff *skb; struct sk_buff *next; struct unix_sock *u; LIST_HEAD(embryos); - /* - * For a listening socket collect the queued embryos + /* For a listening socket collect the queued embryos * and perform a scan on them as well. */ spin_lock(&x->sk_receive_queue.lock); skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { u = unix_sk(skb->sk); - /* - * An embryo cannot be in-flight, so it's safe + /* An embryo cannot be in-flight, so it's safe * to use the list link. */ BUG_ON(!list_empty(&u->link)); @@ -249,8 +242,7 @@ static void inc_inflight(struct unix_sock *usk) static void inc_inflight_move_tail(struct unix_sock *u) { atomic_long_inc(&u->inflight); - /* - * If this still might be part of a cycle, move it to the end + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over */ @@ -263,8 +255,7 @@ static bool gc_in_progress; void wait_for_unix_gc(void) { - /* - * If number of inflight sockets is insane, + /* If number of inflight sockets is insane, * force a garbage collect right now. */ if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) @@ -288,8 +279,7 @@ void unix_gc(void) goto out; gc_in_progress = true; - /* - * First, select candidates for garbage collection. Only + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. * @@ -320,15 +310,13 @@ void unix_gc(void) } } - /* - * Now remove all internal in-flight reference to children of + /* Now remove all internal in-flight reference to children of * the candidates. */ list_for_each_entry(u, &gc_candidates, link) scan_children(&u->sk, dec_inflight, NULL); - /* - * Restore the references for children of all candidates, + /* Restore the references for children of all candidates, * which have remaining references. Do this recursively, so * only those remain, which form cyclic references. * @@ -350,8 +338,7 @@ void unix_gc(void) } list_del(&cursor); - /* - * not_cycle_list contains those sockets which do not make up a + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ while (!list_empty(¬_cycle_list)) { @@ -360,8 +347,7 @@ void unix_gc(void) list_move_tail(&u->link, &gc_inflight_list); } - /* - * Now gc_candidates contains only garbage. Restore original + /* Now gc_candidates contains only garbage. Restore original * inflight counters for these as well, and remove the skbuffs * which are creating the cycle(s). */ |