diff options
Diffstat (limited to 'net')
128 files changed, 1287 insertions, 712 deletions
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 3315b9a..4026f19 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -32,10 +32,21 @@ #include "bat_v_elp.h" #include "bat_v_ogm.h" +#include "hard-interface.h" #include "hash.h" #include "originator.h" #include "packet.h" +static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) +{ + /* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can + * set the interface as ACTIVE right away, without any risk of race + * condition + */ + if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) + hard_iface->if_status = BATADV_IF_ACTIVE; +} + static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface) { int ret; @@ -274,6 +285,7 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", + .bat_iface_activate = batadv_v_iface_activate, .bat_iface_enable = batadv_v_iface_enable, .bat_iface_disable = batadv_v_iface_disable, .bat_iface_update_mac = batadv_v_iface_update_mac, diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index e96d7c7..3e6b262 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -568,6 +568,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT + * @vid: VLAN identifier * * An originator O is selected if and only if its DHT_ID value is one of three * closest values (from the LEFT, with wrap around if needed) then the hash @@ -576,7 +577,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM. */ static struct batadv_dat_candidate * -batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) +batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, + unsigned short vid) { int select; batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; @@ -592,7 +594,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) return NULL; dat.ip = ip_dst; - dat.vid = 0; + dat.vid = vid; ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat, BATADV_DAT_ADDR_MAX); @@ -612,6 +614,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @ip: the DHT key + * @vid: VLAN identifier * @packet_subtype: unicast4addr packet subtype to use * * This function copies the skb with pskb_copy() and is sent as unicast packet @@ -622,7 +625,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, - int packet_subtype) + unsigned short vid, int packet_subtype) { int i; bool ret = false; @@ -631,7 +634,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *tmp_skb; struct batadv_dat_candidate *cand; - cand = batadv_dat_select_candidates(bat_priv, ip); + cand = batadv_dat_select_candidates(bat_priv, ip, vid); if (!cand) goto out; @@ -1022,7 +1025,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, ret = true; } else { /* Send the request to the DHT */ - ret = batadv_dat_send_data(bat_priv, skb, ip_dst, + ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_GET); } out: @@ -1150,8 +1153,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, /* Send the ARP reply to the candidates for both the IP addresses that * the node obtained from the ARP reply */ - batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); - batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT); } /** diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b22b277..0a7deaf2 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -407,6 +407,9 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) batadv_update_min_mtu(hard_iface->soft_iface); + if (bat_priv->bat_algo_ops->bat_iface_activate) + bat_priv->bat_algo_ops->bat_iface_activate(hard_iface); + out: if (primary_if) batadv_hardif_put(primary_if); @@ -572,8 +575,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if = NULL; - if (hard_iface->if_status == BATADV_IF_ACTIVE) - batadv_hardif_deactivate_interface(hard_iface); + batadv_hardif_deactivate_interface(hard_iface); if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index e4cbb07..c355a82 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -250,7 +250,6 @@ static void batadv_neigh_node_release(struct kref *ref) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; - struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_neigh_ifinfo *neigh_ifinfo; struct batadv_algo_ops *bao; @@ -262,13 +261,7 @@ static void batadv_neigh_node_release(struct kref *ref) batadv_neigh_ifinfo_put(neigh_ifinfo); } - hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming, - neigh_node->addr); - if (hardif_neigh) { - /* batadv_hardif_neigh_get() increases refcount too */ - batadv_hardif_neigh_put(hardif_neigh); - batadv_hardif_neigh_put(hardif_neigh); - } + batadv_hardif_neigh_put(neigh_node->hardif_neigh); if (bao->bat_neigh_free) bao->bat_neigh_free(neigh_node); @@ -663,6 +656,11 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, ether_addr_copy(neigh_node->addr, neigh_addr); neigh_node->if_incoming = hard_iface; neigh_node->orig_node = orig_node; + neigh_node->last_seen = jiffies; + + /* increment unique neighbor refcount */ + kref_get(&hardif_neigh->refcount); + neigh_node->hardif_neigh = hardif_neigh; /* extra reference for return */ kref_init(&neigh_node->refcount); @@ -672,9 +670,6 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); spin_unlock_bh(&orig_node->neigh_list_lock); - /* increment unique neighbor refcount */ - kref_get(&hardif_neigh->refcount); - batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", neigh_addr, orig_node->orig, hard_iface->net_dev->name); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 4dd646a..b781bf7 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -105,6 +105,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_put(orig_ifinfo); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 3ce06e0..7641785 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -675,6 +675,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->bcast_queue_left); + batadv_forw_packet_free(forw_packet); } } @@ -702,6 +705,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + batadv_forw_packet_free(forw_packet); } } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 0710379..8a136b6 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -408,11 +408,17 @@ void batadv_interface_rx(struct net_device *soft_iface, */ nf_reset(skb); + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto dropped; + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, VLAN_ETH_HLEN)) + goto dropped; + vhdr = (struct vlan_ethhdr *)skb->data; if (vhdr->h_vlan_encapsulated_proto != ethertype) @@ -424,8 +430,6 @@ void batadv_interface_rx(struct net_device *soft_iface, } /* skb->dev & skb->pkt_type are set here */ - if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) - goto dropped; skb->protocol = eth_type_trans(skb, soft_iface); /* should not be necessary anymore as we use skb_pull_rcsum() diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 0b43e86..9b4551a 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -215,6 +215,8 @@ static void batadv_tt_local_entry_release(struct kref *ref) tt_local_entry = container_of(ref, struct batadv_tt_local_entry, common.refcount); + batadv_softif_vlan_put(tt_local_entry->vlan); + kfree_rcu(tt_local_entry, common.rcu); } @@ -673,6 +675,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, kref_get(&tt_local->common.refcount); tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen; + tt_local->vlan = vlan; /* the batman interface mac and multicast addresses should never be * purged @@ -991,7 +994,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; - struct batadv_softif_vlan *vlan; struct hlist_head *head; unsigned short vid; u32 i; @@ -1027,14 +1029,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) last_seen_msecs = last_seen_msecs % 1000; no_purge = tt_common_entry->flags & np_flag; - - vlan = batadv_softif_vlan_get(bat_priv, vid); - if (!vlan) { - seq_printf(seq, "Cannot retrieve VLAN %d\n", - BATADV_PRINT_VID(vid)); - continue; - } - seq_printf(seq, " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", tt_common_entry->addr, @@ -1052,9 +1046,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), no_purge ? 0 : last_seen_secs, no_purge ? 0 : last_seen_msecs, - vlan->tt.crc); - - batadv_softif_vlan_put(vlan); + tt_local->vlan->tt.crc); } rcu_read_unlock(); } @@ -1099,7 +1091,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, { struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; - struct batadv_softif_vlan *vlan; void *tt_entry_exists; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); @@ -1139,14 +1130,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, /* extra call to free the local tt entry */ batadv_tt_local_entry_put(tt_local_entry); - /* decrease the reference held for this vlan */ - vlan = batadv_softif_vlan_get(bat_priv, vid); - if (!vlan) - goto out; - - batadv_softif_vlan_put(vlan); - batadv_softif_vlan_put(vlan); - out: if (tt_local_entry) batadv_tt_local_entry_put(tt_local_entry); @@ -1219,7 +1202,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; - struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; u32 i; @@ -1241,14 +1223,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) struct batadv_tt_local_entry, common); - /* decrease the reference held for this vlan */ - vlan = batadv_softif_vlan_get(bat_priv, - tt_common_entry->vid); - if (vlan) { - batadv_softif_vlan_put(vlan); - batadv_softif_vlan_put(vlan); - } - batadv_tt_local_entry_put(tt_local); } spin_unlock_bh(list_lock); @@ -3309,7 +3283,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; - struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -3339,13 +3312,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_tt_local_entry, common); - /* decrease the reference held for this vlan */ - vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); - if (vlan) { - batadv_softif_vlan_put(vlan); - batadv_softif_vlan_put(vlan); - } - batadv_tt_local_entry_put(tt_local); } spin_unlock_bh(list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 9abfb3e..1e47fbe 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -433,6 +433,7 @@ struct batadv_hardif_neigh_node { * @ifinfo_lock: lock protecting private ifinfo members and list * @if_incoming: pointer to incoming hard-interface * @last_seen: when last packet via this neighbor was received + * @hardif_neigh: hardif_neigh of this neighbor * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ @@ -444,6 +445,7 @@ struct batadv_neigh_node { spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ struct batadv_hard_iface *if_incoming; unsigned long last_seen; + struct batadv_hardif_neigh_node *hardif_neigh; struct kref refcount; struct rcu_head rcu; }; @@ -1073,10 +1075,12 @@ struct batadv_tt_common_entry { * struct batadv_tt_local_entry - translation table local entry data * @common: general translation table data * @last_seen: timestamp used for purging stale tt local entries + * @vlan: soft-interface vlan of the entry */ struct batadv_tt_local_entry { struct batadv_tt_common_entry common; unsigned long last_seen; + struct batadv_softif_vlan *vlan; }; /** @@ -1250,6 +1254,8 @@ struct batadv_forw_packet { * struct batadv_algo_ops - mesh algorithm callbacks * @list: list node for the batadv_algo_list * @name: name of the algorithm + * @bat_iface_activate: start routing mechanisms when hard-interface is brought + * up * @bat_iface_enable: init routing info when hard-interface is enabled * @bat_iface_disable: de-init routing info when hard-interface is disabled * @bat_iface_update_mac: (re-)init mac addresses of the protocol information @@ -1277,6 +1283,7 @@ struct batadv_forw_packet { struct batadv_algo_ops { struct hlist_node list; char *name; + void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface); int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface); void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 263b4de..60a3dbf 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,18 +21,19 @@ #include <asm/uaccess.h> #include "br_private.h" -/* called with RTNL */ static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } + rcu_read_unlock(); return i; } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 253bc77..7dbc80d 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -61,6 +61,19 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags) e->flags |= MDB_FLAGS_OFFLOAD; } +static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip) +{ + memset(ip, 0, sizeof(struct br_ip)); + ip->vid = entry->vid; + ip->proto = entry->addr.proto; + if (ip->proto == htons(ETH_P_IP)) + ip->u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + else + ip->u.ip6 = entry->addr.u.ip6; +#endif +} + static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev) { @@ -243,9 +256,45 @@ static inline size_t rtnl_mdb_nlmsg_size(void) + nla_total_size(sizeof(struct br_mdb_entry)); } -static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, - int type, struct net_bridge_port_group *pg) +struct br_mdb_complete_info { + struct net_bridge_port *port; + struct br_ip ip; +}; + +static void br_mdb_complete(struct net_device *dev, int err, void *priv) { + struct br_mdb_complete_info *data = priv; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port *port = data->port; + struct net_bridge *br = port->br; + + if (err) + goto err; + + spin_lock_bh(&br->multicast_lock); + mdb = mlock_dereference(br->mdb, br); + mp = br_mdb_ip_get(mdb, &data->ip); + if (!mp) + goto out; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port != port) + continue; + p->flags |= MDB_PG_FLAGS_OFFLOAD; + } +out: + spin_unlock_bh(&br->multicast_lock); +err: + kfree(priv); +} + +static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, + struct br_mdb_entry *entry, int type) +{ + struct br_mdb_complete_info *complete_info; struct switchdev_obj_port_mdb mdb = { .obj = { .id = SWITCHDEV_OBJ_ID_PORT_MDB, @@ -268,9 +317,14 @@ static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, mdb.obj.orig_dev = port_dev; if (port_dev && type == RTM_NEWMDB) { - err = switchdev_port_obj_add(port_dev, &mdb.obj); - if (!err && pg) - pg->flags |= MDB_PG_FLAGS_OFFLOAD; + complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); + if (complete_info) { + complete_info->port = p; + __mdb_entry_to_br_ip(entry, &complete_info->ip); + mdb.obj.complete_priv = complete_info; + mdb.obj.complete = br_mdb_complete; + switchdev_port_obj_add(port_dev, &mdb.obj); + } } else if (port_dev && type == RTM_DELMDB) { switchdev_port_obj_del(port_dev, &mdb.obj); } @@ -291,21 +345,21 @@ errout: rtnl_set_sk_err(net, RTNLGRP_MDB, err); } -void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, - int type) +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type, u8 flags) { struct br_mdb_entry entry; memset(&entry, 0, sizeof(entry)); - entry.ifindex = pg->port->dev->ifindex; - entry.addr.proto = pg->addr.proto; - entry.addr.u.ip4 = pg->addr.u.ip4; + entry.ifindex = port->dev->ifindex; + entry.addr.proto = group->proto; + entry.addr.u.ip4 = group->u.ip4; #if IS_ENABLED(CONFIG_IPV6) - entry.addr.u.ip6 = pg->addr.u.ip6; + entry.addr.u.ip6 = group->u.ip6; #endif - entry.vid = pg->addr.vid; - __mdb_entry_fill_flags(&entry, pg->flags); - __br_mdb_notify(dev, &entry, type, pg); + entry.vid = group->vid; + __mdb_entry_fill_flags(&entry, flags); + __br_mdb_notify(dev, port, &entry, type); } static int nlmsg_populate_rtr_fill(struct sk_buff *skb, @@ -450,8 +504,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, } static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group, unsigned char state, - struct net_bridge_port_group **pg) + struct br_ip *group, unsigned char state) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -482,7 +535,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); - *pg = p; if (state == MDB_TEMPORARY) mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -490,8 +542,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, } static int __br_mdb_add(struct net *net, struct net_bridge *br, - struct br_mdb_entry *entry, - struct net_bridge_port_group **pg) + struct br_mdb_entry *entry) { struct br_ip ip; struct net_device *dev; @@ -509,18 +560,10 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; - memset(&ip, 0, sizeof(ip)); - ip.vid = entry->vid; - ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) - ip.u.ip4 = entry->addr.u.ip4; -#if IS_ENABLED(CONFIG_IPV6) - else - ip.u.ip6 = entry->addr.u.ip6; -#endif + __mdb_entry_to_br_ip(entry, &ip); spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip, entry->state, pg); + ret = br_mdb_add_group(br, p, &ip, entry->state); spin_unlock_bh(&br->multicast_lock); return ret; } @@ -528,7 +571,6 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); - struct net_bridge_port_group *pg; struct net_bridge_vlan_group *vg; struct net_device *dev, *pdev; struct br_mdb_entry *entry; @@ -558,15 +600,15 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) if (br_vlan_enabled(br) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; - err = __br_mdb_add(net, br, entry, &pg); + err = __br_mdb_add(net, br, entry); if (err) break; - __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); + __br_mdb_notify(dev, p, entry, RTM_NEWMDB); } } else { - err = __br_mdb_add(net, br, entry, &pg); + err = __br_mdb_add(net, br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); + __br_mdb_notify(dev, p, entry, RTM_NEWMDB); } return err; @@ -584,15 +626,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; - memset(&ip, 0, sizeof(ip)); - ip.vid = entry->vid; - ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) - ip.u.ip4 = entry->addr.u.ip4; -#if IS_ENABLED(CONFIG_IPV6) - else - ip.u.ip6 = entry->addr.u.ip6; -#endif + __mdb_entry_to_br_ip(entry, &ip); spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); @@ -662,12 +696,12 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) entry->vid = v->vid; err = __br_mdb_del(br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); + __br_mdb_notify(dev, p, entry, RTM_DELMDB); } } else { err = __br_mdb_del(br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); + __br_mdb_notify(dev, p, entry, RTM_DELMDB); } return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a4c15df..6852f3c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -283,7 +283,8 @@ static void br_multicast_del_pg(struct net_bridge *br, rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - br_mdb_notify(br->dev, p, RTM_DELMDB); + br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB, + p->flags); call_rcu_bh(&p->rcu, br_multicast_free_pg); if (!mp->ports && !mp->mglist && @@ -705,7 +706,7 @@ static int br_multicast_add_group(struct net_bridge *br, if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, p, RTM_NEWMDB); + br_mdb_notify(br->dev, port, group, RTM_NEWMDB, 0); found: mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -1278,6 +1279,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); __be32 group; int err = 0; @@ -1288,14 +1290,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, group = ih->group; - if (skb->len == sizeof(*ih)) { + if (skb->len == offset + sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } - } else if (skb->len >= sizeof(*ih3)) { + } else if (skb->len >= offset + sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; @@ -1356,6 +1358,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); const struct in6_addr *group = NULL; bool is_general_query; int err = 0; @@ -1365,8 +1368,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - if (skb->len == sizeof(*mld)) { - if (!pskb_may_pull(skb, sizeof(*mld))) { + if (skb->len == offset + sizeof(*mld)) { + if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; } @@ -1375,7 +1378,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (max_delay) group = &mld->mld_mca; } else { - if (!pskb_may_pull(skb, sizeof(*mld2q))) { + if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) { err = -EINVAL; goto out; } @@ -1461,7 +1464,8 @@ br_multicast_leave_group(struct net_bridge *br, hlist_del_init(&p->mglist); del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - br_mdb_notify(br->dev, p, RTM_DELMDB); + br_mdb_notify(br->dev, port, group, RTM_DELMDB, + p->flags); if (!mp->ports && !mp->mglist && netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1b5d145..d9da857 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -560,8 +560,8 @@ br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, unsigned char flags); void br_mdb_init(void); void br_mdb_uninit(void); -void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, - int type); +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type, u8 flags); void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, int type); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index e234490..9cb7044 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -582,7 +582,7 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) int err; err = switchdev_port_attr_set(br->dev, &attr); - if (err) + if (err && err != -EOPNOTSUPP) return err; br->ageing_time = t; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 67b2e27..5a61f35 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -370,7 +370,11 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; - match = xt_request_find_match(NFPROTO_BRIDGE, m->u.name, 0); + match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0); + if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) { + request_module("ebt_%s", m->u.name); + match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0); + } if (IS_ERR(match)) return PTR_ERR(match); m->u.match = match; @@ -1521,6 +1525,8 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; + tmp.name[sizeof(tmp.name) - 1] = '\0'; + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; @@ -2332,6 +2338,8 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; + tmp.name[sizeof(tmp.name) - 1] = '\0'; + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index adc8d72..77f7e7a 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -40,7 +40,8 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) * or the bridge port (NF_BRIDGE PREROUTING). */ -static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, +static void nft_reject_br_send_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, const struct net_device *dev, int hook) { @@ -48,7 +49,6 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _oth; - struct net *net = sock_net(oldskb->sk); if (!nft_bridge_iphdr_validate(oldskb)) return; @@ -75,7 +75,8 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, br_deliver(br_port_get_rcu(dev), nskb); } -static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, +static void nft_reject_br_send_v4_unreach(struct net *net, + struct sk_buff *oldskb, const struct net_device *dev, int hook, u8 code) { @@ -86,7 +87,6 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, void *payload; __wsum csum; u8 proto; - struct net *net = sock_net(oldskb->sk); if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) return; @@ -273,17 +273,17 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->hook, + nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, - pkt->hook); + nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb, + pkt->in, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->hook, + nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, nft_reject_icmp_code(priv->icmp_code)); break; } diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 6b923bc..2bc5965 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -293,13 +293,9 @@ int ceph_auth_create_authorizer(struct ceph_auth_client *ac, } EXPORT_SYMBOL(ceph_auth_create_authorizer); -void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, - struct ceph_authorizer *a) +void ceph_auth_destroy_authorizer(struct ceph_authorizer *a) { - mutex_lock(&ac->mutex); - if (ac->ops && ac->ops->destroy_authorizer) - ac->ops->destroy_authorizer(ac, a); - mutex_unlock(&ac->mutex); + a->destroy(a); } EXPORT_SYMBOL(ceph_auth_destroy_authorizer); diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 8c93fa8..5f836f0 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -16,7 +16,6 @@ static void reset(struct ceph_auth_client *ac) struct ceph_auth_none_info *xi = ac->private; xi->starting = true; - xi->built_authorizer = false; } static void destroy(struct ceph_auth_client *ac) @@ -39,6 +38,27 @@ static int should_authenticate(struct ceph_auth_client *ac) return xi->starting; } +static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac, + struct ceph_none_authorizer *au) +{ + void *p = au->buf; + void *const end = p + sizeof(au->buf); + int ret; + + ceph_encode_8_safe(&p, end, 1, e_range); + ret = ceph_entity_name_encode(ac->name, &p, end); + if (ret < 0) + return ret; + + ceph_encode_64_safe(&p, end, ac->global_id, e_range); + au->buf_len = p - (void *)au->buf; + dout("%s built authorizer len %d\n", __func__, au->buf_len); + return 0; + +e_range: + return -ERANGE; +} + static int build_request(struct ceph_auth_client *ac, void *buf, void *end) { return 0; @@ -57,32 +77,32 @@ static int handle_reply(struct ceph_auth_client *ac, int result, return result; } +static void ceph_auth_none_destroy_authorizer(struct ceph_authorizer *a) +{ + kfree(a); +} + /* - * build an 'authorizer' with our entity_name and global_id. we can - * reuse a single static copy since it is identical for all services - * we connect to. + * build an 'authorizer' with our entity_name and global_id. it is + * identical for all services we connect to. */ static int ceph_auth_none_create_authorizer( struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth) { - struct ceph_auth_none_info *ai = ac->private; - struct ceph_none_authorizer *au = &ai->au; - void *p, *end; + struct ceph_none_authorizer *au; int ret; - if (!ai->built_authorizer) { - p = au->buf; - end = p + sizeof(au->buf); - ceph_encode_8(&p, 1); - ret = ceph_entity_name_encode(ac->name, &p, end - 8); - if (ret < 0) - goto bad; - ceph_decode_need(&p, end, sizeof(u64), bad2); - ceph_encode_64(&p, ac->global_id); - au->buf_len = p - (void *)au->buf; - ai->built_authorizer = true; - dout("built authorizer len %d\n", au->buf_len); + au = kmalloc(sizeof(*au), GFP_NOFS); + if (!au) + return -ENOMEM; + + au->base.destroy = ceph_auth_none_destroy_authorizer; + + ret = ceph_auth_none_build_authorizer(ac, au); + if (ret) { + kfree(au); + return ret; } auth->authorizer = (struct ceph_authorizer *) au; @@ -92,17 +112,6 @@ static int ceph_auth_none_create_authorizer( auth->authorizer_reply_buf_len = sizeof (au->reply_buf); return 0; - -bad2: - ret = -ERANGE; -bad: - return ret; -} - -static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, - struct ceph_authorizer *a) -{ - /* nothing to do */ } static const struct ceph_auth_client_ops ceph_auth_none_ops = { @@ -114,7 +123,6 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .build_request = build_request, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, - .destroy_authorizer = ceph_auth_none_destroy_authorizer, }; int ceph_auth_none_init(struct ceph_auth_client *ac) @@ -127,7 +135,6 @@ int ceph_auth_none_init(struct ceph_auth_client *ac) return -ENOMEM; xi->starting = true; - xi->built_authorizer = false; ac->protocol = CEPH_AUTH_NONE; ac->private = xi; diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h index 059a3ce..6202153 100644 --- a/net/ceph/auth_none.h +++ b/net/ceph/auth_none.h @@ -12,6 +12,7 @@ */ struct ceph_none_authorizer { + struct ceph_authorizer base; char buf[128]; int buf_len; char reply_buf[0]; @@ -19,8 +20,6 @@ struct ceph_none_authorizer { struct ceph_auth_none_info { bool starting; - bool built_authorizer; - struct ceph_none_authorizer au; /* we only need one; it's static */ }; int ceph_auth_none_init(struct ceph_auth_client *ac); diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 9e43a31..a0905f0 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -565,6 +565,14 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, return -EAGAIN; } +static void ceph_x_destroy_authorizer(struct ceph_authorizer *a) +{ + struct ceph_x_authorizer *au = (void *)a; + + ceph_x_authorizer_cleanup(au); + kfree(au); +} + static int ceph_x_create_authorizer( struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth) @@ -581,6 +589,8 @@ static int ceph_x_create_authorizer( if (!au) return -ENOMEM; + au->base.destroy = ceph_x_destroy_authorizer; + ret = ceph_x_build_authorizer(ac, th, au); if (ret) { kfree(au); @@ -643,16 +653,6 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, return ret; } -static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac, - struct ceph_authorizer *a) -{ - struct ceph_x_authorizer *au = (void *)a; - - ceph_x_authorizer_cleanup(au); - kfree(au); -} - - static void ceph_x_reset(struct ceph_auth_client *ac) { struct ceph_x_info *xi = ac->private; @@ -770,7 +770,6 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .create_authorizer = ceph_x_create_authorizer, .update_authorizer = ceph_x_update_authorizer, .verify_authorizer_reply = ceph_x_verify_authorizer_reply, - .destroy_authorizer = ceph_x_destroy_authorizer, .invalidate_authorizer = ceph_x_invalidate_authorizer, .reset = ceph_x_reset, .destroy = ceph_x_destroy, diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 40b1a3c..21a5af9 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -26,6 +26,7 @@ struct ceph_x_ticket_handler { struct ceph_x_authorizer { + struct ceph_authorizer base; struct ceph_crypto_key session_key; struct ceph_buffer *buf; unsigned int service; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1831f63..a550289 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -269,7 +269,7 @@ static void _ceph_msgr_exit(void) } BUG_ON(zero_page == NULL); - page_cache_release(zero_page); + put_page(zero_page); zero_page = NULL; ceph_msgr_slab_exit(); @@ -282,7 +282,7 @@ int ceph_msgr_init(void) BUG_ON(zero_page != NULL); zero_page = ZERO_PAGE(0); - page_cache_get(zero_page); + get_page(zero_page); /* * The number of active work items is limited by the number of @@ -1602,7 +1602,7 @@ static int write_partial_skip(struct ceph_connection *con) dout("%s %p %d left\n", __func__, con, con->out_skip); while (con->out_skip > 0) { - size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); + size_t size = min(con->out_skip, (int) PAGE_SIZE); ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); if (ret <= 0) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 32355d9d..40a53a7 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1087,10 +1087,8 @@ static void put_osd(struct ceph_osd *osd) dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), atomic_read(&osd->o_ref) - 1); if (atomic_dec_and_test(&osd->o_ref)) { - struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; - if (osd->o_auth.authorizer) - ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); + ceph_auth_destroy_authorizer(osd->o_auth.authorizer); kfree(osd); } } @@ -2984,7 +2982,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, struct ceph_auth_handshake *auth = &o->o_auth; if (force_new && auth->authorizer) { - ceph_auth_destroy_authorizer(ac, auth->authorizer); + ceph_auth_destroy_authorizer(auth->authorizer); auth->authorizer = NULL; } if (!auth->authorizer) { diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index c7c220a..6864007 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -56,7 +56,7 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) size_t bit = pl->room; int ret; - memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), + memcpy(pl->mapped_tail + (pl->length & ~PAGE_MASK), buf, bit); pl->length += bit; pl->room -= bit; @@ -67,7 +67,7 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) return ret; } - memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); + memcpy(pl->mapped_tail + (pl->length & ~PAGE_MASK), buf, len); pl->length += len; pl->room -= len; return 0; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 10297f7..00d2601 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -95,19 +95,19 @@ int ceph_copy_user_to_page_vector(struct page **pages, loff_t off, size_t len) { int i = 0; - int po = off & ~PAGE_CACHE_MASK; + int po = off & ~PAGE_MASK; int left = len; int l, bad; while (left > 0) { - l = min_t(int, PAGE_CACHE_SIZE-po, left); + l = min_t(int, PAGE_SIZE-po, left); bad = copy_from_user(page_address(pages[i]) + po, data, l); if (bad == l) return -EFAULT; data += l - bad; left -= l - bad; po += l - bad; - if (po == PAGE_CACHE_SIZE) { + if (po == PAGE_SIZE) { po = 0; i++; } @@ -121,17 +121,17 @@ void ceph_copy_to_page_vector(struct page **pages, loff_t off, size_t len) { int i = 0; - size_t po = off & ~PAGE_CACHE_MASK; + size_t po = off & ~PAGE_MASK; size_t left = len; while (left > 0) { - size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_SIZE-po, left); memcpy(page_address(pages[i]) + po, data, l); data += l; left -= l; po += l; - if (po == PAGE_CACHE_SIZE) { + if (po == PAGE_SIZE) { po = 0; i++; } @@ -144,17 +144,17 @@ void ceph_copy_from_page_vector(struct page **pages, loff_t off, size_t len) { int i = 0; - size_t po = off & ~PAGE_CACHE_MASK; + size_t po = off & ~PAGE_MASK; size_t left = len; while (left > 0) { - size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_SIZE-po, left); memcpy(data, page_address(pages[i]) + po, l); data += l; left -= l; po += l; - if (po == PAGE_CACHE_SIZE) { + if (po == PAGE_SIZE) { po = 0; i++; } @@ -168,25 +168,25 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector); */ void ceph_zero_page_vector_range(int off, int len, struct page **pages) { - int i = off >> PAGE_CACHE_SHIFT; + int i = off >> PAGE_SHIFT; - off &= ~PAGE_CACHE_MASK; + off &= ~PAGE_MASK; dout("zero_page_vector_page %u~%u\n", off, len); /* leading partial page? */ if (off) { - int end = min((int)PAGE_CACHE_SIZE, off + len); + int end = min((int)PAGE_SIZE, off + len); dout("zeroing %d %p head from %d\n", i, pages[i], (int)off); zero_user_segment(pages[i], off, end); len -= (end - off); i++; } - while (len >= PAGE_CACHE_SIZE) { + while (len >= PAGE_SIZE) { dout("zeroing %d %p len=%d\n", i, pages[i], len); - zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); - len -= PAGE_CACHE_SIZE; + zero_user_segment(pages[i], 0, PAGE_SIZE); + len -= PAGE_SIZE; i++; } /* trailing partial page? */ diff --git a/net/core/dev.c b/net/core/dev.c index b9bcbe7..5c925ac 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2802,7 +2802,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { - features &= ~NETIF_F_CSUM_MASK; + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -4439,6 +4439,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ diff --git a/net/core/filter.c b/net/core/filter.c index b7177d0..ca7f832 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1149,7 +1149,8 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); -static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, + bool locked) { struct sk_filter *fp, *old_fp; @@ -1165,10 +1166,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return -ENOMEM; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + old_fp = rcu_dereference_protected(sk->sk_filter, locked); rcu_assign_pointer(sk->sk_filter, fp); - if (old_fp) sk_filter_uncharge(sk, old_fp); @@ -1247,7 +1246,8 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. */ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked) { struct bpf_prog *prog = __get_filter(fprog, sk); int err; @@ -1255,7 +1255,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, locked); if (err < 0) { __bpf_prog_release(prog); return err; @@ -1263,7 +1263,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return 0; } -EXPORT_SYMBOL_GPL(sk_attach_filter); +EXPORT_SYMBOL_GPL(__sk_attach_filter); + +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); +} int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { @@ -1309,7 +1314,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); if (err < 0) { bpf_prog_put(prog); return err; @@ -1764,6 +1769,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { case offsetof(struct bpf_tunnel_key, tunnel_label): + case offsetof(struct bpf_tunnel_key, tunnel_ext): goto set_compat; case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): /* Fixup deprecated structure layouts here, so we have @@ -1849,6 +1855,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { case offsetof(struct bpf_tunnel_key, tunnel_label): + case offsetof(struct bpf_tunnel_key, tunnel_ext): case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): /* Fixup deprecated structure layouts here, so we have * a common path later on. @@ -1861,7 +1868,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) return -EINVAL; } } - if (unlikely(!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label)) + if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) || + from->tunnel_ext)) return -EINVAL; skb_dst_drop(skb); @@ -2247,7 +2255,7 @@ static int __init register_sk_filter_ops(void) } late_initcall(register_sk_filter_ops); -int sk_detach_filter(struct sock *sk) +int __sk_detach_filter(struct sock *sk, bool locked) { int ret = -ENOENT; struct sk_filter *filter; @@ -2255,8 +2263,7 @@ int sk_detach_filter(struct sock *sk) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + filter = rcu_dereference_protected(sk->sk_filter, locked); if (filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); @@ -2265,7 +2272,12 @@ int sk_detach_filter(struct sock *sk) return ret; } -EXPORT_SYMBOL_GPL(sk_detach_filter); +EXPORT_SYMBOL_GPL(__sk_detach_filter); + +int sk_detach_filter(struct sock *sk) +{ + return __sk_detach_filter(sk, sock_owned_by_user(sk)); +} int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) diff --git a/net/core/flow.c b/net/core/flow.c index 1033725..3937b1b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -92,8 +92,11 @@ static void flow_cache_gc_task(struct work_struct *work) list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); spin_unlock_bh(&xfrm->flow_cache_gc_lock); - list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) + list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { flow_entry_kill(fce, xfrm); + atomic_dec(&xfrm->flow_cache_gc_count); + WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0); + } } static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, @@ -101,6 +104,7 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, struct netns_xfrm *xfrm) { if (deleted) { + atomic_add(deleted, &xfrm->flow_cache_gc_count); fcp->hash_count -= deleted; spin_lock_bh(&xfrm->flow_cache_gc_lock); list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); @@ -232,6 +236,13 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_count > fc->high_watermark) flow_cache_shrink(fc, fcp); + if (fcp->hash_count > 2 * fc->high_watermark || + atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) { + atomic_inc(&net->xfrm.flow_cache_genid); + flo = ERR_PTR(-ENOBUFS); + goto ret_object; + } + fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { fle->net = net; @@ -446,6 +457,7 @@ int flow_cache_init(struct net *net) INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); mutex_init(&net->xfrm.flow_flush_sem); + atomic_set(&net->xfrm.flow_cache_gc_count, 0); fc->hash_shift = 10; fc->low_watermark = 2 * flow_cache_hash_size(fc); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f206677..65763c2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -909,6 +909,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1179,14 +1180,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; + struct rtnl_link_ifmap map; + + memset(&map, 0, sizeof(map)); + map.mem_start = dev->mem_start; + map.mem_end = dev->mem_end; + map.base_addr = dev->base_addr; + map.irq = dev->irq; + map.dma = dev->dma; + map.port = dev->if_port; + if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) return -EMSGSIZE; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d04c2d1..e561f9f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4502,13 +4502,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) __skb_push(skb, offset); err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); - if (err) + if (err) { + __skb_pull(skb, offset); return err; + } + skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; - __skb_pull(skb, offset); skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); + __skb_pull(skb, offset); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; diff --git a/net/core/sock.c b/net/core/sock.c index b67b9ae..7e73c26 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -221,7 +221,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" + "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , + "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -237,7 +238,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" + "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , + "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -253,7 +255,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" + "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , + "clock-AF_MAX" }; /* diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 607a14f..b1dc096 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1034,10 +1034,13 @@ source_ok: if (!fld.daddr) { fld.daddr = fld.saddr; - err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); + err = -EINVAL; dev_out = init_net.loopback_dev; + if (!dev_out->dn_ptr) + goto out; + err = -EADDRNOTAVAIL; dev_hold(dev_out); if (!fld.daddr) { fld.daddr = @@ -1110,6 +1113,8 @@ source_ok: if (dev_out == NULL) goto out; dn_db = rcu_dereference_raw(dev_out->dn_ptr); + if (!dn_db) + goto e_inval; /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fld.daddr)) { dev_put(dev_out); @@ -1151,6 +1156,8 @@ select_source: dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); + if (!dev_out->dn_ptr) + goto e_inval; fld.flowidn_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8a9246d..63566ec 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -904,7 +904,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (!prim) { - pr_warn("%s: bug: prim == NULL\n", __func__); + /* if the device has been deleted, we don't perform + * address promotion + */ + if (!in_dev->dead) + pr_warn("%s: bug: prim == NULL\n", __func__); return; } if (iprim && iprim != prim) { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d97268e..2b68418 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; fi->fib_metrics[type - 1] = val; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index a0586b4..a6962cc 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -195,6 +195,17 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, u8 proto = NAPI_GRO_CB(skb)->proto; const struct net_offload **offloads; + /* We can clear the encap_mark for FOU as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + + /* Flag this frame as already having an outer encap header */ + NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -217,8 +228,6 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff, int err = -ENOSYS; const struct net_offload **offloads; - udp_tunnel_gro_complete(skb, nhoff); - rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -227,6 +236,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff, err = ops->callbacks.gro_complete(skb, nhoff); + skb_set_inner_mac_header(skb, nhoff); + out_unlock: rcu_read_unlock(); @@ -352,6 +363,17 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, } } + /* We can clear the encap_mark for GUE as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + + /* Flag this frame as already having an outer encap header */ + NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); @@ -392,6 +414,8 @@ static int gue_gro_complete(struct sk_buff *skb, int nhoff, err = ops->callbacks.gro_complete(skb, nhoff + guehlen); + skb_set_inner_mac_header(skb, nhoff + guehlen); + out_unlock: rcu_read_unlock(); return err; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index c47539d..6a5bd43 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -150,6 +150,14 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0) goto out; + /* We can only support GRE_CSUM if we can track the location of + * the GRE header. In the case of FOU/GUE we cannot because the + * outer UDP header displaces the GRE header leaving us in a state + * of limbo. + */ + if ((greh->flags & GRE_CSUM) && NAPI_GRO_CB(skb)->is_fou) + goto out; + type = greh->protocol; rcu_read_lock(); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bc68ece..0d9e9d7 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -470,6 +470,7 @@ static int inet_reuseport_add_sock(struct sock *sk, const struct sock *sk2, bool match_wildcard)) { + struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; struct sock *sk2; struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); @@ -479,6 +480,7 @@ static int inet_reuseport_add_sock(struct sock *sk, sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if && + inet_csk(sk2)->icsk_bind_hash == tb && sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && saddr_same(sk, sk2, false)) return reuseport_add_sock(sk, sk2); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 31936d3..4cc8421 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -179,6 +179,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags) return flags; } +/* Fills in tpi and returns header length to be pulled. */ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { @@ -238,7 +239,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto, false); + return hdr_len; } static void ipgre_err(struct sk_buff *skb, u32 info, @@ -341,7 +342,7 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (parse_gre_header(skb, &tpi, &csum_err)) { + if (parse_gre_header(skb, &tpi, &csum_err) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -397,7 +398,10 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) iph->saddr, iph->daddr, tpi->key); if (tunnel) { - skb_pop_mac_header(skb); + if (tunnel->dev->type != ARPHRD_NONE) + skb_pop_mac_header(skb); + else + skb_reset_mac_header(skb); if (tunnel->collect_md) { __be16 flags; __be64 tun_id; @@ -419,6 +423,7 @@ static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; + int hdr_len; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { @@ -428,7 +433,10 @@ static int gre_rcv(struct sk_buff *skb) } #endif - if (parse_gre_header(skb, &tpi, &csum_err) < 0) + hdr_len = parse_gre_header(skb, &tpi, &csum_err); + if (hdr_len < 0) + goto drop; + if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false) < 0) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) @@ -523,7 +531,8 @@ static struct rtable *gre_get_rt(struct sk_buff *skb, return ip_route_output_key(net, fl); } -static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) +static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, + __be16 proto) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -575,7 +584,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) } flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); - build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), + build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -616,7 +625,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, const struct iphdr *tnl_params; if (tunnel->collect_md) { - gre_fb_xmit(skb, dev); + gre_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; } @@ -660,7 +669,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); if (tunnel->collect_md) { - gre_fb_xmit(skb, dev); + gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); return NETDEV_TX_OK; } @@ -862,9 +871,16 @@ static void __gre_tunnel_init(struct net_device *dev) dev->hw_features |= GRE_FEATURES; if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported. */ - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. + */ + if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || + (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + /* Can use a lockless transmit, unless we generate * output sequences */ @@ -886,7 +902,7 @@ static int ipgre_tunnel_init(struct net_device *dev) netif_keep_dst(dev); dev->addr_len = 4; - if (iph->daddr) { + if (iph->daddr && !tunnel->collect_md) { #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { if (!iph->saddr) @@ -895,8 +911,9 @@ static int ipgre_tunnel_init(struct net_device *dev) dev->header_ops = &ipgre_header_ops; } #endif - } else + } else if (!tunnel->collect_md) { dev->header_ops = &ipgre_header_ops; + } return ip_tunnel_init(dev); } @@ -939,6 +956,11 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) if (flags & (GRE_VERSION|GRE_ROUTING)) return -EINVAL; + if (data[IFLA_GRE_COLLECT_METADATA] && + data[IFLA_GRE_ENCAP_TYPE] && + nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) + return -EINVAL; + return 0; } @@ -1012,6 +1034,8 @@ static void ipgre_netlink_parms(struct net_device *dev, struct ip_tunnel *t = netdev_priv(dev); t->collect_md = true; + if (dev->type == ARPHRD_IPGRE) + dev->type = ARPHRD_NONE; } } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6aad019..a69ed94 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -326,12 +326,12 @@ static int ip_tunnel_bind_dev(struct net_device *dev) if (!IS_ERR(rt)) { tdev = rt->dst.dev; - dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, - fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) dev->flags |= IFF_POINTOPOINT; + + dst_cache_reset(&tunnel->dst_cache); } if (!tdev && tunnel->parms.link) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 02dd990..6165f30 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -372,8 +372,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) || nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || - nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) || - nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) || + nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) || + nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) || nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) return -ENOMEM; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 5cf10b7..a917903 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -156,6 +156,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ int err; + int mtu; if (!dst) { dev->stats.tx_carrier_errors++; @@ -192,6 +193,23 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_count = 0; } + mtu = dst_mtu(dst); + if (skb->len > mtu) { + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (skb->protocol == htons(ETH_P_IP)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + } else { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + } + + dst_release(dst); + goto tx_error; + } + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index bf08192..4133b0f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -359,11 +359,12 @@ unsigned int arpt_do_table(struct sk_buff *skb, } /* All zeroes == unconditional rule. */ -static inline bool unconditional(const struct arpt_arp *arp) +static inline bool unconditional(const struct arpt_entry *e) { static const struct arpt_arp uncond; - return memcmp(arp, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct arpt_entry) && + memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } /* Figures out from what hook each rule can be called: returns 0 if @@ -402,11 +403,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct arpt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->arp)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -474,14 +474,12 @@ next: return 1; } -static inline int check_entry(const struct arpt_entry *e, const char *name) +static inline int check_entry(const struct arpt_entry *e) { const struct xt_entry_target *t; - if (!arp_checkentry(&e->arp)) { - duprintf("arp_tables: arp check failed %p %s.\n", e, name); + if (!arp_checkentry(&e->arp)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) return -EINVAL; @@ -522,10 +520,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) struct xt_target *target; int ret; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -557,7 +551,7 @@ static bool check_underflow(const struct arpt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->arp)) + if (!unconditional(e)) return false; t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -576,9 +570,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -590,6 +586,10 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_ARP_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -598,9 +598,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ -969,6 +969,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, sizeof(struct arpt_get_entries) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1233,7 +1234,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1246,7 +1248,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e, name); + ret = check_entry((struct arpt_entry *)e); if (ret) return ret; @@ -1662,6 +1664,7 @@ static int compat_get_entries(struct net *net, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index dd8c80d..8f8713b 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -81,6 +81,12 @@ static int __init arptable_filter_init(void) return ret; } + ret = arptable_filter_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&arptable_filter_net_ops); + kfree(arpfilter_ops); + } + return ret; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e53f8d6..631c100 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -168,11 +168,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ipt_ip *ip) +static inline bool unconditional(const struct ipt_entry *e) { static const struct ipt_ip uncond; - return memcmp(ip, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ipt_entry) && + memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; #undef FWINV } @@ -229,11 +230,10 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ipt_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ip)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP_TRACE_COMMENT_POLICY] @@ -476,11 +476,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ipt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->ip)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -569,14 +568,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ipt_entry *e, const char *name) +check_entry(const struct ipt_entry *e) { const struct xt_entry_target *t; - if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, name); + if (!ip_checkentry(&e->ip)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -666,10 +663,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -721,7 +714,7 @@ static bool check_underflow(const struct ipt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ip)) + if (!unconditional(e)) return false; t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -741,9 +734,11 @@ check_entry_size_and_hooks(struct ipt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -755,6 +750,10 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -763,9 +762,9 @@ check_entry_size_and_hooks(struct ipt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ -1157,6 +1156,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1493,7 +1493,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1506,7 +1507,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e, name); + ret = check_entry((struct ipt_entry *)e); if (ret) return ret; @@ -1935,6 +1936,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 7b8fbb35..db5b875 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -18,10 +18,10 @@ #include <net/netfilter/nf_conntrack_synproxy.h> static struct iphdr * -synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) +synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, + __be32 daddr) { struct iphdr *iph; - struct net *net = sock_net(skb->sk); skb_reset_network_header(skb); iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); @@ -40,14 +40,12 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) } static void -synproxy_send_tcp(const struct synproxy_net *snet, +synproxy_send_tcp(struct net *net, const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct iphdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { - struct net *net = nf_ct_net(snet->tmpl); - nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)nth - nskb->head; @@ -72,7 +70,7 @@ free_nskb: } static void -synproxy_send_client_synack(const struct synproxy_net *snet, +synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -91,7 +89,7 @@ synproxy_send_client_synack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -109,15 +107,16 @@ synproxy_send_client_synack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_syn(const struct synproxy_net *snet, +synproxy_send_server_syn(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *nskb; struct iphdr *iph, *niph; struct tcphdr *nth; @@ -132,7 +131,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -153,12 +152,12 @@ synproxy_send_server_syn(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_ack(const struct synproxy_net *snet, +synproxy_send_server_ack(struct net *net, const struct ip_ct_tcp *state, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) @@ -177,7 +176,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -193,11 +192,11 @@ synproxy_send_server_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void -synproxy_send_client_ack(const struct synproxy_net *snet, +synproxy_send_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -215,7 +214,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -231,15 +230,16 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool -synproxy_recv_client_ack(const struct synproxy_net *snet, +synproxy_recv_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); int mss; mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); @@ -255,7 +255,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); - synproxy_send_server_syn(snet, skb, th, opts, recv_seq); + synproxy_send_server_syn(net, skb, th, opts, recv_seq); return true; } @@ -263,7 +263,8 @@ static unsigned int synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(par->net); + struct net *net = par->net; + struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; @@ -292,12 +293,12 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); - synproxy_send_client_synack(snet, skb, th, &opts); + synproxy_send_client_synack(net, skb, th, &opts); return NF_DROP; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ - synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); + synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); return NF_DROP; } @@ -308,7 +309,8 @@ static unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(nhs->net); + struct net *net = nhs->net; + struct synproxy_net *snet = synproxy_pernet(net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; @@ -365,7 +367,7 @@ static unsigned int ipv4_synproxy_hook(void *priv, * therefore we need to add 1 to make the SYN sequence * number match the one of first SYN. */ - if (synproxy_recv_client_ack(snet, skb, th, &opts, + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq) + 1)) this_cpu_inc(snet->stats->cookie_retrans); @@ -391,12 +393,12 @@ static unsigned int ipv4_synproxy_hook(void *priv, XT_SYNPROXY_OPT_SACK_PERM); swap(opts.tsval, opts.tsecr); - synproxy_send_server_ack(snet, state, skb, th, &opts); + synproxy_send_server_ack(net, state, skb, th, &opts); nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); swap(opts.tsval, opts.tsecr); - synproxy_send_client_ack(snet, skb, th, &opts); + synproxy_send_client_ack(net, skb, th, &opts); consume_skb(skb); return NF_STOLEN; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 02c6229..60398a9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1438,9 +1438,9 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #endif } -static struct rtable *rt_dst_alloc(struct net_device *dev, - unsigned int flags, u16 type, - bool nopolicy, bool noxfrm, bool will_cache) +struct rtable *rt_dst_alloc(struct net_device *dev, + unsigned int flags, u16 type, + bool nopolicy, bool noxfrm, bool will_cache) { struct rtable *rt; @@ -1468,6 +1468,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, return rt; } +EXPORT_SYMBOL(rt_dst_alloc); /* called in rcu_read_lock() section */ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, @@ -2045,6 +2046,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res, */ if (fi && res->prefixlen < 4) fi = NULL; + } else if ((type == RTN_LOCAL) && (orig_oif != 0) && + (orig_oif != dev_out->ifindex)) { + /* For local routes that require a particular output interface + * we do not want to cache the result. Caching the result + * causes incorrect behaviour when there are multiple source + * addresses on the interface, the end result being that if the + * intended recipient is waiting on that interface for the + * packet he won't receive it because it will be delivered on + * the loopback interface and the IP_PKTINFO ipi_ifindex will + * be set to the loopback interface as well. + */ + fi = NULL; } fnhe = NULL; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e6e65f7..c124c3c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1309,6 +1309,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); + tcp_skb_collapse_tstamp(prev, skb); tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); @@ -3098,7 +3099,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, shinfo = skb_shinfo(skb); if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && - between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1)) + !before(shinfo->tskey, prior_snd_una) && + before(shinfo->tskey, tcp_sk(sk)->snd_una)) __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d2dc01..79a03b8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2441,6 +2441,20 @@ u32 __tcp_select_window(struct sock *sk) return window; } +void tcp_skb_collapse_tstamp(struct sk_buff *skb, + const struct sk_buff *next_skb) +{ + const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb); + u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; + + if (unlikely(tsflags)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + + shinfo->tx_flags |= tsflags; + shinfo->tskey = next_shinfo->tskey; + } +} + /* Collapses two adjacent SKB's during retransmission. */ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) { @@ -2484,6 +2498,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb)); + tcp_skb_collapse_tstamp(skb, next_skb); + sk_wmem_free_skb(sk, next_skb); } @@ -2624,8 +2640,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0xFFFF)) { - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, - GFP_ATOMIC); + struct sk_buff *nskb; + + skb_mstamp_get(&skb->skb_mstamp); + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; } else { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 08eed5e..a2e7f55 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -339,8 +339,13 @@ found: hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock(&hslot2->lock); - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, - &hslot2->head); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_nulls_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); + else + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); hslot2->count++; spin_unlock(&hslot2->lock); } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0ed2daf..e330c0e 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -399,6 +399,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) uh->len = newlen; + /* Set encapsulation before calling into inner gro_complete() functions + * to make them set up the inner offsets. + */ + skb->encapsulation = 1; + rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); @@ -421,9 +426,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) if (skb->remcsum_offload) skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; - skb->encapsulation = 1; - skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); - return err; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 27aed1a..8ec4b30 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3176,35 +3176,9 @@ static void addrconf_gre_config(struct net_device *dev) } #endif -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -/* If the host route is cached on the addr struct make sure it is associated - * with the proper table. e.g., enslavement can change and if so the cached - * host route needs to move to the new table. - */ -static void l3mdev_check_host_rt(struct inet6_dev *idev, - struct inet6_ifaddr *ifp) -{ - if (ifp->rt) { - u32 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; - - if (tb_id != ifp->rt->rt6i_table->tb6_id) { - ip6_del_rt(ifp->rt); - ifp->rt = NULL; - } - } -} -#else -static void l3mdev_check_host_rt(struct inet6_dev *idev, - struct inet6_ifaddr *ifp) -{ -} -#endif - static int fixup_permanent_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - l3mdev_check_host_rt(idev, ifp); - if (!ifp->rt) { struct rt6_info *rt; @@ -3255,6 +3229,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info; struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; @@ -3303,6 +3278,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; if (event == NETDEV_UP) { + /* restore routes for permanent addresses */ + addrconf_permanent_addr(dev); + if (!addrconf_qdisc_ok(dev)) { /* device is not ready yet. */ pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", @@ -3336,9 +3314,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } - /* restore routes for permanent addresses */ - addrconf_permanent_addr(dev); - switch (dev->type) { #if IS_ENABLED(CONFIG_IPV6_SIT) case ARPHRD_SIT: @@ -3413,6 +3388,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) addrconf_type_change(dev, event); break; + + case NETDEV_CHANGEUPPER: + info = ptr; + + /* flush all routes if dev is linked to or unlinked from + * an L3 master device (e.g., VRF) + */ + if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + addrconf_ifdown(dev, 0); } return NOTIFY_OK; @@ -3438,6 +3422,12 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event) ipv6_mc_unmap(idev); } +static bool addr_is_local(const struct in6_addr *addr) +{ + return ipv6_addr_type(addr) & + (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); +} + static int addrconf_ifdown(struct net_device *dev, int how) { struct net *net = dev_net(dev); @@ -3495,7 +3485,8 @@ restart: * address is retained on a down event */ if (!keep_addr || - !(ifa->flags & IFA_F_PERMANENT)) { + !(ifa->flags & IFA_F_PERMANENT) || + addr_is_local(&ifa->addr)) { hlist_del_init_rcu(&ifa->addr_lst); goto restart; } @@ -3539,17 +3530,23 @@ restart: INIT_LIST_HEAD(&del_list); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + struct rt6_info *rt = NULL; + addrconf_del_dad_work(ifa); write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); - if (keep_addr && (ifa->flags & IFA_F_PERMANENT)) { + if (keep_addr && (ifa->flags & IFA_F_PERMANENT) && + !addr_is_local(&ifa->addr)) { /* set state to skip the notifier below */ state = INET6_IFADDR_STATE_DEAD; ifa->state = 0; if (!(ifa->flags & IFA_F_NODAD)) ifa->flags |= IFA_F_TENTATIVE; + + rt = ifa->rt; + ifa->rt = NULL; } else { state = ifa->state; ifa->state = INET6_IFADDR_STATE_DEAD; @@ -3560,6 +3557,9 @@ restart: spin_unlock_bh(&ifa->lock); + if (rt) + ip6_del_rt(rt); + if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); @@ -5325,10 +5325,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (rt) ip6_del_rt(rt); } - dst_hold(&ifp->rt->dst); - - ip6_del_rt(ifp->rt); - + if (ifp->rt) { + dst_hold(&ifp->rt->dst); + ip6_del_rt(ifp->rt); + } rt_genid_bump_ipv6(net); break; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 4281621..9dd3882 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,18 +40,114 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } +static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = sk->sk_protocol; + fl6->daddr = sk->sk_v6_daddr; + fl6->saddr = np->saddr; + fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_dport = inet->inet_dport; + fl6->fl6_sport = inet->inet_sport; + fl6->flowlabel = np->flow_label; + + if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + + if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) + fl6->flowi6_oif = np->mcast_oif; + + security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); +} + +int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) +{ + struct ip6_flowlabel *flowlabel = NULL; + struct in6_addr *final_p, final; + struct ipv6_txoptions *opt; + struct dst_entry *dst; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 fl6; + int err = 0; + + if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) { + flowlabel = fl6_sock_lookup(sk, np->flow_label); + if (!flowlabel) + return -EINVAL; + } + ip6_datagram_flow_key_init(&fl6, sk); + + rcu_read_lock(); + opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); + final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto out; + } + + if (fix_sk_saddr) { + if (ipv6_addr_any(&np->saddr)) + np->saddr = fl6.saddr; + + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + sk->sk_v6_rcv_saddr = fl6.saddr; + inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } + } + + ip6_dst_store(sk, dst, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? + &np->saddr : +#endif + NULL); + +out: + fl6_sock_release(flowlabel); + return err; +} + +void ip6_datagram_release_cb(struct sock *sk) +{ + struct dst_entry *dst; + + if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + return; + + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (!dst || !dst->obsolete || + dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + ip6_datagram_dst_update(sk, false); +} +EXPORT_SYMBOL_GPL(ip6_datagram_release_cb); + static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr, *final_p, final; - struct dst_entry *dst; - struct flowi6 fl6; - struct ip6_flowlabel *flowlabel = NULL; - struct ipv6_txoptions *opt; + struct in6_addr *daddr; int addr_type; int err; + __be32 fl6_flowlabel = 0; if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) @@ -66,15 +162,8 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { - fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (!flowlabel) - return -EINVAL; - } - } + if (np->sndflow) + fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; addr_type = ipv6_addr_type(&usin->sin6_addr); @@ -145,7 +234,7 @@ ipv4_connected: } sk->sk_v6_daddr = *daddr; - np->flow_label = fl6.flowlabel; + np->flow_label = fl6_flowlabel; inet->inet_dport = usin->sin6_port; @@ -154,59 +243,13 @@ ipv4_connected: * destination cache for it. */ - fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = sk->sk_v6_daddr; - fl6.saddr = np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet->inet_dport; - fl6.fl6_sport = inet->inet_sport; - - if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - - if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) - fl6.flowi6_oif = np->mcast_oif; - - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - - rcu_read_lock(); - opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); - final_p = fl6_update_dst(&fl6, opt, &final); - rcu_read_unlock(); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p); - err = 0; - if (IS_ERR(dst)) { - err = PTR_ERR(dst); + err = ip6_datagram_dst_update(sk, true); + if (err) goto out; - } - - /* source address lookup done in ip6_dst_lookup */ - - if (ipv6_addr_any(&np->saddr)) - np->saddr = fl6.saddr; - - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - sk->sk_v6_rcv_saddr = fl6.saddr; - inet->inet_rcv_saddr = LOOPBACK4_IPV6; - if (sk->sk_prot->rehash) - sk->sk_prot->rehash(sk); - } - - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? - &sk->sk_v6_daddr : NULL, -#ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl6.saddr, &np->saddr) ? - &np->saddr : -#endif - NULL); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); out: - fl6_sock_release(flowlabel); return err; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0a37ddc..0013cac 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -445,6 +445,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; + else + iif = l3mdev_master_ifindex(skb->dev); /* * Must not send error if the source does not uniquely @@ -499,9 +501,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - if (!fl6.flowi6_oif) - fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); - dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 2ae3c4f..41f18de 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -120,8 +120,7 @@ nla_put_failure: static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) { - /* No encapsulation overhead */ - return 0; + return nla_total_size(sizeof(u64)); /* ILA_ATTR_LOCATOR */ } static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9428345d..bc972e7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1090,8 +1090,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu, unsigned int flags, - const struct flowi6 *fl6) + int exthdrlen, int transhdrlen, int mtu, + unsigned int flags, const struct flowi6 *fl6) { struct sk_buff *skb; @@ -1116,7 +1116,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_put(skb, fragheaderlen + transhdrlen); /* initialize network header pointer */ - skb_reset_network_header(skb); + skb_set_network_header(skb, exthdrlen); /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; @@ -1358,7 +1358,7 @@ emsgsize: (rt->dst.dev->features & NETIF_F_UFO) && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, + hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); if (err) goto error; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index eb2ac4b..1f20345 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -252,12 +252,12 @@ static int ip6_tnl_create2(struct net_device *dev) t = netdev_priv(dev); + dev->rtnl_link_ops = &ip6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 84f9baf..86b67b7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -198,11 +198,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ip6t_ip6 *ipv6) +static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; - return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ip6t_entry) && + memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * @@ -258,11 +259,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ip6t_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ipv6)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] @@ -488,11 +488,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ip6t_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && - unconditional(&e->ipv6)) || visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -581,14 +580,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ip6t_entry *e, const char *name) +check_entry(const struct ip6t_entry *e) { const struct xt_entry_target *t; - if (!ip6_checkentry(&e->ipv6)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); + if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -679,10 +676,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -733,7 +726,7 @@ static bool check_underflow(const struct ip6t_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ipv6)) + if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -753,9 +746,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -767,6 +762,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -775,9 +774,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ -1169,6 +1168,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1505,7 +1505,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1518,7 +1519,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e, name); + ret = check_entry((struct ip6t_entry *)e); if (ret) return ret; @@ -1944,6 +1945,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ed44663..6f32944 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -338,9 +338,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, return rt; } -static struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) +struct rt6_info *ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags) { struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); @@ -364,6 +364,7 @@ static struct rt6_info *ip6_dst_alloc(struct net *net, return rt; } +EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { @@ -1417,8 +1418,20 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { + struct dst_entry *dst; + ip6_update_pmtu(skb, sock_net(sk), mtu, sk->sk_bound_dev_if, sk->sk_mark); + + dst = __sk_dst_get(sk); + if (!dst || !dst->obsolete || + dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) + return; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + ip6_datagram_dst_update(sk, false); + bh_unlock_sock(sk); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); @@ -1737,6 +1750,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc, } else { val = nla_get_u32(nla); } + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) goto err; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 711d209..f443c6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -810,8 +810,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else + else { + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; + fl6.flowi6_oif = oif; + } + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fd25e44..6bc5c66 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -843,8 +843,8 @@ start_lookup: flush_stack(stack, count, skb, count - 1); } else { if (!inner_flushed) - UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); consume_skb(skb); } return 0; @@ -1539,6 +1539,7 @@ struct proto udpv6_prot = { .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, .backlog_rcv = __udpv6_queue_rcv_skb, + .release_cb = ip6_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index afca2eb..6edfa99 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1376,9 +1376,9 @@ static int l2tp_tunnel_sock_create(struct net *net, memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, sizeof(udp_conf.peer_ip6)); udp_conf.use_udp6_tx_checksums = - cfg->udp6_zero_tx_checksums; + ! cfg->udp6_zero_tx_checksums; udp_conf.use_udp6_rx_checksums = - cfg->udp6_zero_rx_checksums; + ! cfg->udp6_zero_rx_checksums; } else #endif { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index ec22078..42de4cc 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 6b54ff3..cd47990 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -136,12 +136,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -169,6 +168,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b3c52e3..8ae3ed9 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { struct llc_pktinfo info; + memset(&info, 0, sizeof(info)); info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; llc_pdu_decode_dsap(skb, &info.lpi_sap); llc_pdu_decode_da(skb, info.lpi_mac); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 2839811..74142d0 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -343,8 +343,10 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *chandef) { - if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) + if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) { + ieee80211_recalc_chanctx_min_def(local, ctx); return; + } WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef)); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 804575f..4220035 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1719,6 +1719,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); +enum ieee80211_sta_rx_bandwidth +ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width); +enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta); +void ieee80211_sta_set_rx_nss(struct sta_info *sta); void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 453b4e74..e1cb22c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1761,7 +1761,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } @@ -1847,7 +1847,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = register_netdevice(ndev); if (ret) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 5b6aec1..002244b 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -530,7 +530,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, const u8 *target_addr, *orig_addr; const u8 *da; u8 target_flags, ttl, flags; - u32 orig_sn, target_sn, lifetime, target_metric; + u32 orig_sn, target_sn, lifetime, target_metric = 0; bool reply = false; bool forward = true; bool root_is_gate; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index d20bab5..861b93f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -67,6 +67,7 @@ static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ + .insecure_elasticity = true, /* Disable chain-length checks. */ .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, addr), @@ -258,11 +259,11 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) } /* Caller must hold local->sta_mtx */ -static void sta_info_hash_add(struct ieee80211_local *local, - struct sta_info *sta) +static int sta_info_hash_add(struct ieee80211_local *local, + struct sta_info *sta) { - rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -524,7 +525,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_BLOCK_BA); /* make the station visible */ - sta_info_hash_add(local, sta); + err = sta_info_hash_add(local, sta); + if (err) + goto out_drop_sta; list_add_tail_rcu(&sta->list, &local->sta_list); @@ -557,6 +560,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_remove: sta_info_hash_del(local, sta); list_del_rcu(&sta->list); + out_drop_sta: local->num_sta--; synchronize_net(); __cleanup_single_sta(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 053f5c4..62193f4 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -377,7 +377,6 @@ DECLARE_EWMA(signal, 1024, 8) * @uploaded: set to true when sta is uploaded to the driver * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) - * @beacon_loss_count: number of times beacon loss has triggered * @rcu_head: RCU head used for freeing this station struct * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, * taken from HT/VHT capabilities or VHT operating mode notification diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index c9eeb3f..a29ea81 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -4,7 +4,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015 - 2016 Intel Deutschland GmbH * * This file is GPLv2 as found in COPYING. */ @@ -15,6 +15,7 @@ #include <linux/rtnetlink.h> #include "ieee80211_i.h" #include "driver-ops.h" +#include "rate.h" /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) @@ -302,7 +303,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, /* IEEE802.11ac-2013 Table E-4 */ u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; struct cfg80211_chan_def uc = sta->tdls_chandef; - enum nl80211_chan_width max_width = ieee80211_get_sta_bw(&sta->sta); + enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(sta); int i; /* only support upgrading non-narrow channels up to 80Mhz */ @@ -313,7 +314,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, if (max_width > NL80211_CHAN_WIDTH_80) max_width = NL80211_CHAN_WIDTH_80; - if (uc.width == max_width) + if (uc.width >= max_width) return; /* * Channel usage constrains in the IEEE802.11ac-2013 specification only @@ -324,6 +325,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, for (i = 0; i < ARRAY_SIZE(centers_80mhz); i++) if (abs(uc.chan->center_freq - centers_80mhz[i]) <= 30) { uc.center_freq1 = centers_80mhz[i]; + uc.center_freq2 = 0; uc.width = NL80211_CHAN_WIDTH_80; break; } @@ -332,7 +334,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, return; /* proceed to downgrade the chandef until usable or the same */ - while (uc.width > max_width && + while (uc.width > max_width || !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc, sdata->wdev.iftype)) ieee80211_chandef_downgrade(&uc); @@ -1242,18 +1244,44 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, return ret; } -static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata) +static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; + enum nl80211_chan_width width; + struct ieee80211_supported_band *sband; mutex_lock(&local->chanctx_mtx); conf = rcu_dereference_protected(sdata->vif.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (conf) { + width = conf->def.width; + sband = local->hw.wiphy->bands[conf->def.chan->band]; ctx = container_of(conf, struct ieee80211_chanctx, conf); ieee80211_recalc_chanctx_chantype(local, ctx); + + /* if width changed and a peer is given, update its BW */ + if (width != conf->def.width && sta && + test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) { + enum ieee80211_sta_rx_bandwidth bw; + + bw = ieee80211_chan_width_to_rx_bw(conf->def.width); + bw = min(bw, ieee80211_sta_cap_rx_bw(sta)); + if (bw != sta->sta.bandwidth) { + sta->sta.bandwidth = bw; + rate_control_rate_update(local, sband, sta, + IEEE80211_RC_BW_CHANGED); + /* + * if a TDLS peer BW was updated, we need to + * recalc the chandef width again, to get the + * correct chanctx min_def + */ + ieee80211_recalc_chanctx_chantype(local, ctx); + } + } + } mutex_unlock(&local->chanctx_mtx); } @@ -1350,8 +1378,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, break; } - iee80211_tdls_recalc_chanctx(sdata); - mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, peer); if (!sta) { @@ -1360,6 +1386,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, break; } + iee80211_tdls_recalc_chanctx(sdata, sta); iee80211_tdls_recalc_ht_protection(sdata, sta); set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); @@ -1390,7 +1417,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, iee80211_tdls_recalc_ht_protection(sdata, NULL); mutex_unlock(&local->sta_mtx); - iee80211_tdls_recalc_chanctx(sdata); + iee80211_tdls_recalc_chanctx(sdata, NULL); break; default: ret = -ENOTSUPP; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 62ad532..21f6602 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1116,11 +1116,15 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, reset_agg_timer = true; } else { queued = true; + if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) { + clear_sta_flag(tx->sta, WLAN_STA_SP); + ps_dbg(tx->sta->sdata, + "STA %pM aid %d: SP frame queued, close the SP w/o telling the peer\n", + tx->sta->sta.addr, tx->sta->sta.aid); + } info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS | - IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_STATUS_EOSP; + info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; __skb_queue_tail(&tid_tx->pending, skb); if (skb_queue_len(&tid_tx->pending) > STA_MAX_TX_BUFFER) purge_skb = __skb_dequeue(&tid_tx->pending); @@ -1247,7 +1251,8 @@ static void ieee80211_drv_tx(struct ieee80211_local *local, struct txq_info *txqi; u8 ac; - if (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE) + if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || + (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) goto tx_normal; if (!ieee80211_is_data(hdr->frame_control)) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 89e04d5..e590e2e 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -319,7 +319,30 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) return IEEE80211_STA_RX_BW_80; } -static enum ieee80211_sta_rx_bandwidth +enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta) +{ + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + u32 cap_width; + + if (!vht_cap->vht_supported) { + if (!sta->sta.ht_cap.ht_supported) + return NL80211_CHAN_WIDTH_20_NOHT; + + return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + NL80211_CHAN_WIDTH_40 : NL80211_CHAN_WIDTH_20; + } + + cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + + if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) + return NL80211_CHAN_WIDTH_160; + else if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) + return NL80211_CHAN_WIDTH_80P80; + + return NL80211_CHAN_WIDTH_80; +} + +enum ieee80211_sta_rx_bandwidth ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) { switch (width) { @@ -347,10 +370,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) bw = ieee80211_sta_cap_rx_bw(sta); bw = min(bw, sta->cur_max_bandwidth); - - /* do not cap the BW of TDLS WIDER_BW peers by the bss */ - if (!test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) - bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); + bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); return bw; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index b18c5ed..0b80a71 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -543,6 +543,9 @@ static struct net_device *find_outdev(struct net *net, if (!dev) return ERR_PTR(-ENODEV); + if (IS_ERR(dev)) + return dev; + /* The caller is holding rtnl anyways, so release the dev reference */ dev_put(dev); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index b0bc475..2e8e7e5 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -95,7 +95,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 7e6568c..a748b0c 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -497,6 +497,26 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } +/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need + * a separate reference counter + */ +static inline void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + +static inline void +__ip_set_put_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + BUG_ON(set->ref_netlink == 0); + set->ref_netlink--; + write_unlock_bh(&ip_set_ref_lock); +} + /* Add, del and test set entries from kernel. * * The set behind the index must exist and must be referenced @@ -1002,7 +1022,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s && s->ref) { + if (s && (s->ref || s->ref_netlink)) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1024,7 +1044,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!s) { ret = -ENOENT; goto out; - } else if (s->ref) { + } else if (s->ref || s->ref_netlink) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1171,6 +1191,9 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; + if (from->ref_netlink || to->ref_netlink) + return -EBUSY; + strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); @@ -1206,7 +1229,7 @@ ip_set_dump_done(struct netlink_callback *cb) if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); } return 0; } @@ -1328,7 +1351,7 @@ dump_last: if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); - set->ref++; + set->ref_netlink++; } write_unlock_bh(&ip_set_ref_lock); nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, @@ -1396,7 +1419,7 @@ release_refcount: if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); cb->args[IPSET_CB_ARG0] = 0; } out: diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e5336ab..d32fd6b 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1082,7 +1082,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) goto nla_put_failure; #endif - if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 24c6c19..a2a89e4 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -458,7 +458,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + n * set->dsize))) goto nla_put_failure; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index afde5f5..e27fd17 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks); __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); -static __read_mostly spinlock_t nf_conntrack_locks_all_lock; +static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) @@ -1778,6 +1778,7 @@ void nf_conntrack_init_end(void) int nf_conntrack_init_net(struct net *net) { + static atomic64_t unique_id; int ret = -ENOMEM; int cpu; @@ -1800,7 +1801,8 @@ int nf_conntrack_init_net(struct net *net) if (!net->ct.stat) goto err_pcpu_lists; - net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu", + (u64)atomic64_inc_return(&unique_id)); if (!net->ct.slabname) goto err_slabname; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 278f3b9..7cc1d9c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -410,6 +410,8 @@ static void tcp_options(const struct sk_buff *skb, length--; continue; default: + if (length < 2) + return; opsize=*ptr++; if (opsize < 2) /* "silly options" */ return; @@ -470,6 +472,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 4c2b4c0..dbd0803 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -96,6 +96,8 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, return -EINVAL; if (flags & NFACCT_F_OVERQUOTA) return -EINVAL; + if ((flags & NFACCT_F_QUOTA) && !tb[NFACCT_QUOTA]) + return -EINVAL; size += sizeof(u64); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7542999..cb5b630 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -582,7 +582,12 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, /* nfnetlink_unicast will either free the nskb or add it to a socket */ err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { - queue->queue_user_dropped++; + if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { + failopen = 1; + err = 0; + } else { + queue->queue_user_dropped++; + } goto err_out_unlock; } diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 29d2c31..daf45da 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -236,6 +236,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) list_del(&info->timer->entry); del_timer_sync(&info->timer->timer); + cancel_work_sync(&info->timer->work); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); kfree(info->timer->attr.attr.name); kfree(info->timer); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 215fc08..330ebd6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -688,7 +688,7 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->portid) { + if (nlk->portid && nlk->bound) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 234a733..ce94729 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -7,7 +7,9 @@ config OPENVSWITCH depends on INET depends on !NF_CONNTRACK || \ (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ - (!NF_NAT || NF_NAT))) + (!NF_NAT || NF_NAT) && \ + (!NF_NAT_IPV4 || NF_NAT_IPV4) && \ + (!NF_NAT_IPV6 || NF_NAT_IPV6))) select LIBCRC32C select MPLS select NET_MPLS_GSO diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e9dd47b..879185f 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -461,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { - set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked, true); memcpy(&flow_key->ipv6.addr.src, masked, sizeof(flow_key->ipv6.addr.src)); @@ -483,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, NULL, &flags) != NEXTHDR_ROUTING); - set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked, recalc_csum); memcpy(&flow_key->ipv6.addr.dst, masked, sizeof(flow_key->ipv6.addr.dst)); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index dc5eb29..10c84d8 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -367,6 +367,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; + skb_orphan(skb); memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); err = nf_ct_frag6_gather(net, skb, user); if (err) @@ -535,14 +536,15 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED_REPLY: - if (skb->protocol == htons(ETH_P_IP) && + if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && + skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, hooknum)) err = NF_DROP; goto push; -#if IS_ENABLED(CONFIG_NF_NAT_IPV6) - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && + skb->protocol == htons(ETH_P_IPV6)) { __be16 frag_off; u8 nexthdr = ipv6_hdr(skb)->nexthdr; int hdrlen = ipv6_skip_exthdr(skb, @@ -557,7 +559,6 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, err = NF_DROP; goto push; } -#endif } /* Non-ICMP, fall thru to initialize if needed. */ case IP_CT_NEW: @@ -664,11 +665,12 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, /* Determine NAT type. * Check if the NAT type can be deduced from the tracked connection. - * Make sure expected traffic is NATted only when committing. + * Make sure new expected connections (IP_CT_RELATED) are NATted only + * when committing. */ if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && ct->status & IPS_NAT_MASK && - (!(ct->status & IPS_EXPECTED_BIT) || info->commit)) { + (ctinfo != IP_CT_RELATED || info->commit)) { /* NAT an established or related connection like before. */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) /* This is the REPLY direction for a connection @@ -774,6 +776,19 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, return -EINVAL; } + /* Userspace may decide to perform a ct lookup without a helper + * specified followed by a (recirculate and) commit with one. + * Therefore, for unconfirmed connections which we will commit, + * we need to attach the helper here. + */ + if (!nf_ct_is_confirmed(ct) && info->commit && + info->helper && !nfct_help(ct)) { + int err = __nf_ct_try_assign_helper(ct, info->ct, + GFP_ATOMIC); + if (err) + return err; + } + /* Call the helper only if: * - nf_conntrack_in() was executed above ("!cached") for a * confirmed connection, or @@ -968,7 +983,8 @@ static int parse_nat(const struct nlattr *attr, break; case OVS_NAT_ATTR_IP_MIN: - nla_memcpy(&info->range.min_addr, a, nla_len(a)); + nla_memcpy(&info->range.min_addr, a, + sizeof(info->range.min_addr)); info->range.flags |= NF_NAT_RANGE_MAP_IPS; break; @@ -1238,7 +1254,8 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, } if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { - if (info->family == NFPROTO_IPV4) { + if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && + info->family == NFPROTO_IPV4) { if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, info->range.min_addr.ip) || (info->range.max_addr.ip @@ -1246,8 +1263,8 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, info->range.max_addr.ip)))) return false; -#if IS_ENABLED(CONFIG_NF_NAT_IPV6) - } else if (info->family == NFPROTO_IPV6) { + } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && + info->family == NFPROTO_IPV6) { if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, &info->range.min_addr.in6) || (memcmp(&info->range.max_addr.in6, @@ -1256,7 +1273,6 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, &info->range.max_addr.in6)))) return false; -#endif } else { return false; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1ecfa71..18d0bec 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3521,6 +3521,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) i->ifindex = mreq->mr_ifindex; i->alen = mreq->mr_alen; memcpy(i->addr, mreq->mr_address, i->alen); + memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; i->next = po->mclist; po->mclist = i; @@ -4151,7 +4152,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { - WARN(1, "Tx-ring is not supported.\n"); + net_warn_ratelimited("Tx-ring is not supported.\n"); goto out; } diff --git a/net/rds/cong.c b/net/rds/cong.c index e6144b8..6641bcf 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -299,7 +299,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - __set_bit_le(off, (void *)map->m_page_addrs[i]); + set_bit_le(off, (void *)map->m_page_addrs[i]); } void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) @@ -313,7 +313,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - __clear_bit_le(off, (void *)map->m_page_addrs[i]); + clear_bit_le(off, (void *)map->m_page_addrs[i]); } static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 8764970..310cabc 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -194,7 +194,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); - dp->dp_ack_seq = rds_ib_piggyb_ack(ic); + dp->dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); /* Advertise flow control */ if (ic->i_flowctl) { diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 977fb86..abc8cc8 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -796,7 +796,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, addr = kmap_atomic(sg_page(&frag->f_sg)); - src = addr + frag_off; + src = addr + frag->f_sg.offset + frag_off; dst = (void *)map->m_page_addrs[map_page] + map_off; for (k = 0; k < to_copy; k += 8) { /* Record ports that became uncongested, ie diff --git a/net/rds/page.c b/net/rds/page.c index 616f21f..e2b5a58 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -135,8 +135,8 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, if (rem->r_offset != 0) rds_stats_inc(s_page_remainder_hit); - rem->r_offset += bytes; - if (rem->r_offset == PAGE_SIZE) { + rem->r_offset += ALIGN(bytes, 8); + if (rem->r_offset >= PAGE_SIZE) { __free_page(rem->r_page); rem->r_page = NULL; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 61ed2a8..86187da 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -127,7 +127,7 @@ void rds_tcp_restore_callbacks(struct socket *sock, /* * This is the only path that sets tc->t_sock. Send and receive trust that - * it is set. The RDS_CONN_CONNECTED bit protects those paths from being + * it is set. The RDS_CONN_UP bit protects those paths from being * called while it isn't set. */ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) @@ -216,6 +216,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) if (!tc) return -ENOMEM; + mutex_init(&tc->t_conn_lock); tc->t_sock = NULL; tc->t_tinc = NULL; tc->t_tinc_hdr_rem = sizeof(struct rds_header); diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 64f873c..41c2283 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -12,6 +12,10 @@ struct rds_tcp_connection { struct list_head t_tcp_node; struct rds_connection *conn; + /* t_conn_lock synchronizes the connection establishment between + * rds_tcp_accept_one and rds_tcp_conn_connect + */ + struct mutex t_conn_lock; struct socket *t_sock; void *t_orig_write_space; void *t_orig_data_ready; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 5cb1687..49a3fcf 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -78,7 +78,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn) struct socket *sock = NULL; struct sockaddr_in src, dest; int ret; + struct rds_tcp_connection *tc = conn->c_transport_data; + + mutex_lock(&tc->t_conn_lock); + if (rds_conn_up(conn)) { + mutex_unlock(&tc->t_conn_lock); + return 0; + } ret = sock_create_kern(rds_conn_net(conn), PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret < 0) @@ -120,6 +127,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) } out: + mutex_unlock(&tc->t_conn_lock); if (sock) sock_release(sock); return ret; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 0936a4a..be263cd 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -76,7 +76,9 @@ int rds_tcp_accept_one(struct socket *sock) struct rds_connection *conn; int ret; struct inet_sock *inet; - struct rds_tcp_connection *rs_tcp; + struct rds_tcp_connection *rs_tcp = NULL; + int conn_state; + struct sock *nsk; ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, @@ -115,28 +117,44 @@ int rds_tcp_accept_one(struct socket *sock) * rds_tcp_state_change() will do that cleanup */ rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; - if (rs_tcp->t_sock && - ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { - struct sock *nsk = new_sock->sk; - - nsk->sk_user_data = NULL; - nsk->sk_prot->disconnect(nsk, 0); - tcp_done(nsk); - new_sock = NULL; - ret = 0; - goto out; - } else if (rs_tcp->t_sock) { - rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); - conn->c_outgoing = 0; - } - rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); + mutex_lock(&rs_tcp->t_conn_lock); + conn_state = rds_conn_state(conn); + if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP) + goto rst_nsk; + if (rs_tcp->t_sock) { + /* Need to resolve a duelling SYN between peers. + * We have an outstanding SYN to this peer, which may + * potentially have transitioned to the RDS_CONN_UP state, + * so we must quiesce any send threads before resetting + * c_transport_data. + */ + wait_event(conn->c_waitq, + !test_bit(RDS_IN_XMIT, &conn->c_flags)); + if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { + goto rst_nsk; + } else if (rs_tcp->t_sock) { + rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); + conn->c_outgoing = 0; + } + } rds_tcp_set_callbacks(new_sock, conn); - rds_connect_complete(conn); + rds_connect_complete(conn); /* marks RDS_CONN_UP */ + new_sock = NULL; + ret = 0; + goto out; +rst_nsk: + /* reset the newly returned accept sock and bail */ + nsk = new_sock->sk; + rds_tcp_stats_inc(s_tcp_listen_closed_stale); + nsk->sk_user_data = NULL; + nsk->sk_prot->disconnect(nsk, 0); + tcp_done(nsk); new_sock = NULL; ret = 0; - out: + if (rs_tcp) + mutex_unlock(&rs_tcp->t_conn_lock); if (new_sock) sock_release(new_sock); return ret; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c589a9b..343d011 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -423,7 +423,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, u16 ife_type = 0; u8 *daddr = NULL; u8 *saddr = NULL; - int ret = 0; + int ret = 0, exists = 0; int err; err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy); @@ -435,25 +435,29 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_IFE_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + if (parm->flags & IFE_ENCODE) { /* Until we get issued the ethertype, we cant have * a default.. **/ if (!tb[TCA_IFE_TYPE]) { + if (exists) + tcf_hash_release(a, bind); pr_info("You MUST pass etherype for encoding\n"); return -EINVAL; } } - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*ife), bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - if (bind) /* dont override defaults */ - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; @@ -495,6 +499,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, NULL); if (err) { metadata_parse_err: + if (exists) + tcf_hash_release(a, bind); if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 350e134..8b52700 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -96,7 +96,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, struct tcf_ipt *ipt; struct xt_entry_target *td, *t; char *tname; - int ret = 0, err; + int ret = 0, err, exists = 0; u32 hook = 0; u32 index = 0; @@ -107,18 +107,23 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, if (err < 0) return err; - if (tb[TCA_IPT_HOOK] == NULL) - return -EINVAL; - if (tb[TCA_IPT_TARG] == NULL) + if (tb[TCA_IPT_INDEX] != NULL) + index = nla_get_u32(tb[TCA_IPT_INDEX]); + + exists = tcf_hash_check(tn, index, a, bind); + if (exists && bind) + return 0; + + if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) return -EINVAL; - if (tb[TCA_IPT_INDEX] != NULL) - index = nla_get_u32(tb[TCA_IPT_INDEX]); - if (!tcf_hash_check(tn, index, a, bind)) { ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind, false); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e8a760c..8f3948d 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -61,7 +61,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct tc_mirred *parm; struct tcf_mirred *m; struct net_device *dev; - int ret, ok_push = 0; + int ret, ok_push = 0, exists = 0; if (nla == NULL) return -EINVAL; @@ -71,17 +71,27 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (tb[TCA_MIRRED_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_MIRRED_PARMS]); + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + switch (parm->eaction) { case TCA_EGRESS_MIRROR: case TCA_EGRESS_REDIR: break; default: + if (exists) + tcf_hash_release(a, bind); return -EINVAL; } if (parm->ifindex) { dev = __dev_get_by_index(net, parm->ifindex); - if (dev == NULL) + if (dev == NULL) { + if (exists) + tcf_hash_release(a, bind); return -ENODEV; + } switch (dev->type) { case ARPHRD_TUNNEL: case ARPHRD_TUNNEL6: @@ -99,7 +109,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev = NULL; } - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { if (dev == NULL) return -EINVAL; ret = tcf_hash_create(tn, parm->index, est, a, @@ -108,9 +118,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return ret; ret = ACT_P_CREATED; } else { - if (bind) - return 0; - tcf_hash_release(a, bind); if (!ovr) return -EEXIST; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 75b2be1..3a33fb6 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -87,7 +87,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, struct tc_defact *parm; struct tcf_defact *d; char *defdata; - int ret = 0, err; + int ret = 0, err, exists = 0; if (nla == NULL) return -EINVAL; @@ -99,13 +99,21 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (tb[TCA_DEF_PARMS] == NULL) return -EINVAL; - if (tb[TCA_DEF_DATA] == NULL) - return -EINVAL; parm = nla_data(tb[TCA_DEF_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (tb[TCA_DEF_DATA] == NULL) { + if (exists) + tcf_hash_release(a, bind); + return -EINVAL; + } + defdata = nla_data(tb[TCA_DEF_DATA]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*d), bind, false); if (ret) @@ -122,8 +130,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } else { d = to_defact(a); - if (bind) - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index cfcdbdc..69da5a8 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -69,7 +69,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL; u16 *queue_mapping = NULL; - int ret = 0, err; + int ret = 0, err, exists = 0; if (nla == NULL) return -EINVAL; @@ -96,12 +96,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, mark = nla_data(tb[TCA_SKBEDIT_MARK]); } - if (!flags) - return -EINVAL; - parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!flags) { + tcf_hash_release(a, bind); + return -EINVAL; + } + + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*d), bind, false); if (ret) @@ -111,8 +117,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(a); - if (bind) - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index bab8ae0..c45f926 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -77,7 +77,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, int action; __be16 push_vid = 0; __be16 push_proto = 0; - int ret = 0; + int ret = 0, exists = 0; int err; if (!nla) @@ -90,15 +90,25 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!tb[TCA_VLAN_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_VLAN_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + switch (parm->v_action) { case TCA_VLAN_ACT_POP: break; case TCA_VLAN_ACT_PUSH: - if (!tb[TCA_VLAN_PUSH_VLAN_ID]) + if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); - if (push_vid >= VLAN_VID_MASK) + if (push_vid >= VLAN_VID_MASK) { + if (exists) + tcf_hash_release(a, bind); return -ERANGE; + } if (tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]) { push_proto = nla_get_be16(tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]); @@ -114,11 +124,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } break; default: + if (exists) + tcf_hash_release(a, bind); return -EINVAL; } action = parm->v_action; - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*v), bind, false); if (ret) @@ -126,8 +138,6 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { - if (bind) - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f18c350..80742ed 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, if (validate) skb = validate_xmit_skb_list(skb, dev); - if (skb) { + if (likely(skb)) { HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); + } else { + spin_lock(root_lock); + return qdisc_qlen(q); } spin_lock(root_lock); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9640bb3..4befe97 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) sch->q.qlen++; } +/* netem can't properly corrupt a megapacket (like we get from GSO), so instead + * when we statistically choose to corrupt one, we instead segment it, returning + * the first packet to be corrupted, and re-enqueue the remaining frames + */ +static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct sk_buff *segs; + netdev_features_t features = netif_skb_features(skb); + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) { + qdisc_reshape_fail(skb, sch); + return NULL; + } + consume_skb(skb); + return segs; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; struct sk_buff *skb2; + struct sk_buff *segs = NULL; + unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb); + int nb = 0; int count = 1; + int rc = NET_XMIT_SUCCESS; /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) @@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * do it now in software before we mangle it. */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (skb_is_gso(skb)) { + segs = netem_segment(skb, sch); + if (!segs) + return NET_XMIT_DROP; + } else { + segs = skb; + } + + skb = segs; + segs = segs->next; + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) - return qdisc_drop(skb, sch); + skb_checksum_help(skb))) { + rc = qdisc_drop(skb, sch); + goto finish_segs; + } skb->data[prandom_u32() % skb_headlen(skb)] ^= 1<<(prandom_u32() % 8); @@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->qstats.requeues++; } +finish_segs: + if (segs) { + while (segs) { + skb2 = segs->next; + segs->next = NULL; + qdisc_skb_cb(segs)->pkt_len = segs->len; + last_len = segs->len; + rc = qdisc_enqueue(segs, sch); + if (rc != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(rc)) + qdisc_qstats_drop(sch); + } else { + nb++; + len += last_len; + } + segs = skb2; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); + } return NET_XMIT_SUCCESS; } diff --git a/net/sctp/output.c b/net/sctp/output.c index 736c004..9844fe5 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -401,7 +401,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sk = chunk->skb->sk; /* Allocate the new skb. */ - nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC); + nskb = alloc_skb(packet->size + MAX_HEADER, gfp); if (!nskb) goto nomem; @@ -523,8 +523,8 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) */ if (auth) sctp_auth_calculate_hmac(asoc, nskb, - (struct sctp_auth_chunk *)auth, - GFP_ATOMIC); + (struct sctp_auth_chunk *)auth, + gfp); /* 2) Calculate the Adler-32 checksum of the whole packet, * including the SCTP common header and all the @@ -705,7 +705,8 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, /* Check whether this chunk and all the rest of pending data will fit * or delay in hopes of bundling a full sized packet. */ - if (chunk->skb->len + q->out_qlen >= transport->pathmtu - packet->overhead) + if (chunk->skb->len + q->out_qlen > + transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 8d3d362..084718f 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -866,8 +866,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) * sender MUST assure that at least one T3-rtx * timer is running. */ - if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) - sctp_transport_reset_timers(transport); + if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { + sctp_transport_reset_t3_rtx(transport); + transport->last_time_sent = jiffies; + } } break; @@ -924,8 +926,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) error = sctp_outq_flush_rtx(q, packet, rtx_timeout, &start_timer); - if (start_timer) - sctp_transport_reset_timers(transport); + if (start_timer) { + sctp_transport_reset_t3_rtx(transport); + transport->last_time_sent = jiffies; + } /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. @@ -1062,7 +1066,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport); + sctp_transport_reset_t3_rtx(transport); + transport->last_time_sent = jiffies; /* Only let one DATA chunk get bundled with a * COOKIE-ECHO chunk. diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 7f0bf79..56f364d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3080,8 +3080,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, return SCTP_ERROR_RSRC_LOW; /* Start the heartbeat timer. */ - if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer))) - sctp_transport_hold(peer); + sctp_transport_reset_hb_timer(peer); asoc->new_transport = peer; break; case SCTP_PARAM_DEL_IP: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 7fe56d0..41b081a 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -69,8 +69,6 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_cmd_seq_t *commands, gfp_t gfp); -static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds, - struct sctp_transport *t); /******************************************************************** * Helper functions ********************************************************************/ @@ -367,6 +365,7 @@ void sctp_generate_heartbeat_event(unsigned long data) struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); + u32 elapsed, timeout; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -378,6 +377,16 @@ void sctp_generate_heartbeat_event(unsigned long data) goto out_unlock; } + /* Check if we should still send the heartbeat or reschedule */ + elapsed = jiffies - transport->last_time_sent; + timeout = sctp_transport_timeout(transport); + if (elapsed < timeout) { + elapsed = timeout - elapsed; + if (!mod_timer(&transport->hb_timer, jiffies + elapsed)) + sctp_transport_hold(transport); + goto out_unlock; + } + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT), asoc->state, asoc->ep, asoc, @@ -507,7 +516,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, 0); /* Update the hb timer to resend a heartbeat every rto */ - sctp_cmd_hb_timer_update(commands, transport); + sctp_transport_reset_hb_timer(transport); } if (transport->state != SCTP_INACTIVE && @@ -634,11 +643,8 @@ static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds, * hold a reference on the transport to make sure none of * the needed data structures go away. */ - list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - - if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) - sctp_transport_hold(t); - } + list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) + sctp_transport_reset_hb_timer(t); } static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds, @@ -669,15 +675,6 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds, } -/* Helper function to update the heartbeat timer. */ -static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds, - struct sctp_transport *t) -{ - /* Update the heartbeat timer. */ - if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) - sctp_transport_hold(t); -} - /* Helper function to handle the reception of an HEARTBEAT ACK. */ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, @@ -742,8 +739,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at)); /* Update the heartbeat timer. */ - if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) - sctp_transport_hold(t); + sctp_transport_reset_hb_timer(t); if (was_unconfirmed && asoc->peer.transport_count == 1) sctp_transport_immediate_rtx(t); @@ -1614,7 +1610,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_HB_TIMER_UPDATE: t = cmd->obj.transport; - sctp_cmd_hb_timer_update(commands, t); + sctp_transport_reset_hb_timer(t); break; case SCTP_CMD_HB_TIMERS_STOP: diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 9b6b48c..81b8667 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -183,7 +183,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport) +void sctp_transport_reset_t3_rtx(struct sctp_transport *transport) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -197,11 +197,18 @@ void sctp_transport_reset_timers(struct sctp_transport *transport) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); +} + +void sctp_transport_reset_hb_timer(struct sctp_transport *transport) +{ + unsigned long expires; /* When a data chunk is sent, reset the heartbeat interval. */ - if (!mod_timer(&transport->hb_timer, - sctp_transport_timeout(transport))) - sctp_transport_hold(transport); + expires = jiffies + sctp_transport_timeout(transport); + if (time_before(transport->hb_timer.expires, expires) && + !mod_timer(&transport->hb_timer, + expires + prandom_u32_max(transport->rto))) + sctp_transport_hold(transport); } /* This transport has been assigned to an association. @@ -595,13 +602,13 @@ void sctp_transport_burst_reset(struct sctp_transport *t) unsigned long sctp_transport_timeout(struct sctp_transport *trans) { /* RTO + timer slack +/- 50% of RTO */ - unsigned long timeout = (trans->rto >> 1) + prandom_u32_max(trans->rto); + unsigned long timeout = trans->rto >> 1; if (trans->state != SCTP_UNCONFIRMED && trans->state != SCTP_PF) timeout += trans->hbinterval; - return timeout + jiffies; + return timeout; } /* Reset transport variables to their initial values */ diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 8c6bc79..15612ff 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1728,8 +1728,8 @@ alloc_enc_pages(struct rpc_rqst *rqstp) return 0; } - first = snd_buf->page_base >> PAGE_CACHE_SHIFT; - last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT; + first = snd_buf->page_base >> PAGE_SHIFT; + last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT; rqstp->rq_enc_pages_num = last - first + 1 + 1; rqstp->rq_enc_pages = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *), @@ -1775,10 +1775,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, status = alloc_enc_pages(rqstp); if (status) return status; - first = snd_buf->page_base >> PAGE_CACHE_SHIFT; + first = snd_buf->page_base >> PAGE_SHIFT; inpages = snd_buf->pages + first; snd_buf->pages = rqstp->rq_enc_pages; - snd_buf->page_base -= first << PAGE_CACHE_SHIFT; + snd_buf->page_base -= first << PAGE_SHIFT; /* * Give the tail its own page, in case we need extra space in the * head when wrapping: diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index d94a8e1..244245b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -78,6 +78,7 @@ krb5_encrypt( memcpy(out, in, length); sg_init_one(sg, out, length); + skcipher_request_set_tfm(req, tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, length, local_iv); @@ -115,6 +116,7 @@ krb5_decrypt( memcpy(out, in, length); sg_init_one(sg, out, length); + skcipher_request_set_tfm(req, tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, length, local_iv); @@ -465,7 +467,7 @@ encryptor(struct scatterlist *sg, void *data) page_pos = desc->pos - outbuf->head[0].iov_len; if (page_pos >= 0 && page_pos < outbuf->page_len) { /* pages are not in place: */ - int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; + int i = (page_pos + outbuf->page_base) >> PAGE_SHIFT; in_page = desc->pages[i]; } else { in_page = sg_page(sg); @@ -946,7 +948,8 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, return PTR_ERR(hmac); } - desc = kmalloc(sizeof(*desc), GFP_KERNEL); + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), + GFP_KERNEL); if (!desc) { dprintk("%s: failed to allocate shash descriptor for '%s'\n", __func__, kctx->gk5e->cksum_name); @@ -1012,7 +1015,8 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, return PTR_ERR(hmac); } - desc = kmalloc(sizeof(*desc), GFP_KERNEL); + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), + GFP_KERNEL); if (!desc) { dprintk("%s: failed to allocate shash descriptor for '%s'\n", __func__, kctx->gk5e->cksum_name); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 71341cc..6542749 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -451,7 +451,8 @@ context_derive_keys_rc4(struct krb5_ctx *ctx) goto out_err_free_hmac; - desc = kmalloc(sizeof(*desc), GFP_KERNEL); + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), + GFP_KERNEL); if (!desc) { dprintk("%s: failed to allocate hash descriptor for '%s'\n", __func__, ctx->gk5e->cksum_name); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 765088e4..a737c2d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -79,9 +79,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) len -= buf->head[0].iov_len; if (len <= buf->page_len) { unsigned int last = (buf->page_base + len - 1) - >>PAGE_CACHE_SHIFT; + >>PAGE_SHIFT; unsigned int offset = (buf->page_base + len - 1) - & (PAGE_CACHE_SIZE - 1); + & (PAGE_SIZE - 1); ptr = kmap_atomic(buf->pages[last]); pad = *(ptr + offset); kunmap_atomic(ptr); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 008c25d..553bf95 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -881,7 +881,7 @@ static ssize_t cache_downcall(struct address_space *mapping, char *kaddr; ssize_t ret = -ENOMEM; - if (count >= PAGE_CACHE_SIZE) + if (count >= PAGE_SIZE) goto out_slow; page = find_or_create_page(mapping, 0, GFP_KERNEL); @@ -892,7 +892,7 @@ static ssize_t cache_downcall(struct address_space *mapping, ret = cache_do_downcall(kaddr, buf, count, cd); kunmap(page); unlock_page(page); - page_cache_release(page); + put_page(page); return ret; out_slow: return cache_slow_downcall(buf, count, cd); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 31789ef..fc48eca 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1390,8 +1390,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int err; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; sb->s_d_op = &simple_dentry_operations; diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 2df87f7..de70c78 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -96,8 +96,8 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct if (base || xdr->page_base) { pglen -= base; base += xdr->page_base; - ppage += base >> PAGE_CACHE_SHIFT; - base &= ~PAGE_CACHE_MASK; + ppage += base >> PAGE_SHIFT; + base &= ~PAGE_MASK; } do { char *kaddr; @@ -113,7 +113,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct } } - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; kaddr = kmap_atomic(*ppage); if (base) { len -= base; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 4439ac4..6bdb386 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(xdr_inline_pages); * Note: the addresses pgto_base and pgfrom_base are both calculated in * the same way: * if a memory area starts at byte 'base' in page 'pages[i]', - * then its address is given as (i << PAGE_CACHE_SHIFT) + base + * then its address is given as (i << PAGE_SHIFT) + base * Also note: pgfrom_base must be < pgto_base, but the memory areas * they point to may overlap. */ @@ -181,20 +181,20 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, pgto_base += len; pgfrom_base += len; - pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT); - pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT); + pgto = pages + (pgto_base >> PAGE_SHIFT); + pgfrom = pages + (pgfrom_base >> PAGE_SHIFT); - pgto_base &= ~PAGE_CACHE_MASK; - pgfrom_base &= ~PAGE_CACHE_MASK; + pgto_base &= ~PAGE_MASK; + pgfrom_base &= ~PAGE_MASK; do { /* Are any pointers crossing a page boundary? */ if (pgto_base == 0) { - pgto_base = PAGE_CACHE_SIZE; + pgto_base = PAGE_SIZE; pgto--; } if (pgfrom_base == 0) { - pgfrom_base = PAGE_CACHE_SIZE; + pgfrom_base = PAGE_SIZE; pgfrom--; } @@ -236,11 +236,11 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) char *vto; size_t copy; - pgto = pages + (pgbase >> PAGE_CACHE_SHIFT); - pgbase &= ~PAGE_CACHE_MASK; + pgto = pages + (pgbase >> PAGE_SHIFT); + pgbase &= ~PAGE_MASK; for (;;) { - copy = PAGE_CACHE_SIZE - pgbase; + copy = PAGE_SIZE - pgbase; if (copy > len) copy = len; @@ -253,7 +253,7 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) break; pgbase += copy; - if (pgbase == PAGE_CACHE_SIZE) { + if (pgbase == PAGE_SIZE) { flush_dcache_page(*pgto); pgbase = 0; pgto++; @@ -280,11 +280,11 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) char *vfrom; size_t copy; - pgfrom = pages + (pgbase >> PAGE_CACHE_SHIFT); - pgbase &= ~PAGE_CACHE_MASK; + pgfrom = pages + (pgbase >> PAGE_SHIFT); + pgbase &= ~PAGE_MASK; do { - copy = PAGE_CACHE_SIZE - pgbase; + copy = PAGE_SIZE - pgbase; if (copy > len) copy = len; @@ -293,7 +293,7 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) kunmap_atomic(vfrom); pgbase += copy; - if (pgbase == PAGE_CACHE_SIZE) { + if (pgbase == PAGE_SIZE) { pgbase = 0; pgfrom++; } @@ -1038,8 +1038,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, if (base < buf->page_len) { subbuf->page_len = min(buf->page_len - base, len); base += buf->page_base; - subbuf->page_base = base & ~PAGE_CACHE_MASK; - subbuf->pages = &buf->pages[base >> PAGE_CACHE_SHIFT]; + subbuf->page_base = base & ~PAGE_MASK; + subbuf->pages = &buf->pages[base >> PAGE_SHIFT]; len -= subbuf->page_len; base = 0; } else { @@ -1297,9 +1297,9 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base, todo -= avail_here; base += buf->page_base; - ppages = buf->pages + (base >> PAGE_CACHE_SHIFT); - base &= ~PAGE_CACHE_MASK; - avail_page = min_t(unsigned int, PAGE_CACHE_SIZE - base, + ppages = buf->pages + (base >> PAGE_SHIFT); + base &= ~PAGE_MASK; + avail_page = min_t(unsigned int, PAGE_SIZE - base, avail_here); c = kmap(*ppages) + base; @@ -1383,7 +1383,7 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base, } avail_page = min(avail_here, - (unsigned int) PAGE_CACHE_SIZE); + (unsigned int) PAGE_SIZE); } base = buf->page_len; /* align to start of tail */ } @@ -1479,9 +1479,9 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, if (page_len > len) page_len = len; len -= page_len; - page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); - i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; - thislen = PAGE_CACHE_SIZE - page_offset; + page_offset = (offset + buf->page_base) & (PAGE_SIZE - 1); + i = (offset + buf->page_base) >> PAGE_SHIFT; + thislen = PAGE_SIZE - page_offset; do { if (thislen > page_len) thislen = page_len; @@ -1492,7 +1492,7 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, page_len -= thislen; i++; page_offset = 0; - thislen = PAGE_CACHE_SIZE; + thislen = PAGE_SIZE; } while (page_len != 0); offset = 0; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8b5833c..b7e01d8 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -305,6 +305,8 @@ static void switchdev_port_attr_set_deferred(struct net_device *dev, if (err && err != -EOPNOTSUPP) netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", err, attr->id); + if (attr->complete) + attr->complete(dev, err, attr->complete_priv); } static int switchdev_port_attr_set_defer(struct net_device *dev, @@ -434,6 +436,8 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev, if (err && err != -EOPNOTSUPP) netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", err, obj->id); + if (obj->complete) + obj->complete(dev, err, obj->complete_priv); } static int switchdev_port_obj_add_defer(struct net_device *dev, @@ -502,6 +506,8 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev, if (err && err != -EOPNOTSUPP) netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", err, obj->id); + if (obj->complete) + obj->complete(dev, err, obj->complete_priv); } static int switchdev_port_obj_del_defer(struct net_device *dev, @@ -1079,7 +1085,7 @@ nla_put_failure: * @filter_dev: filter device * @idx: * - * Delete FDB entry from switch device. + * Dump FDB entries from switch device. */ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, diff --git a/net/tipc/core.c b/net/tipc/core.c index 03a8428..e2bdb07a 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -69,6 +69,7 @@ static int __net_init tipc_init_net(struct net *net) if (err) goto out_nametbl; + INIT_LIST_HEAD(&tn->dist_queue); err = tipc_topsrv_start(net); if (err) goto out_subscr; diff --git a/net/tipc/core.h b/net/tipc/core.h index 5504d63..eff58dc 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -103,6 +103,9 @@ struct tipc_net { spinlock_t nametbl_lock; struct name_table *nametbl; + /* Name dist queue */ + struct list_head dist_queue; + /* Topology subscription server */ struct tipc_server *topsrv; atomic_t subscription_count; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index ebe9d0f..6b626a6 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -40,11 +40,6 @@ int sysctl_tipc_named_timeout __read_mostly = 2000; -/** - * struct tipc_dist_queue - queue holding deferred name table updates - */ -static struct list_head tipc_dist_queue = LIST_HEAD_INIT(tipc_dist_queue); - struct distr_queue_item { struct distr_item i; u32 dtype; @@ -229,12 +224,31 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) kfree_rcu(p, rcu); } +/** + * tipc_dist_queue_purge - remove deferred updates from a node that went down + */ +static void tipc_dist_queue_purge(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct distr_queue_item *e, *tmp; + + spin_lock_bh(&tn->nametbl_lock); + list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { + if (e->node != addr) + continue; + list_del(&e->next); + kfree(e); + } + spin_unlock_bh(&tn->nametbl_lock); +} + void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) tipc_publ_purge(net, publ, addr); + tipc_dist_queue_purge(net, addr); } /** @@ -279,9 +293,11 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, * tipc_named_add_backlog - add a failed name table update to the backlog * */ -static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) +static void tipc_named_add_backlog(struct net *net, struct distr_item *i, + u32 type, u32 node) { struct distr_queue_item *e; + struct tipc_net *tn = net_generic(net, tipc_net_id); unsigned long now = get_jiffies_64(); e = kzalloc(sizeof(*e), GFP_ATOMIC); @@ -291,7 +307,7 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) e->node = node; e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout); memcpy(e, i, sizeof(*i)); - list_add_tail(&e->next, &tipc_dist_queue); + list_add_tail(&e->next, &tn->dist_queue); } /** @@ -301,10 +317,11 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) void tipc_named_process_backlog(struct net *net) { struct distr_queue_item *e, *tmp; + struct tipc_net *tn = net_generic(net, tipc_net_id); char addr[16]; unsigned long now = get_jiffies_64(); - list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { + list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { if (time_after(e->expires, now)) { if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) continue; @@ -344,7 +361,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) node = msg_orignode(msg); while (count--) { if (!tipc_update_nametbl(net, item, node, mtype)) - tipc_named_add_backlog(item, mtype, node); + tipc_named_add_backlog(net, item, mtype, node); item++; } kfree_skb(skb); diff --git a/net/tipc/node.c b/net/tipc/node.c index ace178f..9aaa1bc 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1444,6 +1444,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) int bearer_id = b->identity; struct tipc_link_entry *le; u16 bc_ack = msg_bcast_ack(hdr); + u32 self = tipc_own_addr(net); int rc = 0; __skb_queue_head_init(&xmitq); @@ -1460,6 +1461,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) return tipc_node_bc_rcv(net, skb, bearer_id); } + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self))) + goto discard; + /* Locate neighboring node that sent packet */ n = tipc_node_find(net, msg_prevnode(hdr)); if (unlikely(!n)) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3dce53e..b5f1221 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1808,27 +1808,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, else if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - if (copied > 0) { - /* We only do these additional bookkeeping/notification steps - * if we actually copied something out of the queue pair - * instead of just peeking ahead. - */ - - if (!(flags & MSG_PEEK)) { - /* If the other side has shutdown for sending and there - * is nothing more to read, then modify the socket - * state. - */ - if (vsk->peer_shutdown & SEND_SHUTDOWN) { - if (vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = SS_UNCONNECTED; - sock_set_flag(sk, SOCK_DONE); - sk->sk_state_change(sk); - } - } - } + if (copied > 0) err = copied; - } out: release_sock(sk); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 0a369bb..5621473 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -842,7 +842,7 @@ static void vmci_transport_peer_detach_cb(u32 sub_id, * qp_handle. */ if (vmci_handle_is_invalid(e_payload->handle) || - vmci_handle_is_equal(trans->qp_handle, e_payload->handle)) + !vmci_handle_is_equal(trans->qp_handle, e_payload->handle)) return; /* We don't ask for delayed CBs when we subscribe to this event (we @@ -1735,11 +1735,8 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); - if (err) - return err; - if (!skb) - return -EAGAIN; + return err; dg = (struct vmci_datagram *)skb->data; if (!dg) @@ -2154,7 +2151,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); -MODULE_VERSION("1.0.2.0-k"); +MODULE_VERSION("1.0.4.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 98c9242..056a730 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13216,7 +13216,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, struct wireless_dev *wdev; struct cfg80211_beacon_registration *reg, *tmp; - if (state != NETLINK_URELEASE) + if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC) return NOTIFY_DONE; rcu_read_lock(); diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 7ecd04c..997ff7b 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, memset(&theirs, 0, sizeof(theirs)); memcpy(new, ours, sizeof(*new)); + memset(dte, 0, sizeof(*dte)); len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask); if (len < 0) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ad7f5b3..1c4ad47 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -292,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; skb_dst_force(skb); + dev_hold(skb->dev); nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) return 0; resume: + dev_put(skb->dev); + spin_lock(&x->lock); if (nexthdr <= 0) { if (nexthdr == -EBADMSG) { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ff4a91f..637387b 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -99,6 +99,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb_dst_force(skb); + /* Inner headers are invalid now. */ + skb->encapsulation = 0; + err = x->type->output(x, skb); if (err == -EINPROGRESS) goto out; |