diff options
Diffstat (limited to 'net')
417 files changed, 10557 insertions, 6995 deletions
diff --git a/net/802/hippi.c b/net/802/hippi.c index 51a1f53..a97a3bd 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -172,14 +172,14 @@ EXPORT_SYMBOL(hippi_mac_addr); int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) { /* Never send broadcast/multicast ARP messages */ - p->mcast_probes = 0; + NEIGH_VAR_SET(p, MCAST_PROBES, 0); /* In IPv6 unicast probes are valid even on NBMA, * because they are encapsulated in normal IPv6 protocol. * Should be a generic flag. */ if (p->tbl->family != AF_INET6) - p->ucast_probes = 0; + NEIGH_VAR_SET(p, UCAST_PROBES, 0); return 0; } EXPORT_SYMBOL(hippi_neigh_setup_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 762896e..47c908f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -530,6 +530,23 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; +static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, + unsigned int len) +{ + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev = vlan->real_dev; + + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); +} + +static const struct header_ops vlan_passthru_header_ops = { + .create = vlan_passthru_hard_header, + .rebuild = dev_rebuild_header, + .parse = eth_header_parse, +}; + static struct device_type vlan_type = { .name = "vlan", }; @@ -573,7 +590,7 @@ static int vlan_dev_init(struct net_device *dev) dev->needed_headroom = real_dev->needed_headroom; if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { - dev->header_ops = real_dev->header_ops; + dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { dev->header_ops = &vlan_header_ops; diff --git a/net/Kconfig b/net/Kconfig index d334678..e411046 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -238,12 +238,19 @@ config XPS depends on SMP default y -config NETPRIO_CGROUP +config CGROUP_NET_PRIO tristate "Network priority cgroup" depends on CGROUPS ---help--- Cgroup subsystem for use in assigning processes to network priorities on - a per-interface basis + a per-interface basis. + +config CGROUP_NET_CLASSID + boolean "Network classid cgroup" + depends on CGROUPS + ---help--- + Cgroup subsystem for use as general purpose socket classid marker that is + being used in cls_cgroup and for netfilter matching. config NET_RX_BUSY_POLL boolean diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a2b480a..b9c8a6e 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -307,9 +307,9 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) hard_iface->bat_iv.ogm_buff = ogm_buff; batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; - batadv_ogm_packet->header.packet_type = BATADV_IV_OGM; - batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION; - batadv_ogm_packet->header.ttl = 2; + batadv_ogm_packet->packet_type = BATADV_IV_OGM; + batadv_ogm_packet->version = BATADV_COMPAT_VERSION; + batadv_ogm_packet->ttl = 2; batadv_ogm_packet->flags = BATADV_NO_FLAGS; batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; @@ -346,7 +346,7 @@ batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP; - batadv_ogm_packet->header.ttl = BATADV_TTL; + batadv_ogm_packet->ttl = BATADV_TTL; } /* when do we schedule our own ogm to be sent */ @@ -435,7 +435,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, fwd_str, (packet_num > 0 ? "aggregated " : ""), batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), - batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl, + batadv_ogm_packet->tq, batadv_ogm_packet->ttl, (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? "on" : "off"), hard_iface->net_dev->name, @@ -491,7 +491,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) /* multihomed peer assumed * non-primary OGMs are only broadcasted on their interface */ - if ((directlink && (batadv_ogm_packet->header.ttl == 1)) || + if ((directlink && (batadv_ogm_packet->ttl == 1)) || (forw_packet->own && (forw_packet->if_incoming != primary_if))) { /* FIXME: what about aggregated packets ? */ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -499,7 +499,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) (forw_packet->own ? "Sending own" : "Forwarding"), batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), - batadv_ogm_packet->header.ttl, + batadv_ogm_packet->ttl, forw_packet->if_incoming->net_dev->name, forw_packet->if_incoming->net_dev->dev_addr); @@ -572,7 +572,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, */ if ((!directlink) && (!(batadv_ogm_packet->flags & BATADV_DIRECTLINK)) && - (batadv_ogm_packet->header.ttl != 1) && + (batadv_ogm_packet->ttl != 1) && /* own packets originating non-primary * interfaces leave only that interface @@ -587,7 +587,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, * interface only - we still can aggregate */ if ((directlink) && - (new_bat_ogm_packet->header.ttl == 1) && + (new_bat_ogm_packet->ttl == 1) && (forw_packet->if_incoming == if_incoming) && /* packets from direct neighbors or @@ -778,7 +778,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); uint16_t tvlv_len; - if (batadv_ogm_packet->header.ttl <= 1) { + if (batadv_ogm_packet->ttl <= 1) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); return; } @@ -798,7 +798,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, tvlv_len = ntohs(batadv_ogm_packet->tvlv_len); - batadv_ogm_packet->header.ttl--; + batadv_ogm_packet->ttl--; memcpy(batadv_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN); /* apply hop penalty */ @@ -807,7 +807,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: tq: %i, ttl: %i\n", - batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl); + batadv_ogm_packet->tq, batadv_ogm_packet->ttl); /* switch of primaries first hop flag when forwarding */ batadv_ogm_packet->flags &= ~BATADV_PRIMARIES_FIRST_HOP; @@ -972,8 +972,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, spin_unlock_bh(&neigh_node->bat_iv.lq_update_lock); if (dup_status == BATADV_NO_DUP) { - orig_node->last_ttl = batadv_ogm_packet->header.ttl; - neigh_node->last_ttl = batadv_ogm_packet->header.ttl; + orig_node->last_ttl = batadv_ogm_packet->ttl; + neigh_node->last_ttl = batadv_ogm_packet->ttl; } batadv_bonding_candidate_add(bat_priv, orig_node, neigh_node); @@ -1247,7 +1247,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, * packet in an aggregation. Here we expect that the padding * is always zero (or not 0x01) */ - if (batadv_ogm_packet->header.packet_type != BATADV_IV_OGM) + if (batadv_ogm_packet->packet_type != BATADV_IV_OGM) return; /* could be changed by schedule_own_packet() */ @@ -1267,8 +1267,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig, batadv_ogm_packet->prev_sender, ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq, - batadv_ogm_packet->header.ttl, - batadv_ogm_packet->header.version, has_directlink_flag); + batadv_ogm_packet->ttl, + batadv_ogm_packet->version, has_directlink_flag); rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { @@ -1433,7 +1433,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, * seqno and similar ttl as the non-duplicate */ sameseq = orig_node->last_real_seqno == ntohl(batadv_ogm_packet->seqno); - similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl; + similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->ttl; if (is_bidirect && ((dup_status == BATADV_NO_DUP) || (sameseq && similar_ttl))) batadv_iv_ogm_orig_update(bat_priv, orig_node, ethhdr, diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 6c8c393..b316a4c 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -349,7 +349,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - switch (unicast_4addr_packet->u.header.packet_type) { + switch (unicast_4addr_packet->u.packet_type) { case BATADV_UNICAST: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* encapsulated within a UNICAST packet\n"); @@ -374,7 +374,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, break; default: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: Unknown (%u)!\n", - unicast_4addr_packet->u.header.packet_type); + unicast_4addr_packet->u.packet_type); } break; case BATADV_BCAST: @@ -387,7 +387,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, default: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* encapsulated within an unknown packet type (0x%x)\n", - unicast_4addr_packet->u.header.packet_type); + unicast_4addr_packet->u.packet_type); } } diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 271d321..6ddb614 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -355,7 +355,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES, skb->len + ETH_HLEN); - packet->header.ttl--; + packet->ttl--; batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = true; @@ -444,9 +444,9 @@ bool batadv_frag_send_packet(struct sk_buff *skb, goto out_err; /* Create one header to be copied to all fragments */ - frag_header.header.packet_type = BATADV_UNICAST_FRAG; - frag_header.header.version = BATADV_COMPAT_VERSION; - frag_header.header.ttl = BATADV_TTL; + frag_header.packet_type = BATADV_UNICAST_FRAG; + frag_header.version = BATADV_COMPAT_VERSION; + frag_header.ttl = BATADV_TTL; frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno)); frag_header.reserved = 0; frag_header.no = 0; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 29ae4ef..130cc32 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -194,7 +194,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, goto free_skb; } - if (icmp_header->header.packet_type != BATADV_ICMP) { + if (icmp_header->packet_type != BATADV_ICMP) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n"); len = -EINVAL; @@ -243,9 +243,9 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, icmp_header->uid = socket_client->index; - if (icmp_header->header.version != BATADV_COMPAT_VERSION) { + if (icmp_header->version != BATADV_COMPAT_VERSION) { icmp_header->msg_type = BATADV_PARAMETER_PROBLEM; - icmp_header->header.version = BATADV_COMPAT_VERSION; + icmp_header->version = BATADV_COMPAT_VERSION; batadv_socket_add_packet(socket_client, icmp_header, packet_len); goto free_skb; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index c51a5e5..1511f64 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -383,17 +383,17 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data; - if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) { + if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: incompatible batman version (%i)\n", - batadv_ogm_packet->header.version); + batadv_ogm_packet->version); goto err_free; } /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. */ - idx = batadv_ogm_packet->header.packet_type; + idx = batadv_ogm_packet->packet_type; ret = (*batadv_rx_handler[idx])(skb, hard_iface); if (ret == NET_RX_DROP) @@ -426,8 +426,8 @@ static void batadv_recv_handler_init(void) BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4); BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4); BUILD_BUG_ON(offsetof(struct batadv_frag_packet, dest) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, icmph.dst) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, icmph.dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4); /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; @@ -1119,9 +1119,9 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, skb_reserve(skb, ETH_HLEN); tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len); unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff; - unicast_tvlv_packet->header.packet_type = BATADV_UNICAST_TVLV; - unicast_tvlv_packet->header.version = BATADV_COMPAT_VERSION; - unicast_tvlv_packet->header.ttl = BATADV_TTL; + unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV; + unicast_tvlv_packet->version = BATADV_COMPAT_VERSION; + unicast_tvlv_packet->ttl = BATADV_TTL; unicast_tvlv_packet->reserved = 0; unicast_tvlv_packet->tvlv_len = htons(tvlv_len); unicast_tvlv_packet->align = 0; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index f94f287b..322dd73 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -266,7 +266,7 @@ static inline void batadv_dbg(int type __always_unused, */ static inline int batadv_compare_eth(const void *data1, const void *data2) { - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); + return ether_addr_equal_unaligned(data1, data2); } /** diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 351e199..511d7e1 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -722,7 +722,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, { if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) return false; - if (orig_node->last_ttl != ogm_packet->header.ttl + 1) + if (orig_node->last_ttl != ogm_packet->ttl + 1) return false; if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) return false; @@ -1082,9 +1082,9 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, coded_packet = (struct batadv_coded_packet *)skb_dest->data; skb_reset_mac_header(skb_dest); - coded_packet->header.packet_type = BATADV_CODED; - coded_packet->header.version = BATADV_COMPAT_VERSION; - coded_packet->header.ttl = packet1->header.ttl; + coded_packet->packet_type = BATADV_CODED; + coded_packet->version = BATADV_COMPAT_VERSION; + coded_packet->ttl = packet1->ttl; /* Info about first unicast packet */ memcpy(coded_packet->first_source, first_source, ETH_ALEN); @@ -1097,7 +1097,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, memcpy(coded_packet->second_source, second_source, ETH_ALEN); memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN); coded_packet->second_crc = packet_id2; - coded_packet->second_ttl = packet2->header.ttl; + coded_packet->second_ttl = packet2->ttl; coded_packet->second_ttvn = packet2->ttvn; coded_packet->coded_len = htons(coding_len); @@ -1452,7 +1452,7 @@ bool batadv_nc_skb_forward(struct sk_buff *skb, /* We only handle unicast packets */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; - if (packet->header.packet_type != BATADV_UNICAST) + if (packet->packet_type != BATADV_UNICAST) goto out; /* Try to find a coding opportunity and send the skb if one is found */ @@ -1505,7 +1505,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, /* Check for supported packet type */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; - if (packet->header.packet_type != BATADV_UNICAST) + if (packet->packet_type != BATADV_UNICAST) goto out; /* Find existing nc_path or create a new */ @@ -1623,7 +1623,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, ttvn = coded_packet_tmp.second_ttvn; } else { orig_dest = coded_packet_tmp.first_orig_dest; - ttl = coded_packet_tmp.header.ttl; + ttl = coded_packet_tmp.ttl; ttvn = coded_packet_tmp.first_ttvn; } @@ -1648,9 +1648,9 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, /* Create decoded unicast packet */ unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.packet_type = BATADV_UNICAST; - unicast_packet->header.version = BATADV_COMPAT_VERSION; - unicast_packet->header.ttl = ttl; + unicast_packet->packet_type = BATADV_UNICAST; + unicast_packet->version = BATADV_COMPAT_VERSION; + unicast_packet->ttl = ttl; memcpy(unicast_packet->dest, orig_dest, ETH_ALEN); unicast_packet->ttvn = ttvn; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8ab1434..803ab4b 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -41,7 +41,7 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2) const void *data1 = container_of(node, struct batadv_orig_node, hash_entry); - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); + return batadv_compare_eth(data1, data2); } /** diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 207459b..2dd8f24 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -155,6 +155,7 @@ enum batadv_tvlv_type { BATADV_TVLV_ROAM = 0x05, }; +#pragma pack(2) /* the destination hardware field in the ARP frame is used to * transport the claim type and the group id */ @@ -163,24 +164,20 @@ struct batadv_bla_claim_dst { uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ }; - -struct batadv_header { - uint8_t packet_type; - uint8_t version; /* batman version field */ - uint8_t ttl; - /* the parent struct has to add a byte after the header to make - * everything 4 bytes aligned again - */ -}; +#pragma pack() /** * struct batadv_ogm_packet - ogm (routing protocol) packet - * @header: common batman packet header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @flags: contains routing relevant flags - see enum batadv_iv_flags * @tvlv_len: length of tvlv data following the ogm header */ struct batadv_ogm_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t flags; __be32 seqno; uint8_t orig[ETH_ALEN]; @@ -196,29 +193,51 @@ struct batadv_ogm_packet { #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) /** - * batadv_icmp_header - common ICMP header - * @header: common batman header + * batadv_icmp_header - common members among all the ICMP packets + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node * @uid: local ICMP socket identifier + * @align: not used - useful for alignment purposes only + * + * This structure is used for ICMP packets parsing only and it is never sent + * over the wire. The alignment field at the end is there to ensure that + * members are padded the same way as they are in real packets. */ struct batadv_icmp_header { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[ETH_ALEN]; uint8_t orig[ETH_ALEN]; uint8_t uid; + uint8_t align[3]; }; /** * batadv_icmp_packet - ICMP packet - * @icmph: common ICMP header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier * @reserved: not used - useful for alignment * @seqno: ICMP sequence number */ struct batadv_icmp_packet { - struct batadv_icmp_header icmph; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ + uint8_t dst[ETH_ALEN]; + uint8_t orig[ETH_ALEN]; + uint8_t uid; uint8_t reserved; __be16 seqno; }; @@ -227,13 +246,25 @@ struct batadv_icmp_packet { /** * batadv_icmp_packet_rr - ICMP RouteRecord packet - * @icmph: common ICMP header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier * @rr_cur: number of entries the rr array * @seqno: ICMP sequence number * @rr: route record array */ struct batadv_icmp_packet_rr { - struct batadv_icmp_header icmph; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ + uint8_t dst[ETH_ALEN]; + uint8_t orig[ETH_ALEN]; + uint8_t uid; uint8_t rr_cur; __be16 seqno; uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; @@ -253,8 +284,18 @@ struct batadv_icmp_packet_rr { */ #pragma pack(2) +/** + * struct batadv_unicast_packet - unicast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @ttvn: translation table version number + * @dest: originator destination of the unicast packet + */ struct batadv_unicast_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t ttvn; /* destination translation table version number */ uint8_t dest[ETH_ALEN]; /* "4 bytes boundary + 2 bytes" long to make the payload after the @@ -280,7 +321,9 @@ struct batadv_unicast_4addr_packet { /** * struct batadv_frag_packet - fragmented packet - * @header: common batman packet header with type, compatversion, and ttl + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @dest: final destination used when routing fragments * @orig: originator of the fragment used when merging the packet * @no: fragment number within this sequence @@ -289,7 +332,9 @@ struct batadv_unicast_4addr_packet { * @total_size: size of the merged packet */ struct batadv_frag_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; #if defined(__BIG_ENDIAN_BITFIELD) uint8_t no:4; uint8_t reserved:4; @@ -305,8 +350,19 @@ struct batadv_frag_packet { __be16 total_size; }; +/** + * struct batadv_bcast_packet - broadcast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @reserved: reserved byte for alignment + * @seqno: sequence identification + * @orig: originator of the broadcast packet + */ struct batadv_bcast_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t reserved; __be32 seqno; uint8_t orig[ETH_ALEN]; @@ -315,11 +371,11 @@ struct batadv_bcast_packet { */ }; -#pragma pack() - /** * struct batadv_coded_packet - network coded packet - * @header: common batman packet header and ttl of first included packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @reserved: Align following fields to 2-byte boundaries * @first_source: original source of first included packet * @first_orig_dest: original destinal of first included packet @@ -334,7 +390,9 @@ struct batadv_bcast_packet { * @coded_len: length of network coded part of the payload */ struct batadv_coded_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t first_ttvn; /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */ uint8_t first_source[ETH_ALEN]; @@ -349,9 +407,13 @@ struct batadv_coded_packet { __be16 coded_len; }; +#pragma pack() + /** * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload - * @header: common batman packet header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @reserved: reserved field (for packet alignment) * @src: address of the source * @dst: address of the destination @@ -359,7 +421,9 @@ struct batadv_coded_packet { * @align: 2 bytes to align the header to a 4 byte boundry */ struct batadv_unicast_tvlv_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t reserved; uint8_t dst[ETH_ALEN]; uint8_t src[ETH_ALEN]; @@ -420,13 +484,13 @@ struct batadv_tvlv_tt_vlan_data { * struct batadv_tvlv_tt_change - translation table diff data * @flags: status indicators concerning the non-mesh client (see * batadv_tt_client_flags) - * @reserved: reserved field + * @reserved: reserved field - useful for alignment purposes only * @addr: mac address of non-mesh client that triggered this tt change * @vid: VLAN identifier */ struct batadv_tvlv_tt_change { uint8_t flags; - uint8_t reserved; + uint8_t reserved[3]; uint8_t addr[ETH_ALEN]; __be16 vid; }; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index d4114d7..46278bf 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -308,7 +308,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, memcpy(icmph->dst, icmph->orig, ETH_ALEN); memcpy(icmph->orig, primary_if->net_dev->dev_addr, ETH_ALEN); icmph->msg_type = BATADV_ECHO_REPLY; - icmph->header.ttl = BATADV_TTL; + icmph->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); if (res != NET_XMIT_DROP) @@ -338,9 +338,9 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; /* send TTL exceeded if packet is an echo request (traceroute) */ - if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) { + if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { pr_debug("Warning - can't forward icmp packet from %pM to %pM: ttl exceeded\n", - icmp_packet->icmph.orig, icmp_packet->icmph.dst); + icmp_packet->orig, icmp_packet->dst); goto out; } @@ -349,7 +349,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, goto out; /* get routing information */ - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig); + orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig); if (!orig_node) goto out; @@ -359,11 +359,11 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; - memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN); - memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr, + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->icmph.msg_type = BATADV_TTL_EXCEEDED; - icmp_packet->icmph.header.ttl = BATADV_TTL; + icmp_packet->msg_type = BATADV_TTL_EXCEEDED; + icmp_packet->ttl = BATADV_TTL; if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; @@ -434,7 +434,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, return batadv_recv_my_icmp_packet(bat_priv, skb); /* TTL exceeded */ - if (icmph->header.ttl < 2) + if (icmph->ttl < 2) return batadv_recv_icmp_ttl_exceeded(bat_priv, skb); /* get routing information */ @@ -449,7 +449,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmph = (struct batadv_icmp_header *)skb->data; /* decrement ttl */ - icmph->header.ttl--; + icmph->ttl--; /* route it */ if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) @@ -709,7 +709,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_packet *)skb->data; /* TTL exceeded */ - if (unicast_packet->header.ttl < 2) { + if (unicast_packet->ttl < 2) { pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n", ethhdr->h_source, unicast_packet->dest); goto out; @@ -727,9 +727,9 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.ttl--; + unicast_packet->ttl--; - switch (unicast_packet->header.packet_type) { + switch (unicast_packet->packet_type) { case BATADV_UNICAST_4ADDR: hdr_len = sizeof(struct batadv_unicast_4addr_packet); break; @@ -970,7 +970,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - is4addr = unicast_packet->header.packet_type == BATADV_UNICAST_4ADDR; + is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); @@ -1160,7 +1160,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; - if (bcast_packet->header.ttl < 2) + if (bcast_packet->ttl < 2) goto out; orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index c83be5e..fba4dcf 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -161,11 +161,11 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, return false; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.version = BATADV_COMPAT_VERSION; + unicast_packet->version = BATADV_COMPAT_VERSION; /* batman packet type: unicast */ - unicast_packet->header.packet_type = BATADV_UNICAST; + unicast_packet->packet_type = BATADV_UNICAST; /* set unicast ttl */ - unicast_packet->header.ttl = BATADV_TTL; + unicast_packet->ttl = BATADV_TTL; /* copy the destination for faster routing */ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); /* set the destination tt version number */ @@ -221,7 +221,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, goto out; uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - uc_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR; + uc_4addr_packet->u.packet_type = BATADV_UNICAST_4ADDR; memcpy(uc_4addr_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN); uc_4addr_packet->subtype = packet_subtype; uc_4addr_packet->reserved = 0; @@ -436,7 +436,7 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; - bcast_packet->header.ttl--; + bcast_packet->ttl--; skb_reset_mac_header(newskb); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 36f0508..a8f99d1 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -264,11 +264,11 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; bcast_packet = (struct batadv_bcast_packet *)skb->data; - bcast_packet->header.version = BATADV_COMPAT_VERSION; - bcast_packet->header.ttl = BATADV_TTL; + bcast_packet->version = BATADV_COMPAT_VERSION; + bcast_packet->ttl = BATADV_TTL; /* batman packet type: broadcast */ - bcast_packet->header.packet_type = BATADV_BCAST; + bcast_packet->packet_type = BATADV_BCAST; bcast_packet->reserved = 0; /* hw address of first interface is the orig mac because only @@ -328,7 +328,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, int hdr_size, struct batadv_orig_node *orig_node) { - struct batadv_header *batadv_header = (struct batadv_header *)skb->data; + struct batadv_bcast_packet *batadv_bcast_packet; struct batadv_priv *bat_priv = netdev_priv(soft_iface); __be16 ethertype = htons(ETH_P_BATMAN); struct vlan_ethhdr *vhdr; @@ -336,7 +336,8 @@ void batadv_interface_rx(struct net_device *soft_iface, unsigned short vid; bool is_bcast; - is_bcast = (batadv_header->packet_type == BATADV_BCAST); + batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; + is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST); /* check if enough space is available for pulling, and pull */ if (!pskb_may_pull(skb, hdr_size)) @@ -345,7 +346,12 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); - vid = batadv_get_vid(skb, hdr_size); + /* clean the netfilter state now that the batman-adv header has been + * removed + */ + nf_reset(skb); + + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 4add57d..19bc42f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -51,7 +51,7 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) const void *data1 = container_of(node, struct batadv_tt_common_entry, hash_entry); - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); + return batadv_compare_eth(data1, data2); } /** @@ -333,7 +333,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, return; tt_change_node->change.flags = flags; - tt_change_node->change.reserved = 0; + memset(tt_change_node->change.reserved, 0, + sizeof(tt_change_node->change.reserved)); memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); tt_change_node->change.vid = htons(common->vid); @@ -2221,7 +2222,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, ETH_ALEN); tt_change->flags = tt_common_entry->flags; tt_change->vid = htons(tt_common_entry->vid); - tt_change->reserved = 0; + memset(tt_change->reserved, 0, + sizeof(tt_change->reserved)); tt_num_entries++; tt_change++; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 56ca494..0c5866b 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,7 +31,7 @@ #include <net/bluetooth/bluetooth.h> #include <linux/proc_fs.h> -#define VERSION "2.17" +#define VERSION "2.18" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index e7ee531..5a5b16f 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -12,8 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _BNEP_H diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6ccc4eb..8b8b5f8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1228,7 +1228,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) /* If Connectionless Slave Broadcast master role is supported * enable all necessary events for it. */ - if (hdev->features[2][0] & 0x01) { + if (lmp_csb_master_capable(hdev)) { events[1] |= 0x40; /* Triggered Clock Capture */ events[1] |= 0x80; /* Synchronization Train Complete */ events[2] |= 0x10; /* Slave Page Response Timeout */ @@ -1238,7 +1238,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) /* If Connectionless Slave Broadcast slave role is supported * enable all necessary events for it. */ - if (hdev->features[2][0] & 0x02) { + if (lmp_csb_slave_capable(hdev)) { events[2] |= 0x01; /* Synchronization Train Received */ events[2] |= 0x02; /* CSB Receive */ events[2] |= 0x04; /* CSB Timeout */ @@ -1275,15 +1275,17 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_setup_link_policy(req); if (lmp_le_capable(hdev)) { - /* If the controller has a public BD_ADDR, then by - * default use that one. If this is a LE only - * controller without one, default to the random - * address. - */ - if (bacmp(&hdev->bdaddr, BDADDR_ANY)) - hdev->own_addr_type = ADDR_LE_DEV_PUBLIC; - else - hdev->own_addr_type = ADDR_LE_DEV_RANDOM; + if (test_bit(HCI_SETUP, &hdev->dev_flags)) { + /* If the controller has a public BD_ADDR, then + * by default use that one. If this is a LE only + * controller without a public address, default + * to the random address. + */ + if (bacmp(&hdev->bdaddr, BDADDR_ANY)) + hdev->own_addr_type = ADDR_LE_DEV_PUBLIC; + else + hdev->own_addr_type = ADDR_LE_DEV_RANDOM; + } hci_set_le_support(req); } @@ -1307,7 +1309,7 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) hci_set_event_mask_page_2(req); /* Check for Synchronization Train support */ - if (hdev->features[2][0] & 0x04) + if (lmp_sync_train_capable(hdev)) hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5935f74..5fb3df66 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -486,7 +486,10 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) + if (rp->status) + return; + + if (test_bit(HCI_SETUP, &hdev->dev_flags)) memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); } @@ -538,12 +541,6 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, if (hdev->features[0][5] & LMP_EDR_3S_ESCO) hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5); - - BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, - hdev->features[0][0], hdev->features[0][1], - hdev->features[0][2], hdev->features[0][3], - hdev->features[0][4], hdev->features[0][5], - hdev->features[0][6], hdev->features[0][7]); } static void hci_cc_read_local_ext_features(struct hci_dev *hdev, @@ -1782,7 +1779,9 @@ static u8 hci_to_mgmt_reason(u8 err) static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_disconn_complete *ev = (void *) skb->data; + u8 reason = hci_to_mgmt_reason(ev->reason); struct hci_conn *conn; + u8 type; BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); @@ -1792,43 +1791,38 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!conn) goto unlock; - if (ev->status == 0) - conn->state = BT_CLOSED; + if (ev->status) { + mgmt_disconnect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, ev->status); + goto unlock; + } - if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags) && - (conn->type == ACL_LINK || conn->type == LE_LINK)) { - if (ev->status) { - mgmt_disconnect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, ev->status); - } else { - u8 reason = hci_to_mgmt_reason(ev->reason); + conn->state = BT_CLOSED; - mgmt_device_disconnected(hdev, &conn->dst, conn->type, - conn->dst_type, reason); - } - } + if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) + mgmt_device_disconnected(hdev, &conn->dst, conn->type, + conn->dst_type, reason); - if (ev->status == 0) { - u8 type = conn->type; + if (conn->type == ACL_LINK && conn->flush_key) + hci_remove_link_key(hdev, &conn->dst); - if (type == ACL_LINK && conn->flush_key) - hci_remove_link_key(hdev, &conn->dst); - hci_proto_disconn_cfm(conn, ev->reason); - hci_conn_del(conn); + type = conn->type; - /* Re-enable advertising if necessary, since it might - * have been disabled by the connection. From the - * HCI_LE_Set_Advertise_Enable command description in - * the core specification (v4.0): - * "The Controller shall continue advertising until the Host - * issues an LE_Set_Advertise_Enable command with - * Advertising_Enable set to 0x00 (Advertising is disabled) - * or until a connection is created or until the Advertising - * is timed out due to Directed Advertising." - */ - if (type == LE_LINK) - mgmt_reenable_advertising(hdev); - } + hci_proto_disconn_cfm(conn, ev->reason); + hci_conn_del(conn); + + /* Re-enable advertising if necessary, since it might + * have been disabled by the connection. From the + * HCI_LE_Set_Advertise_Enable command description in + * the core specification (v4.0): + * "The Controller shall continue advertising until the Host + * issues an LE_Set_Advertise_Enable command with + * Advertising_Enable set to 0x00 (Advertising is disabled) + * or until a connection is created or until the Advertising + * is timed out due to Directed Advertising." + */ + if (type == LE_LINK) + mgmt_reenable_advertising(hdev); unlock: hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 6a6c8bb..7552f9e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -940,8 +940,22 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, bt_cb(skb)->pkt_type = *((unsigned char *) skb->data); skb_pull(skb, 1); - if (hci_pi(sk)->channel == HCI_CHANNEL_RAW && - bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { + if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { + /* No permission check is needed for user channel + * since that gets enforced when binding the socket. + * + * However check that the packet type is valid. + */ + if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && + bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + err = -EINVAL; + goto drop; + } + + skb_queue_tail(&hdev->raw_q, skb); + queue_work(hdev->workqueue, &hdev->tx_work); + } else if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); @@ -972,14 +986,6 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto drop; } - if (hci_pi(sk)->channel == HCI_CHANNEL_USER && - bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { - err = -EINVAL; - goto drop; - } - skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4af3821..b6bca64 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -49,6 +49,9 @@ static u8 l2cap_fixed_chan[8] = { L2CAP_FC_L2CAP | L2CAP_FC_CONNLESS, }; static LIST_HEAD(chan_list); static DEFINE_RWLOCK(chan_list_lock); +static u16 le_max_credits = L2CAP_LE_MAX_CREDITS; +static u16 le_default_mps = L2CAP_LE_DEFAULT_MPS; + static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, @@ -213,9 +216,14 @@ int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid) static u16 l2cap_alloc_cid(struct l2cap_conn *conn) { - u16 cid = L2CAP_CID_DYN_START; + u16 cid, dyn_end; + + if (conn->hcon->type == LE_LINK) + dyn_end = L2CAP_CID_LE_DYN_END; + else + dyn_end = L2CAP_CID_DYN_END; - for (; cid < L2CAP_CID_DYN_END; cid++) { + for (cid = L2CAP_CID_DYN_START; cid < dyn_end; cid++) { if (!__l2cap_get_chan_by_scid(conn, cid)) return cid; } @@ -490,6 +498,18 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) set_bit(FLAG_FORCE_ACTIVE, &chan->flags); } +static void l2cap_le_flowctl_init(struct l2cap_chan *chan) +{ + chan->sdu = NULL; + chan->sdu_last_frag = NULL; + chan->sdu_len = 0; + chan->tx_credits = 0; + chan->rx_credits = le_max_credits; + chan->mps = min_t(u16, chan->imtu, L2CAP_LE_DEFAULT_MPS); + + skb_queue_head_init(&chan->tx_q); +} + void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, @@ -502,12 +522,12 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) switch (chan->chan_type) { case L2CAP_CHAN_CONN_ORIENTED: if (conn->hcon->type == LE_LINK) { - /* LE connection */ - chan->omtu = L2CAP_DEFAULT_MTU; - if (chan->dcid == L2CAP_CID_ATT) + if (chan->dcid == L2CAP_CID_ATT) { + chan->omtu = L2CAP_DEFAULT_MTU; chan->scid = L2CAP_CID_ATT; - else + } else { chan->scid = l2cap_alloc_cid(conn); + } } else { /* Alloc CID for connection-oriented socket */ chan->scid = l2cap_alloc_cid(conn); @@ -597,6 +617,10 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) case L2CAP_MODE_BASIC: break; + case L2CAP_MODE_LE_FLOWCTL: + skb_queue_purge(&chan->tx_q); + break; + case L2CAP_MODE_ERTM: __clear_retrans_timer(chan); __clear_monitor_timer(chan); @@ -617,6 +641,50 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) return; } +static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_le_conn_rsp rsp; + u16 result; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) + result = L2CAP_CR_AUTHORIZATION; + else + result = L2CAP_CR_BAD_PSM; + + l2cap_state_change(chan, BT_DISCONN); + + rsp.dcid = cpu_to_le16(chan->scid); + rsp.mtu = cpu_to_le16(chan->imtu); + rsp.mps = cpu_to_le16(chan->mps); + rsp.credits = cpu_to_le16(chan->rx_credits); + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), + &rsp); +} + +static void l2cap_chan_connect_reject(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_conn_rsp rsp; + u16 result; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) + result = L2CAP_CR_SEC_BLOCK; + else + result = L2CAP_CR_BAD_PSM; + + l2cap_state_change(chan, BT_DISCONN); + + rsp.scid = cpu_to_le16(chan->dcid); + rsp.dcid = cpu_to_le16(chan->scid); + rsp.result = cpu_to_le16(result); + rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO); + + l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); +} + void l2cap_chan_close(struct l2cap_chan *chan, int reason) { struct l2cap_conn *conn = chan->conn; @@ -630,8 +698,10 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) case BT_CONNECTED: case BT_CONFIG: - if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && - conn->hcon->type == ACL_LINK) { + /* ATT uses L2CAP_CHAN_CONN_ORIENTED so we must also + * check for chan->psm. + */ + if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && chan->psm) { __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); l2cap_send_disconn_req(chan, reason); } else @@ -639,24 +709,11 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) break; case BT_CONNECT2: - if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && - conn->hcon->type == ACL_LINK) { - struct l2cap_conn_rsp rsp; - __u16 result; - - if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) - result = L2CAP_CR_SEC_BLOCK; - else - result = L2CAP_CR_BAD_PSM; - - l2cap_state_change(chan, BT_DISCONN); - - rsp.scid = cpu_to_le16(chan->dcid); - rsp.dcid = cpu_to_le16(chan->scid); - rsp.result = cpu_to_le16(result); - rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, - sizeof(rsp), &rsp); + if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) { + if (conn->hcon->type == ACL_LINK) + l2cap_chan_connect_reject(chan); + else if (conn->hcon->type == LE_LINK) + l2cap_chan_le_connect_reject(chan); } l2cap_chan_del(chan, reason); @@ -726,6 +783,9 @@ int l2cap_chan_check_security(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; __u8 auth_type; + if (conn->hcon->type == LE_LINK) + return smp_conn_security(conn->hcon, chan->sec_level); + auth_type = l2cap_get_auth_type(chan); return hci_conn_security(conn->hcon, chan->sec_level, auth_type); @@ -1152,16 +1212,57 @@ static void l2cap_chan_ready(struct l2cap_chan *chan) chan->conf_state = 0; __clear_chan_timer(chan); + if (chan->mode == L2CAP_MODE_LE_FLOWCTL && !chan->tx_credits) + chan->ops->suspend(chan); + chan->state = BT_CONNECTED; chan->ops->ready(chan); } +static void l2cap_le_connect(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_le_conn_req req; + + if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags)) + return; + + req.psm = chan->psm; + req.scid = cpu_to_le16(chan->scid); + req.mtu = cpu_to_le16(chan->imtu); + req.mps = cpu_to_le16(chan->mps); + req.credits = cpu_to_le16(chan->rx_credits); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_REQ, + sizeof(req), &req); +} + +static void l2cap_le_start(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + + if (!smp_conn_security(conn->hcon, chan->sec_level)) + return; + + if (!chan->psm) { + l2cap_chan_ready(chan); + return; + } + + if (chan->state == BT_CONNECT) + l2cap_le_connect(chan); +} + static void l2cap_start_connection(struct l2cap_chan *chan) { if (__amp_capable(chan)) { BT_DBG("chan %p AMP capable: discover AMPs", chan); a2mp_discover_amp(chan); + } else if (chan->conn->hcon->type == LE_LINK) { + l2cap_le_start(chan); } else { l2cap_send_conn_req(chan); } @@ -1172,7 +1273,7 @@ static void l2cap_do_start(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; if (conn->hcon->type == LE_LINK) { - l2cap_chan_ready(chan); + l2cap_le_start(chan); return; } @@ -1430,9 +1531,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) } if (hcon->type == LE_LINK) { - if (smp_conn_security(hcon, chan->sec_level)) - l2cap_chan_ready(chan); - + l2cap_le_start(chan); } else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { l2cap_chan_ready(chan); @@ -1703,7 +1802,8 @@ EXPORT_SYMBOL(l2cap_conn_put); */ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr_t *src, - bdaddr_t *dst) + bdaddr_t *dst, + u8 link_type) { struct l2cap_chan *c, *c1 = NULL; @@ -1713,6 +1813,12 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, if (state && c->state != state) continue; + if (link_type == ACL_LINK && c->src_type != BDADDR_BREDR) + continue; + + if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) + continue; + if (c->psm == psm) { int src_match, dst_match; int src_any, dst_any; @@ -1739,6 +1845,18 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, return c1; } +static bool is_valid_psm(u16 psm, u8 dst_type) +{ + if (!psm) + return false; + + if (bdaddr_type_is_le(dst_type)) + return (psm <= 0x00ff); + + /* PSM must be odd and lsb of upper byte must be 0 */ + return ((psm & 0x0101) == 0x0001); +} + int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst, u8 dst_type) { @@ -1759,8 +1877,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, l2cap_chan_lock(chan); - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((__le16_to_cpu(psm) & 0x0101) != 0x0001 && !cid && + if (!is_valid_psm(__le16_to_cpu(psm), dst_type) && !cid && chan->chan_type != L2CAP_CHAN_RAW) { err = -EINVAL; goto done; @@ -1774,6 +1891,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, switch (chan->mode) { case L2CAP_MODE_BASIC: break; + case L2CAP_MODE_LE_FLOWCTL: + l2cap_le_flowctl_init(chan); + break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: if (!disable_ertm) @@ -2432,6 +2552,89 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan, return 0; } +static struct sk_buff *l2cap_create_le_flowctl_pdu(struct l2cap_chan *chan, + struct msghdr *msg, + size_t len, u16 sdulen) +{ + struct l2cap_conn *conn = chan->conn; + struct sk_buff *skb; + int err, count, hlen; + struct l2cap_hdr *lh; + + BT_DBG("chan %p len %zu", chan, len); + + if (!conn) + return ERR_PTR(-ENOTCONN); + + hlen = L2CAP_HDR_SIZE; + + if (sdulen) + hlen += L2CAP_SDULEN_SIZE; + + count = min_t(unsigned int, (conn->mtu - hlen), len); + + skb = chan->ops->alloc_skb(chan, count + hlen, + msg->msg_flags & MSG_DONTWAIT); + if (IS_ERR(skb)) + return skb; + + /* Create L2CAP header */ + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(chan->dcid); + lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); + + if (sdulen) + put_unaligned_le16(sdulen, skb_put(skb, L2CAP_SDULEN_SIZE)); + + err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); + if (unlikely(err < 0)) { + kfree_skb(skb); + return ERR_PTR(err); + } + + return skb; +} + +static int l2cap_segment_le_sdu(struct l2cap_chan *chan, + struct sk_buff_head *seg_queue, + struct msghdr *msg, size_t len) +{ + struct sk_buff *skb; + size_t pdu_len; + u16 sdu_len; + + BT_DBG("chan %p, msg %p, len %zu", chan, msg, len); + + pdu_len = chan->conn->mtu - L2CAP_HDR_SIZE; + + pdu_len = min_t(size_t, pdu_len, chan->remote_mps); + + sdu_len = len; + pdu_len -= L2CAP_SDULEN_SIZE; + + while (len > 0) { + if (len <= pdu_len) + pdu_len = len; + + skb = l2cap_create_le_flowctl_pdu(chan, msg, pdu_len, sdu_len); + if (IS_ERR(skb)) { + __skb_queue_purge(seg_queue); + return PTR_ERR(skb); + } + + __skb_queue_tail(seg_queue, skb); + + len -= pdu_len; + + if (sdu_len) { + sdu_len = 0; + pdu_len += L2CAP_SDULEN_SIZE; + } + } + + return 0; +} + int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u32 priority) { @@ -2453,6 +2656,40 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, } switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + /* Check outgoing MTU */ + if (len > chan->omtu) + return -EMSGSIZE; + + if (!chan->tx_credits) + return -EAGAIN; + + __skb_queue_head_init(&seg_queue); + + err = l2cap_segment_le_sdu(chan, &seg_queue, msg, len); + + if (chan->state != BT_CONNECTED) { + __skb_queue_purge(&seg_queue); + err = -ENOTCONN; + } + + if (err) + return err; + + skb_queue_splice_tail_init(&seg_queue, &chan->tx_q); + + while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) { + l2cap_do_send(chan, skb_dequeue(&chan->tx_q)); + chan->tx_credits--; + } + + if (!chan->tx_credits) + chan->ops->suspend(chan); + + err = len; + + break; + case L2CAP_MODE_BASIC: /* Check outgoing MTU */ if (len > chan->omtu) @@ -3592,6 +3829,23 @@ static int l2cap_build_conf_rsp(struct l2cap_chan *chan, void *data, return ptr - data; } +void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) +{ + struct l2cap_le_conn_rsp rsp; + struct l2cap_conn *conn = chan->conn; + + BT_DBG("chan %p", chan); + + rsp.dcid = cpu_to_le16(chan->scid); + rsp.mtu = cpu_to_le16(chan->imtu); + rsp.mps = cpu_to_le16(chan->mps); + rsp.credits = cpu_to_le16(chan->rx_credits); + rsp.result = __constant_cpu_to_le16(L2CAP_CR_SUCCESS); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), + &rsp); +} + void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) { struct l2cap_conn_rsp rsp; @@ -3713,7 +3967,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, - &conn->hcon->dst); + &conn->hcon->dst, ACL_LINK); if (!pchan) { result = L2CAP_CR_BAD_PSM; goto sendresp; @@ -5155,18 +5409,17 @@ static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency, static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, - u8 *data) + u16 cmd_len, u8 *data) { struct hci_conn *hcon = conn->hcon; struct l2cap_conn_param_update_req *req; struct l2cap_conn_param_update_rsp rsp; - u16 min, max, latency, to_multiplier, cmd_len; + u16 min, max, latency, to_multiplier; int err; if (!(hcon->link_mode & HCI_LM_MASTER)) return -EINVAL; - cmd_len = __le16_to_cpu(cmd->len); if (cmd_len != sizeof(struct l2cap_conn_param_update_req)) return -EPROTO; @@ -5196,6 +5449,65 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, return 0; } +static int l2cap_le_connect_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_le_conn_rsp *rsp = (struct l2cap_le_conn_rsp *) data; + u16 dcid, mtu, mps, credits, result; + struct l2cap_chan *chan; + int err; + + if (cmd_len < sizeof(*rsp)) + return -EPROTO; + + dcid = __le16_to_cpu(rsp->dcid); + mtu = __le16_to_cpu(rsp->mtu); + mps = __le16_to_cpu(rsp->mps); + credits = __le16_to_cpu(rsp->credits); + result = __le16_to_cpu(rsp->result); + + if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23)) + return -EPROTO; + + BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", + dcid, mtu, mps, credits, result); + + mutex_lock(&conn->chan_lock); + + chan = __l2cap_get_chan_by_ident(conn, cmd->ident); + if (!chan) { + err = -EBADSLT; + goto unlock; + } + + err = 0; + + l2cap_chan_lock(chan); + + switch (result) { + case L2CAP_CR_SUCCESS: + chan->ident = 0; + chan->dcid = dcid; + chan->omtu = mtu; + chan->remote_mps = mps; + chan->tx_credits = credits; + l2cap_chan_ready(chan); + break; + + default: + l2cap_chan_del(chan, ECONNREFUSED); + break; + } + + l2cap_chan_unlock(chan); + +unlock: + mutex_unlock(&conn->chan_lock); + + return err; +} + static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) @@ -5276,23 +5588,235 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, return err; } +static int l2cap_le_connect_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_le_conn_req *req = (struct l2cap_le_conn_req *) data; + struct l2cap_le_conn_rsp rsp; + struct l2cap_chan *chan, *pchan; + u16 dcid, scid, credits, mtu, mps; + __le16 psm; + u8 result; + + if (cmd_len != sizeof(*req)) + return -EPROTO; + + scid = __le16_to_cpu(req->scid); + mtu = __le16_to_cpu(req->mtu); + mps = __le16_to_cpu(req->mps); + psm = req->psm; + dcid = 0; + credits = 0; + + if (mtu < 23 || mps < 23) + return -EPROTO; + + BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), + scid, mtu, mps); + + /* Check if we have socket listening on psm */ + pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, + &conn->hcon->dst, LE_LINK); + if (!pchan) { + result = L2CAP_CR_BAD_PSM; + chan = NULL; + goto response; + } + + mutex_lock(&conn->chan_lock); + l2cap_chan_lock(pchan); + + if (!smp_sufficient_security(conn->hcon, pchan->sec_level)) { + result = L2CAP_CR_AUTHENTICATION; + chan = NULL; + goto response_unlock; + } + + /* Check if we already have channel with that dcid */ + if (__l2cap_get_chan_by_dcid(conn, scid)) { + result = L2CAP_CR_NO_MEM; + chan = NULL; + goto response_unlock; + } + + chan = pchan->ops->new_connection(pchan); + if (!chan) { + result = L2CAP_CR_NO_MEM; + goto response_unlock; + } + + l2cap_le_flowctl_init(chan); + + bacpy(&chan->src, &conn->hcon->src); + bacpy(&chan->dst, &conn->hcon->dst); + chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); + chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->psm = psm; + chan->dcid = scid; + chan->omtu = mtu; + chan->remote_mps = mps; + chan->tx_credits = __le16_to_cpu(req->credits); + + __l2cap_chan_add(conn, chan); + dcid = chan->scid; + credits = chan->rx_credits; + + __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); + + chan->ident = cmd->ident; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { + l2cap_state_change(chan, BT_CONNECT2); + result = L2CAP_CR_PEND; + chan->ops->defer(chan); + } else { + l2cap_chan_ready(chan); + result = L2CAP_CR_SUCCESS; + } + +response_unlock: + l2cap_chan_unlock(pchan); + mutex_unlock(&conn->chan_lock); + + if (result == L2CAP_CR_PEND) + return 0; + +response: + if (chan) { + rsp.mtu = cpu_to_le16(chan->imtu); + rsp.mps = cpu_to_le16(chan->mps); + } else { + rsp.mtu = 0; + rsp.mps = 0; + } + + rsp.dcid = cpu_to_le16(dcid); + rsp.credits = cpu_to_le16(credits); + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, cmd->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), &rsp); + + return 0; +} + +static inline int l2cap_le_credits(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_le_credits *pkt; + struct l2cap_chan *chan; + u16 cid, credits; + + if (cmd_len != sizeof(*pkt)) + return -EPROTO; + + pkt = (struct l2cap_le_credits *) data; + cid = __le16_to_cpu(pkt->cid); + credits = __le16_to_cpu(pkt->credits); + + BT_DBG("cid 0x%4.4x credits 0x%4.4x", cid, credits); + + chan = l2cap_get_chan_by_dcid(conn, cid); + if (!chan) + return -EBADSLT; + + chan->tx_credits += credits; + + while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) { + l2cap_do_send(chan, skb_dequeue(&chan->tx_q)); + chan->tx_credits--; + } + + if (chan->tx_credits) + chan->ops->resume(chan); + + l2cap_chan_unlock(chan); + + return 0; +} + +static inline int l2cap_le_command_rej(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_cmd_rej_unk *rej = (struct l2cap_cmd_rej_unk *) data; + struct l2cap_chan *chan; + + if (cmd_len < sizeof(*rej)) + return -EPROTO; + + mutex_lock(&conn->chan_lock); + + chan = __l2cap_get_chan_by_ident(conn, cmd->ident); + if (!chan) + goto done; + + l2cap_chan_lock(chan); + l2cap_chan_del(chan, ECONNREFUSED); + l2cap_chan_unlock(chan); + +done: + mutex_unlock(&conn->chan_lock); + return 0; +} + static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, - struct l2cap_cmd_hdr *cmd, u8 *data) + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) { + int err = 0; + + if (!enable_lecoc) { + switch (cmd->code) { + case L2CAP_LE_CONN_REQ: + case L2CAP_LE_CONN_RSP: + case L2CAP_LE_CREDITS: + case L2CAP_DISCONN_REQ: + case L2CAP_DISCONN_RSP: + return -EINVAL; + } + } + switch (cmd->code) { case L2CAP_COMMAND_REJ: - return 0; + l2cap_le_command_rej(conn, cmd, cmd_len, data); + break; case L2CAP_CONN_PARAM_UPDATE_REQ: - return l2cap_conn_param_update_req(conn, cmd, data); + err = l2cap_conn_param_update_req(conn, cmd, cmd_len, data); + break; case L2CAP_CONN_PARAM_UPDATE_RSP: - return 0; + break; + + case L2CAP_LE_CONN_RSP: + l2cap_le_connect_rsp(conn, cmd, cmd_len, data); + break; + + case L2CAP_LE_CONN_REQ: + err = l2cap_le_connect_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_LE_CREDITS: + err = l2cap_le_credits(conn, cmd, cmd_len, data); + break; + + case L2CAP_DISCONN_REQ: + err = l2cap_disconnect_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_DISCONN_RSP: + l2cap_disconnect_rsp(conn, cmd, cmd_len, data); + break; default: BT_ERR("Unknown LE signaling command 0x%2.2x", cmd->code); - return -EINVAL; + err = -EINVAL; + break; } + + return err; } static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, @@ -5321,7 +5845,7 @@ static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, goto drop; } - err = l2cap_le_sig_cmd(conn, cmd, skb->data); + err = l2cap_le_sig_cmd(conn, cmd, len, skb->data); if (err) { struct l2cap_cmd_rej_unk rej; @@ -6312,6 +6836,121 @@ drop: return 0; } +static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_le_credits pkt; + u16 return_credits; + + /* We return more credits to the sender only after the amount of + * credits falls below half of the initial amount. + */ + if (chan->rx_credits >= (le_max_credits + 1) / 2) + return; + + return_credits = le_max_credits - chan->rx_credits; + + BT_DBG("chan %p returning %u credits to sender", chan, return_credits); + + chan->rx_credits += return_credits; + + pkt.cid = cpu_to_le16(chan->scid); + pkt.credits = cpu_to_le16(return_credits); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt); +} + +static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) +{ + int err; + + if (!chan->rx_credits) { + BT_ERR("No credits to receive LE L2CAP data"); + return -ENOBUFS; + } + + if (chan->imtu < skb->len) { + BT_ERR("Too big LE L2CAP PDU"); + return -ENOBUFS; + } + + chan->rx_credits--; + BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits); + + l2cap_chan_le_send_credits(chan); + + err = 0; + + if (!chan->sdu) { + u16 sdu_len; + + sdu_len = get_unaligned_le16(skb->data); + skb_pull(skb, L2CAP_SDULEN_SIZE); + + BT_DBG("Start of new SDU. sdu_len %u skb->len %u imtu %u", + sdu_len, skb->len, chan->imtu); + + if (sdu_len > chan->imtu) { + BT_ERR("Too big LE L2CAP SDU length received"); + err = -EMSGSIZE; + goto failed; + } + + if (skb->len > sdu_len) { + BT_ERR("Too much LE L2CAP data received"); + err = -EINVAL; + goto failed; + } + + if (skb->len == sdu_len) + return chan->ops->recv(chan, skb); + + chan->sdu = skb; + chan->sdu_len = sdu_len; + chan->sdu_last_frag = skb; + + return 0; + } + + BT_DBG("SDU fragment. chan->sdu->len %u skb->len %u chan->sdu_len %u", + chan->sdu->len, skb->len, chan->sdu_len); + + if (chan->sdu->len + skb->len > chan->sdu_len) { + BT_ERR("Too much LE L2CAP data received"); + err = -EINVAL; + goto failed; + } + + append_skb_frag(chan->sdu, skb, &chan->sdu_last_frag); + skb = NULL; + + if (chan->sdu->len == chan->sdu_len) { + err = chan->ops->recv(chan, chan->sdu); + if (!err) { + chan->sdu = NULL; + chan->sdu_last_frag = NULL; + chan->sdu_len = 0; + } + } + +failed: + if (err) { + kfree_skb(skb); + kfree_skb(chan->sdu); + chan->sdu = NULL; + chan->sdu_last_frag = NULL; + chan->sdu_len = 0; + } + + /* We can't return an error here since we took care of the skb + * freeing internally. An error return would cause the caller to + * do a double-free of the skb. + */ + return 0; +} + static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) { @@ -6341,6 +6980,12 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, goto drop; switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + if (l2cap_le_data_rcv(chan, skb) < 0) + goto drop; + + goto done; + case L2CAP_MODE_BASIC: /* If socket recv buffers overflows we drop data here * which is *bad* because L2CAP has to be reliable. @@ -6380,7 +7025,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, if (hcon->type != ACL_LINK) goto drop; - chan = l2cap_global_chan_by_psm(0, psm, &hcon->src, &hcon->dst); + chan = l2cap_global_chan_by_psm(0, psm, &hcon->src, &hcon->dst, + ACL_LINK); if (!chan) goto drop; @@ -6612,11 +7258,10 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } if (chan->state == BT_CONNECT) { - if (!status) { + if (!status) l2cap_start_connection(chan); - } else { + else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); - } } else if (chan->state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; __u16 res, stat; @@ -6817,6 +7462,11 @@ int __init l2cap_init(void) l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs, NULL, &l2cap_debugfs_fops); + debugfs_create_u16("l2cap_le_max_credits", 0466, bt_debugfs, + &le_max_credits); + debugfs_create_u16("l2cap_le_default_mps", 0466, bt_debugfs, + &le_default_mps); + return 0; } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 7cc24d2..e7806e6 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -27,6 +27,7 @@ /* Bluetooth L2CAP sockets. */ +#include <linux/module.h> #include <linux/export.h> #include <net/bluetooth/bluetooth.h> @@ -35,6 +36,8 @@ #include "smp.h" +bool enable_lecoc; + static struct bt_sock_list l2cap_sk_list = { .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) }; @@ -50,6 +53,32 @@ bool l2cap_is_socket(struct socket *sock) } EXPORT_SYMBOL(l2cap_is_socket); +static int l2cap_validate_bredr_psm(u16 psm) +{ + /* PSM must be odd and lsb of upper byte must be 0 */ + if ((psm & 0x0101) != 0x0001) + return -EINVAL; + + /* Restrict usage of well-known PSMs */ + if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + return 0; +} + +static int l2cap_validate_le_psm(u16 psm) +{ + /* Valid LE_PSM ranges are defined only until 0x00ff */ + if (psm > 0x00ff) + return -EINVAL; + + /* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */ + if (psm <= 0x007f && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + return 0; +} + static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { struct sock *sk = sock->sk; @@ -73,11 +102,11 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) return -EINVAL; if (bdaddr_type_is_le(la.l2_bdaddr_type)) { - /* Connection oriented channels are not supported on LE */ - if (la.l2_psm) + if (!enable_lecoc && la.l2_psm) return -EINVAL; /* We only allow ATT user space socket */ - if (la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT)) + if (la.l2_cid && + la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT)) return -EINVAL; } @@ -91,17 +120,13 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) if (la.l2_psm) { __u16 psm = __le16_to_cpu(la.l2_psm); - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((psm & 0x0101) != 0x0001) { - err = -EINVAL; - goto done; - } + if (la.l2_bdaddr_type == BDADDR_BREDR) + err = l2cap_validate_bredr_psm(psm); + else + err = l2cap_validate_le_psm(psm); - /* Restrict usage of well-known PSMs */ - if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { - err = -EACCES; + if (err) goto done; - } } if (la.l2_cid) @@ -127,6 +152,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) bacpy(&chan->src, &la.l2_bdaddr); chan->src_type = la.l2_bdaddr_type; + if (chan->psm && bdaddr_type_is_le(chan->src_type)) + chan->mode = L2CAP_MODE_LE_FLOWCTL; + chan->state = BT_BOUND; sk->sk_state = BT_BOUND; @@ -189,14 +217,17 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; if (bdaddr_type_is_le(la.l2_bdaddr_type)) { - /* Connection oriented channels are not supported on LE */ - if (la.l2_psm) + if (!enable_lecoc && la.l2_psm) return -EINVAL; /* We only allow ATT user space socket */ - if (la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT)) + if (la.l2_cid && + la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT)) return -EINVAL; } + if (chan->psm && bdaddr_type_is_le(chan->src_type)) + chan->mode = L2CAP_MODE_LE_FLOWCTL; + err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid), &la.l2_bdaddr, la.l2_bdaddr_type); if (err) @@ -234,6 +265,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) switch (chan->mode) { case L2CAP_MODE_BASIC: + case L2CAP_MODE_LE_FLOWCTL: break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: @@ -360,6 +392,16 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, switch (optname) { case L2CAP_OPTIONS: + /* LE sockets should use BT_SNDMTU/BT_RCVMTU, but since + * legacy ATT code depends on getsockopt for + * L2CAP_OPTIONS we need to let this pass. + */ + if (bdaddr_type_is_le(chan->src_type) && + chan->scid != L2CAP_CID_ATT) { + err = -EINVAL; + break; + } + memset(&opts, 0, sizeof(opts)); opts.imtu = chan->imtu; opts.omtu = chan->omtu; @@ -514,6 +556,41 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, err = -EFAULT; break; + case BT_SNDMTU: + if (!enable_lecoc) { + err = -EPROTONOSUPPORT; + break; + } + + if (!bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + break; + } + + if (put_user(chan->omtu, (u16 __user *) optval)) + err = -EFAULT; + break; + + case BT_RCVMTU: + if (!enable_lecoc) { + err = -EPROTONOSUPPORT; + break; + } + + if (!bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + + if (put_user(chan->imtu, (u16 __user *) optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; @@ -554,6 +631,11 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, switch (optname) { case L2CAP_OPTIONS: + if (bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + if (sk->sk_state == BT_CONNECTED) { err = -EINVAL; break; @@ -585,6 +667,8 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, chan->mode = opts.mode; switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + break; case L2CAP_MODE_BASIC: clear_bit(CONF_STATE2_DEVICE, &chan->conf_state); break; @@ -807,6 +891,47 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; + case BT_SNDMTU: + if (!enable_lecoc) { + err = -EPROTONOSUPPORT; + break; + } + + if (!bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + + /* Setting is not supported as it's the remote side that + * decides this. + */ + err = -EPERM; + break; + + case BT_RCVMTU: + if (!enable_lecoc) { + err = -EPROTONOSUPPORT; + break; + } + + if (!bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + + if (sk->sk_state == BT_CONNECTED) { + err = -EISCONN; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + chan->imtu = opt; + break; + default: err = -ENOPROTOOPT; break; @@ -859,10 +984,16 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - sk->sk_state = BT_CONFIG; - pi->chan->state = BT_CONFIG; + if (bdaddr_type_is_le(pi->chan->src_type)) { + sk->sk_state = BT_CONNECTED; + pi->chan->state = BT_CONNECTED; + __l2cap_le_connect_rsp_defer(pi->chan); + } else { + sk->sk_state = BT_CONFIG; + pi->chan->state = BT_CONFIG; + __l2cap_connect_rsp_defer(pi->chan); + } - __l2cap_connect_rsp_defer(pi->chan); err = 0; goto done; } @@ -1236,6 +1367,14 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan) return sk->sk_sndtimeo; } +static void l2cap_sock_suspend_cb(struct l2cap_chan *chan) +{ + struct sock *sk = chan->data; + + set_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags); + sk->sk_state_change(sk); +} + static struct l2cap_ops l2cap_chan_ops = { .name = "L2CAP Socket Interface", .new_connection = l2cap_sock_new_connection_cb, @@ -1246,6 +1385,7 @@ static struct l2cap_ops l2cap_chan_ops = { .ready = l2cap_sock_ready_cb, .defer = l2cap_sock_defer_cb, .resume = l2cap_sock_resume_cb, + .suspend = l2cap_sock_suspend_cb, .set_shutdown = l2cap_sock_set_shutdown_cb, .get_sndtimeo = l2cap_sock_get_sndtimeo_cb, .alloc_skb = l2cap_sock_alloc_skb_cb, @@ -1303,6 +1443,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) chan->tx_win_max = pchan->tx_win_max; chan->sec_level = pchan->sec_level; chan->flags = pchan->flags; + chan->tx_credits = pchan->tx_credits; + chan->rx_credits = pchan->rx_credits; security_sk_clone(parent, sk); } else { @@ -1469,3 +1611,6 @@ void l2cap_cleanup_sockets(void) bt_sock_unregister(BTPROTO_L2CAP); proto_unregister(&l2cap_proto); } + +module_param(enable_lecoc, bool, 0644); +MODULE_PARM_DESC(enable_lecoc, "Enable support for LE CoC"); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 074d836..a03ca3c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1264,7 +1264,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, if (cp->val == 0x02) { /* Limited discoverable mode */ - hci_cp.num_iac = 2; + hci_cp.num_iac = min_t(u8, hdev->num_iac, 2); hci_cp.iac_lap[0] = 0x00; /* LIAC */ hci_cp.iac_lap[1] = 0x8b; hci_cp.iac_lap[2] = 0x9e; @@ -4595,6 +4595,9 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_ev_device_disconnected ev; struct sock *sk = NULL; + if (link_type != ACL_LINK && link_type != LE_LINK) + return; + mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk); bacpy(&ev.addr.bdaddr, bdaddr); @@ -4613,6 +4616,8 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status) { + u8 bdaddr_type = link_to_bdaddr(link_type, addr_type); + struct mgmt_cp_disconnect *cp; struct mgmt_rp_disconnect rp; struct pending_cmd *cmd; @@ -4623,8 +4628,16 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, if (!cmd) return; + cp = cmd->param; + + if (bacmp(bdaddr, &cp->addr.bdaddr)) + return; + + if (cp->addr.type != bdaddr_type) + return; + bacpy(&rp.addr.bdaddr, bdaddr); - rp.addr.type = link_to_bdaddr(link_type, addr_type); + rp.addr.type = bdaddr_type; cmd_complete(cmd->sk, cmd->index, MGMT_OP_DISCONNECT, mgmt_status(status), &rp, sizeof(rp)); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 4b07acb..4500736 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -53,8 +53,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) { struct blkcipher_desc desc; struct scatterlist sg; - int err, iv_len; - unsigned char iv[128]; + int err; if (tfm == NULL) { BT_ERR("tfm %p", tfm); @@ -72,12 +71,6 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) sg_init_one(&sg, r, 16); - iv_len = crypto_blkcipher_ivsize(tfm); - if (iv_len) { - memset(&iv, 0xff, iv_len); - crypto_blkcipher_set_iv(tfm, iv, iv_len); - } - err = crypto_blkcipher_encrypt(&desc, &sg, &sg, 16); if (err) BT_ERR("Encrypt data error %d", err); @@ -143,13 +136,6 @@ static int smp_s1(struct crypto_blkcipher *tfm, u8 k[16], u8 r1[16], return err; } -static int smp_rand(u8 *buf) -{ - get_random_bytes(buf, 16); - - return 0; -} - static struct sk_buff *smp_build_cmd(struct l2cap_conn *conn, u8 code, u16 dlen, void *data) { @@ -257,11 +243,11 @@ static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size) return 0; } -static void smp_failure(struct l2cap_conn *conn, u8 reason, u8 send) +static void smp_failure(struct l2cap_conn *conn, u8 reason) { struct hci_conn *hcon = conn->hcon; - if (send) + if (reason) smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason), &reason); @@ -406,7 +392,7 @@ static void confirm_work(struct work_struct *work) return; error: - smp_failure(conn, reason, 1); + smp_failure(conn, reason); } static void random_work(struct work_struct *work) @@ -490,7 +476,7 @@ static void random_work(struct work_struct *work) return; error: - smp_failure(conn, reason, 1); + smp_failure(conn, reason); } static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) @@ -555,10 +541,10 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) break; case MGMT_OP_USER_PASSKEY_NEG_REPLY: case MGMT_OP_USER_CONFIRM_NEG_REPLY: - smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED, 1); + smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED); return 0; default: - smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED, 1); + smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED); return -EOPNOTSUPP; } @@ -606,9 +592,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (check_enc_key_size(conn, key_size)) return SMP_ENC_KEY_SIZE; - ret = smp_rand(smp->prnd); - if (ret) - return SMP_UNSPECIFIED; + get_random_bytes(smp->prnd, sizeof(smp->prnd)); smp->prsp[0] = SMP_CMD_PAIRING_RSP; memcpy(&smp->prsp[1], &rsp, sizeof(rsp)); @@ -644,9 +628,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (check_enc_key_size(conn, key_size)) return SMP_ENC_KEY_SIZE; - ret = smp_rand(smp->prnd); - if (ret) - return SMP_UNSPECIFIED; + get_random_bytes(smp->prnd, sizeof(smp->prnd)); smp->prsp[0] = SMP_CMD_PAIRING_RSP; memcpy(&smp->prsp[1], rsp, sizeof(*rsp)); @@ -768,6 +750,17 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) return 0; } +bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level) +{ + if (sec_level == BT_SECURITY_LOW) + return true; + + if (hcon->sec_level >= sec_level) + return true; + + return false; +} + int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -779,10 +772,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) return 1; - if (sec_level == BT_SECURITY_LOW) - return 1; - - if (hcon->sec_level >= sec_level) + if (smp_sufficient_security(hcon, sec_level)) return 1; if (hcon->link_mode & HCI_LM_MASTER) @@ -895,7 +885,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) break; case SMP_CMD_PAIRING_FAIL: - smp_failure(conn, skb->data[0], 0); + smp_failure(conn, 0); reason = 0; err = -EPERM; break; @@ -941,7 +931,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) done: if (reason) - smp_failure(conn, reason, 1); + smp_failure(conn, reason); kfree_skb(skb); return err; diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index f8ba07f..a700bcb 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -136,6 +136,7 @@ struct smp_chan { }; /* SMP Commands */ +bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb); int smp_distribute_keys(struct l2cap_conn *conn, __u8 force); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f00cfd2..e4401a5 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -32,7 +32,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; - struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); u16 vid = 0; rcu_read_lock(); @@ -90,12 +90,12 @@ static int br_dev_init(struct net_device *dev) struct net_bridge *br = netdev_priv(dev); int i; - br->stats = alloc_percpu(struct br_cpu_netstats); + br->stats = alloc_percpu(struct pcpu_sw_netstats); if (!br->stats) return -ENOMEM; for_each_possible_cpu(i) { - struct br_cpu_netstats *br_dev_stats; + struct pcpu_sw_netstats *br_dev_stats; br_dev_stats = per_cpu_ptr(br->stats, i); u64_stats_init(&br_dev_stats->syncp); } @@ -135,12 +135,12 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct net_bridge *br = netdev_priv(dev); - struct br_cpu_netstats tmp, sum = { 0 }; + struct pcpu_sw_netstats tmp, sum = { 0 }; unsigned int cpu; for_each_possible_cpu(cpu) { unsigned int start; - const struct br_cpu_netstats *bstats + const struct pcpu_sw_netstats *bstats = per_cpu_ptr(br->stats, cpu); do { start = u64_stats_fetch_begin_bh(&bstats->syncp); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 33e8f23..c5f5a4a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -570,8 +570,7 @@ static void fdb_notify(struct net_bridge *br, rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); return; errout: - if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } /* Dump information about entries, in response to GETNEIGH */ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 4b81b14..d3409e6 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -26,13 +26,13 @@ static int deliver_clone(const struct net_bridge_port *prev, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)); -/* Don't forward packets to originating port or forwarding diasabled */ +/* Don't forward packets to originating port or forwarding disabled */ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { - return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && + return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && - p->state == BR_STATE_FORWARDING); + p->state == BR_STATE_FORWARDING; } static inline unsigned int packet_length(const struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4bf02ad..1f6bd1e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -61,7 +61,7 @@ static int port_cost(struct net_device *dev) } -/* Check for port carrier transistions. */ +/* Check for port carrier transitions. */ void br_port_carrier_check(struct net_bridge_port *p) { struct net_device *dev = p->dev; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7e73c32..bf8dc7d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -28,7 +28,7 @@ static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); - struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index cd8c3a4..a9a4a1b 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -381,7 +381,7 @@ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct net_bridge *br = netdev_priv(dev); - switch(cmd) { + switch (cmd) { case SIOCDEVPRIVATE: return old_dev_ioctl(dev, rq, cmd); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4c214b2..ef66365 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1998,7 +1998,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) u32 old; struct net_bridge_mdb_htable *mdb; - spin_lock(&br->multicast_lock); + spin_lock_bh(&br->multicast_lock); if (!netif_running(br->dev)) goto unlock; @@ -2030,7 +2030,7 @@ rollback: } unlock: - spin_unlock(&br->multicast_lock); + spin_unlock_bh(&br->multicast_lock); return err; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 80cad2c..b008c59 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -1001,7 +1001,7 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, - void __user * buffer, size_t * lenp, loff_t * ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f75d92e..e74b6d53 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -195,8 +195,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return; errout: - if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); } @@ -373,7 +372,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) p = br_port_get_rtnl(dev); /* We want to accept dev as bridge itself if the AF_SPEC - * is set to see if someone is setting vlan info on the brigde + * is set to see if someone is setting vlan info on the bridge */ if (!p && !afspec) return -EINVAL; @@ -389,7 +388,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) err = br_setport(p, tb); spin_unlock_bh(&p->br->lock); } else { - /* Binary compatability with old RSTP */ + /* Binary compatibility with old RSTP */ if (nla_len(protinfo) < sizeof(u8)) return -EINVAL; @@ -482,9 +481,7 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_af_register(&br_af_ops); - if (err) - goto out; + rtnl_af_register(&br_af_ops); err = rtnl_link_register(&br_link_ops); if (err) @@ -494,7 +491,6 @@ int __init br_netlink_init(void) out_af: rtnl_af_unregister(&br_af_ops); -out: br_mdb_uninit(); return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 229d820..3733f15 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -210,21 +210,13 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device * rtnl_dereference(dev->rx_handler_data) : NULL; } -struct br_cpu_netstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; -}; - struct net_bridge { spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct br_cpu_netstats __percpu *stats; + struct pcpu_sw_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; #ifdef CONFIG_BRIDGE_NETFILTER @@ -426,6 +418,16 @@ netdev_features_t br_features_recompute(struct net_bridge *br, int br_handle_frame_finish(struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); +static inline bool br_rx_handler_check_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler) == br_handle_frame; +} + +static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) +{ + return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL; +} + /* br_ioctl.c */ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, @@ -711,7 +713,7 @@ void br_netfilter_fini(void); void br_netfilter_rtable_init(struct net_bridge *); #else #define br_netfilter_init() (0) -#define br_netfilter_fini() do { } while(0) +#define br_netfilter_fini() do { } while (0) #define br_netfilter_rtable_init(x) #endif diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 8660ea3..bdb459d 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -153,7 +153,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) goto err; - p = br_port_get_rcu(dev); + p = br_port_get_check_rcu(dev); if (!p) goto err; diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 950663d..558c46d 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -110,7 +110,7 @@ static void br_tcn_timer_expired(unsigned long arg) if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) { br_transmit_tcn(br); - mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time); + mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time); } spin_unlock(&br->lock); } diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 3b9637f..8dac6555 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -49,53 +49,51 @@ static ssize_t store_bridge_parm(struct device *d, } -static ssize_t show_forward_delay(struct device *d, +static ssize_t forward_delay_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } -static ssize_t store_forward_delay(struct device *d, +static ssize_t forward_delay_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_forward_delay); } -static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, - show_forward_delay, store_forward_delay); +static DEVICE_ATTR_RW(forward_delay); -static ssize_t show_hello_time(struct device *d, struct device_attribute *attr, +static ssize_t hello_time_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->hello_time)); } -static ssize_t store_hello_time(struct device *d, +static ssize_t hello_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_hello_time); } -static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, - store_hello_time); +static DEVICE_ATTR_RW(hello_time); -static ssize_t show_max_age(struct device *d, struct device_attribute *attr, +static ssize_t max_age_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->max_age)); } -static ssize_t store_max_age(struct device *d, struct device_attribute *attr, +static ssize_t max_age_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_max_age); } -static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); +static DEVICE_ATTR_RW(max_age); -static ssize_t show_ageing_time(struct device *d, +static ssize_t ageing_time_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -108,16 +106,15 @@ static int set_ageing_time(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_ageing_time(struct device *d, +static ssize_t ageing_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_ageing_time); } -static DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, - store_ageing_time); +static DEVICE_ATTR_RW(ageing_time); -static ssize_t show_stp_state(struct device *d, +static ssize_t stp_state_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -125,7 +122,7 @@ static ssize_t show_stp_state(struct device *d, } -static ssize_t store_stp_state(struct device *d, +static ssize_t stp_state_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -147,20 +144,21 @@ static ssize_t store_stp_state(struct device *d, return len; } -static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, - store_stp_state); +static DEVICE_ATTR_RW(stp_state); -static ssize_t show_group_fwd_mask(struct device *d, - struct device_attribute *attr, char *buf) +static ssize_t group_fwd_mask_show(struct device *d, + struct device_attribute *attr, + char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%#x\n", br->group_fwd_mask); } -static ssize_t store_group_fwd_mask(struct device *d, - struct device_attribute *attr, const char *buf, - size_t len) +static ssize_t group_fwd_mask_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) { struct net_bridge *br = to_bridge(d); char *endp; @@ -180,10 +178,9 @@ static ssize_t store_group_fwd_mask(struct device *d, return len; } -static DEVICE_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, - store_group_fwd_mask); +static DEVICE_ATTR_RW(group_fwd_mask); -static ssize_t show_priority(struct device *d, struct device_attribute *attr, +static ssize_t priority_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -197,93 +194,91 @@ static int set_priority(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_priority(struct device *d, struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t priority_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_priority); } -static DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority); +static DEVICE_ATTR_RW(priority); -static ssize_t show_root_id(struct device *d, struct device_attribute *attr, +static ssize_t root_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->designated_root); } -static DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); +static DEVICE_ATTR_RO(root_id); -static ssize_t show_bridge_id(struct device *d, struct device_attribute *attr, +static ssize_t bridge_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->bridge_id); } -static DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); +static DEVICE_ATTR_RO(bridge_id); -static ssize_t show_root_port(struct device *d, struct device_attribute *attr, +static ssize_t root_port_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_port); } -static DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); +static DEVICE_ATTR_RO(root_port); -static ssize_t show_root_path_cost(struct device *d, +static ssize_t root_path_cost_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); } -static DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); +static DEVICE_ATTR_RO(root_path_cost); -static ssize_t show_topology_change(struct device *d, +static ssize_t topology_change_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->topology_change); } -static DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); +static DEVICE_ATTR_RO(topology_change); -static ssize_t show_topology_change_detected(struct device *d, +static ssize_t topology_change_detected_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->topology_change_detected); } -static DEVICE_ATTR(topology_change_detected, S_IRUGO, - show_topology_change_detected, NULL); +static DEVICE_ATTR_RO(topology_change_detected); -static ssize_t show_hello_timer(struct device *d, +static ssize_t hello_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); } -static DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); +static DEVICE_ATTR_RO(hello_timer); -static ssize_t show_tcn_timer(struct device *d, struct device_attribute *attr, +static ssize_t tcn_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); } -static DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); +static DEVICE_ATTR_RO(tcn_timer); -static ssize_t show_topology_change_timer(struct device *d, +static ssize_t topology_change_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); } -static DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, - NULL); +static DEVICE_ATTR_RO(topology_change_timer); -static ssize_t show_gc_timer(struct device *d, struct device_attribute *attr, +static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); } -static DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); +static DEVICE_ATTR_RO(gc_timer); -static ssize_t show_group_addr(struct device *d, +static ssize_t group_addr_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -293,7 +288,7 @@ static ssize_t show_group_addr(struct device *d, br->group_addr[4], br->group_addr[5]); } -static ssize_t store_group_addr(struct device *d, +static ssize_t group_addr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -324,10 +319,9 @@ static ssize_t store_group_addr(struct device *d, return len; } -static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, - show_group_addr, store_group_addr); +static DEVICE_ATTR_RW(group_addr); -static ssize_t store_flush(struct device *d, +static ssize_t flush_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -339,26 +333,25 @@ static ssize_t store_flush(struct device *d, br_fdb_flush(br); return len; } -static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush); +static DEVICE_ATTR_WO(flush); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -static ssize_t show_multicast_router(struct device *d, +static ssize_t multicast_router_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_router); } -static ssize_t store_multicast_router(struct device *d, +static ssize_t multicast_router_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_router); } -static DEVICE_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, - store_multicast_router); +static DEVICE_ATTR_RW(multicast_router); -static ssize_t show_multicast_snooping(struct device *d, +static ssize_t multicast_snooping_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -366,18 +359,17 @@ static ssize_t show_multicast_snooping(struct device *d, return sprintf(buf, "%d\n", !br->multicast_disabled); } -static ssize_t store_multicast_snooping(struct device *d, +static ssize_t multicast_snooping_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_toggle); } -static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, - show_multicast_snooping, store_multicast_snooping); +static DEVICE_ATTR_RW(multicast_snooping); -static ssize_t show_multicast_query_use_ifaddr(struct device *d, - struct device_attribute *attr, - char *buf) +static ssize_t multicast_query_use_ifaddr_show(struct device *d, + struct device_attribute *attr, + char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr); @@ -390,17 +382,15 @@ static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) } static ssize_t -store_multicast_query_use_ifaddr(struct device *d, +multicast_query_use_ifaddr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_use_ifaddr); } -static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR, - show_multicast_query_use_ifaddr, - store_multicast_query_use_ifaddr); +static DEVICE_ATTR_RW(multicast_query_use_ifaddr); -static ssize_t show_multicast_querier(struct device *d, +static ssize_t multicast_querier_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -408,16 +398,15 @@ static ssize_t show_multicast_querier(struct device *d, return sprintf(buf, "%d\n", br->multicast_querier); } -static ssize_t store_multicast_querier(struct device *d, +static ssize_t multicast_querier_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_querier); } -static DEVICE_ATTR(multicast_querier, S_IRUGO | S_IWUSR, - show_multicast_querier, store_multicast_querier); +static DEVICE_ATTR_RW(multicast_querier); -static ssize_t show_hash_elasticity(struct device *d, +static ssize_t hash_elasticity_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -430,31 +419,29 @@ static int set_elasticity(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_hash_elasticity(struct device *d, +static ssize_t hash_elasticity_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_elasticity); } -static DEVICE_ATTR(hash_elasticity, S_IRUGO | S_IWUSR, show_hash_elasticity, - store_hash_elasticity); +static DEVICE_ATTR_RW(hash_elasticity); -static ssize_t show_hash_max(struct device *d, struct device_attribute *attr, +static ssize_t hash_max_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->hash_max); } -static ssize_t store_hash_max(struct device *d, struct device_attribute *attr, +static ssize_t hash_max_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_hash_max); } -static DEVICE_ATTR(hash_max, S_IRUGO | S_IWUSR, show_hash_max, - store_hash_max); +static DEVICE_ATTR_RW(hash_max); -static ssize_t show_multicast_last_member_count(struct device *d, +static ssize_t multicast_last_member_count_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -468,17 +455,15 @@ static int set_last_member_count(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_last_member_count(struct device *d, +static ssize_t multicast_last_member_count_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_count); } -static DEVICE_ATTR(multicast_last_member_count, S_IRUGO | S_IWUSR, - show_multicast_last_member_count, - store_multicast_last_member_count); +static DEVICE_ATTR_RW(multicast_last_member_count); -static ssize_t show_multicast_startup_query_count( +static ssize_t multicast_startup_query_count_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -491,17 +476,15 @@ static int set_startup_query_count(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_startup_query_count( +static ssize_t multicast_startup_query_count_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_count); } -static DEVICE_ATTR(multicast_startup_query_count, S_IRUGO | S_IWUSR, - show_multicast_startup_query_count, - store_multicast_startup_query_count); +static DEVICE_ATTR_RW(multicast_startup_query_count); -static ssize_t show_multicast_last_member_interval( +static ssize_t multicast_last_member_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -515,17 +498,15 @@ static int set_last_member_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_last_member_interval( +static ssize_t multicast_last_member_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_interval); } -static DEVICE_ATTR(multicast_last_member_interval, S_IRUGO | S_IWUSR, - show_multicast_last_member_interval, - store_multicast_last_member_interval); +static DEVICE_ATTR_RW(multicast_last_member_interval); -static ssize_t show_multicast_membership_interval( +static ssize_t multicast_membership_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -539,17 +520,15 @@ static int set_membership_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_membership_interval( +static ssize_t multicast_membership_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_membership_interval); } -static DEVICE_ATTR(multicast_membership_interval, S_IRUGO | S_IWUSR, - show_multicast_membership_interval, - store_multicast_membership_interval); +static DEVICE_ATTR_RW(multicast_membership_interval); -static ssize_t show_multicast_querier_interval(struct device *d, +static ssize_t multicast_querier_interval_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -564,17 +543,15 @@ static int set_querier_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_querier_interval(struct device *d, +static ssize_t multicast_querier_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_querier_interval); } -static DEVICE_ATTR(multicast_querier_interval, S_IRUGO | S_IWUSR, - show_multicast_querier_interval, - store_multicast_querier_interval); +static DEVICE_ATTR_RW(multicast_querier_interval); -static ssize_t show_multicast_query_interval(struct device *d, +static ssize_t multicast_query_interval_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -589,17 +566,15 @@ static int set_query_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_query_interval(struct device *d, +static ssize_t multicast_query_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_interval); } -static DEVICE_ATTR(multicast_query_interval, S_IRUGO | S_IWUSR, - show_multicast_query_interval, - store_multicast_query_interval); +static DEVICE_ATTR_RW(multicast_query_interval); -static ssize_t show_multicast_query_response_interval( +static ssize_t multicast_query_response_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -614,17 +589,15 @@ static int set_query_response_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_query_response_interval( +static ssize_t multicast_query_response_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_response_interval); } -static DEVICE_ATTR(multicast_query_response_interval, S_IRUGO | S_IWUSR, - show_multicast_query_response_interval, - store_multicast_query_response_interval); +static DEVICE_ATTR_RW(multicast_query_response_interval); -static ssize_t show_multicast_startup_query_interval( +static ssize_t multicast_startup_query_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -639,18 +612,16 @@ static int set_startup_query_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_startup_query_interval( +static ssize_t multicast_startup_query_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_interval); } -static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR, - show_multicast_startup_query_interval, - store_multicast_startup_query_interval); +static DEVICE_ATTR_RW(multicast_startup_query_interval); #endif #ifdef CONFIG_BRIDGE_NETFILTER -static ssize_t show_nf_call_iptables( +static ssize_t nf_call_iptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -663,16 +634,15 @@ static int set_nf_call_iptables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_iptables( +static ssize_t nf_call_iptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_iptables); } -static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR, - show_nf_call_iptables, store_nf_call_iptables); +static DEVICE_ATTR_RW(nf_call_iptables); -static ssize_t show_nf_call_ip6tables( +static ssize_t nf_call_ip6tables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -685,16 +655,15 @@ static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_ip6tables( +static ssize_t nf_call_ip6tables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); } -static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR, - show_nf_call_ip6tables, store_nf_call_ip6tables); +static DEVICE_ATTR_RW(nf_call_ip6tables); -static ssize_t show_nf_call_arptables( +static ssize_t nf_call_arptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -707,17 +676,16 @@ static int set_nf_call_arptables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_arptables( +static ssize_t nf_call_arptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_arptables); } -static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, - show_nf_call_arptables, store_nf_call_arptables); +static DEVICE_ATTR_RW(nf_call_arptables); #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING -static ssize_t show_vlan_filtering(struct device *d, +static ssize_t vlan_filtering_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -725,14 +693,13 @@ static ssize_t show_vlan_filtering(struct device *d, return sprintf(buf, "%d\n", br->vlan_enabled); } -static ssize_t store_vlan_filtering(struct device *d, +static ssize_t vlan_filtering_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); } -static DEVICE_ATTR(vlan_filtering, S_IRUGO | S_IWUSR, - show_vlan_filtering, store_vlan_filtering); +static DEVICE_ATTR_RW(vlan_filtering); #endif static struct attribute *bridge_attrs[] = { diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 2a2cdb7..dd595bd 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -26,7 +26,7 @@ struct brport_attribute { int (*store)(struct net_bridge_port *, unsigned long); }; -#define BRPORT_ATTR(_name,_mode,_show,_store) \ +#define BRPORT_ATTR(_name, _mode, _show, _store) \ const struct brport_attribute brport_attr_##_name = { \ .attr = {.name = __stringify(_name), \ .mode = _mode }, \ @@ -209,21 +209,21 @@ static const struct brport_attribute *brport_attrs[] = { #define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr) #define to_brport(obj) container_of(obj, struct net_bridge_port, kobj) -static ssize_t brport_show(struct kobject * kobj, - struct attribute * attr, char * buf) +static ssize_t brport_show(struct kobject *kobj, + struct attribute *attr, char *buf) { - struct brport_attribute * brport_attr = to_brport_attr(attr); - struct net_bridge_port * p = to_brport(kobj); + struct brport_attribute *brport_attr = to_brport_attr(attr); + struct net_bridge_port *p = to_brport(kobj); return brport_attr->show(p, buf); } -static ssize_t brport_store(struct kobject * kobj, - struct attribute * attr, - const char * buf, size_t count) +static ssize_t brport_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) { - struct brport_attribute * brport_attr = to_brport_attr(attr); - struct net_bridge_port * p = to_brport(kobj); + struct brport_attribute *brport_attr = to_brport_attr(attr); + struct net_bridge_port *p = to_brport(kobj); ssize_t ret = -EINVAL; char *endp; unsigned long val; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index af5ebd1..7ffc801 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -146,7 +146,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, /* At this point, we know that the frame was filtered and contains * a valid vlan id. If the vlan id is set in the untagged bitmap, - * send untagged; otherwise, send taged. + * send untagged; otherwise, send tagged. */ br_vlan_get_tag(skb, &vid); if (test_bit(vid, pv->untagged_bitmap)) diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 19c37a4..5322a36 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -96,7 +96,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, bitmask = NF_LOG_MASK; if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == - htons(ETH_P_IP)){ + htons(ETH_P_IP)) { const struct iphdr *ih; struct iphdr _iph; diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index f8f0bd1..0f6b118 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -35,7 +35,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) return EBT_DROP; if (ap->ar_hln != ETH_ALEN) goto out; - if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN)) + if (skb_store_bits(skb, sizeof(_ah), info->mac, ETH_ALEN)) return EBT_DROP; } out: diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index eae67bf..8d3f8c7 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -14,8 +14,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/if_ether.h> diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index dbd1c78..d2cdf5d6 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -23,8 +23,7 @@ static struct ebt_entries initial_chain = { .policy = EBT_ACCEPT, }; -static struct ebt_replace_kernel initial_table = -{ +static struct ebt_replace_kernel initial_table = { .name = "broute", .valid_hooks = 1 << NF_BR_BROUTING, .entries_size = sizeof(struct ebt_entries), @@ -41,8 +40,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static const struct ebt_table broute_table = -{ +static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index bb2da7b..ce205aa 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -14,8 +14,7 @@ #define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \ (1 << NF_BR_LOCAL_OUT)) -static struct ebt_entries initial_chains[] = -{ +static struct ebt_entries initial_chains[] = { { .name = "INPUT", .policy = EBT_ACCEPT, @@ -30,8 +29,7 @@ static struct ebt_entries initial_chains[] = }, }; -static struct ebt_replace_kernel initial_table = -{ +static struct ebt_replace_kernel initial_table = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .entries_size = 3 * sizeof(struct ebt_entries), @@ -50,8 +48,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static const struct ebt_table frame_filter = -{ +static const struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index bd238f1..a0ac298 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -14,8 +14,7 @@ #define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \ (1 << NF_BR_POST_ROUTING)) -static struct ebt_entries initial_chains[] = -{ +static struct ebt_entries initial_chains[] = { { .name = "PREROUTING", .policy = EBT_ACCEPT, @@ -30,8 +29,7 @@ static struct ebt_entries initial_chains[] = } }; -static struct ebt_replace_kernel initial_table = -{ +static struct ebt_replace_kernel initial_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .entries_size = 3 * sizeof(struct ebt_entries), @@ -50,8 +48,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_nat = -{ +static struct ebt_table frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ac78024..0e474b1 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -118,10 +118,10 @@ ebt_dev_check(const char *entry, const struct net_device *device) /* 1 is the wildcard token */ while (entry[i] != '\0' && entry[i] != 1 && entry[i] == devname[i]) i++; - return (devname[i] != entry[i] && entry[i] != 1); + return devname[i] != entry[i] && entry[i] != 1; } -#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg)) +#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg)) /* process standard matches */ static inline int ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, @@ -1441,7 +1441,7 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, return -EFAULT; if (*len != sizeof(struct ebt_replace) + entries_size + - (tmp.num_counters? nentries * sizeof(struct ebt_counter): 0)) + (tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0)) return -EINVAL; if (tmp.nentries != nentries) { @@ -1477,7 +1477,7 @@ static int do_ebt_set_ctl(struct sock *sk, if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - switch(cmd) { + switch (cmd) { case EBT_SO_SET_ENTRIES: ret = do_replace(net, user, len); break; @@ -1507,10 +1507,10 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (!t) return ret; - switch(cmd) { + switch (cmd) { case EBT_SO_GET_INFO: case EBT_SO_GET_INIT_INFO: - if (*len != sizeof(struct ebt_replace)){ + if (*len != sizeof(struct ebt_replace)) { ret = -EINVAL; mutex_unlock(&ebt_mutex); break; @@ -1525,7 +1525,7 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) tmp.valid_hooks = t->table->valid_hooks; } mutex_unlock(&ebt_mutex); - if (copy_to_user(user, &tmp, *len) != 0){ + if (copy_to_user(user, &tmp, *len) != 0) { BUGPRINT("c2u Didn't work\n"); ret = -EFAULT; break; @@ -2375,8 +2375,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, } #endif -static struct nf_sockopt_ops ebt_sockopts = -{ +static struct nf_sockopt_ops ebt_sockopts = { .pf = PF_INET, .set_optmin = EBT_BASE_CTL, .set_optmax = EBT_SO_SET_MAX + 1, diff --git a/net/can/gw.c b/net/can/gw.c index 3f9b0f3..88c8a39 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -844,8 +844,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (!gwj->src.dev) goto out; - /* check for CAN netdev not using header_ops - see gw_rcv() */ - if (gwj->src.dev->type != ARPHRD_CAN || gwj->src.dev->header_ops) + if (gwj->src.dev->type != ARPHRD_CAN) goto put_src_out; gwj->dst.dev = dev_get_by_index(&init_net, gwj->ccgw.dst_idx); @@ -853,8 +852,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (!gwj->dst.dev) goto put_src_out; - /* check for CAN netdev not using header_ops - see gw_rcv() */ - if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops) + if (gwj->dst.dev->type != ARPHRD_CAN) goto put_src_dst_out; gwj->limit_hops = limhops; diff --git a/net/compat.c b/net/compat.c index 618c6a8..dd32e34 100644 --- a/net/compat.c +++ b/net/compat.c @@ -72,7 +72,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/core/Makefile b/net/core/Makefile index b33b996..9628c20 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -21,4 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o -obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o +obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o +obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o diff --git a/net/core/dev.c b/net/core/dev.c index ba3b7ea..e5e23d7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -480,7 +480,7 @@ EXPORT_SYMBOL(dev_add_offload); * and must not be freed until after all the CPU's have gone * through a quiescent state. */ -void __dev_remove_offload(struct packet_offload *po) +static void __dev_remove_offload(struct packet_offload *po) { struct list_head *head = &offload_base; struct packet_offload *po1; @@ -498,7 +498,6 @@ void __dev_remove_offload(struct packet_offload *po) out: spin_unlock(&offload_lock); } -EXPORT_SYMBOL(__dev_remove_offload); /** * dev_remove_offload - remove packet offload handler @@ -1566,14 +1565,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, - struct netdev_notifier_info *info) +static int call_netdevice_notifiers_info(unsigned long val, + struct net_device *dev, + struct netdev_notifier_info *info) { ASSERT_RTNL(); netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } -EXPORT_SYMBOL(call_netdevice_notifiers_info); /** * call_netdevice_notifiers - call all network notifier blocks @@ -2145,30 +2144,42 @@ void __netif_schedule(struct Qdisc *q) } EXPORT_SYMBOL(__netif_schedule); -void dev_kfree_skb_irq(struct sk_buff *skb) +struct dev_kfree_skb_cb { + enum skb_free_reason reason; +}; + +static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) +{ + return (struct dev_kfree_skb_cb *)skb->cb; +} + +void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) { - if (atomic_dec_and_test(&skb->users)) { - struct softnet_data *sd; - unsigned long flags; + unsigned long flags; - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - skb->next = sd->completion_queue; - sd->completion_queue = skb; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); + if (likely(atomic_read(&skb->users) == 1)) { + smp_rmb(); + atomic_set(&skb->users, 0); + } else if (likely(!atomic_dec_and_test(&skb->users))) { + return; } + get_kfree_skb_cb(skb)->reason = reason; + local_irq_save(flags); + skb->next = __this_cpu_read(softnet_data.completion_queue); + __this_cpu_write(softnet_data.completion_queue, skb); + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); } -EXPORT_SYMBOL(dev_kfree_skb_irq); +EXPORT_SYMBOL(__dev_kfree_skb_irq); -void dev_kfree_skb_any(struct sk_buff *skb) +void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) { if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); + __dev_kfree_skb_irq(skb, reason); else dev_kfree_skb(skb); } -EXPORT_SYMBOL(dev_kfree_skb_any); +EXPORT_SYMBOL(__dev_kfree_skb_any); /** @@ -2442,13 +2453,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) { struct dev_gso_cb *cb; - do { - struct sk_buff *nskb = skb->next; - - skb->next = nskb->next; - nskb->next = NULL; - kfree_skb(nskb); - } while (skb->next); + kfree_skb_list(skb->next); + skb->next = NULL; cb = DEV_GSO_CB(skb); if (cb->destructor) @@ -2523,21 +2529,6 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) } EXPORT_SYMBOL(netif_skb_features); -/* - * Returns true if either: - * 1. skb has frag_list and the device doesn't support FRAGLIST, or - * 2. skb is fragmented and the device does not support SG. - */ -static inline int skb_needs_linearize(struct sk_buff *skb, - netdev_features_t features) -{ - return skb_is_nonlinear(skb) && - ((skb_has_frag_list(skb) && - !(features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && - !(features & NETIF_F_SG))); -} - int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, void *accel_priv) { @@ -2750,7 +2741,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); @@ -3009,7 +3000,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_rxhash(skb)) + if (!skb_get_hash(skb)) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3154,7 +3145,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) { - new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); + new_flow = skb_get_hash(skb) & (fl->num_buckets - 1); old_flow = fl->history[fl->history_head]; fl->history[fl->history_head] = new_flow; @@ -3306,7 +3297,10 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); - trace_kfree_skb(skb, net_tx_action); + if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) + trace_consume_skb(skb); + else + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } } @@ -3752,7 +3746,7 @@ static int napi_gro_complete(struct sk_buff *skb) if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->callbacks.gro_complete(skb); + err = ptype->callbacks.gro_complete(skb, 0); break; } rcu_read_unlock(); @@ -3818,6 +3812,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) } } +static void skb_gro_reset_offset(struct sk_buff *skb) +{ + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + + NAPI_GRO_CB(skb)->data_offset = 0; + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + + if (skb_mac_header(skb) == skb_tail_pointer(skb) && + pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); + } +} + static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -3833,6 +3844,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; + skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); rcu_read_lock(); @@ -3938,27 +3950,8 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) return ret; } -static void skb_gro_reset_offset(struct sk_buff *skb) -{ - const struct skb_shared_info *pinfo = skb_shinfo(skb); - const skb_frag_t *frag0 = &pinfo->frags[0]; - - NAPI_GRO_CB(skb)->data_offset = 0; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; - - if (skb_mac_header(skb) == skb_tail_pointer(skb) && - pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0))) { - NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); - } -} - gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { - skb_gro_reset_offset(skb); - return napi_skb_finish(dev_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -3981,8 +3974,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); - if (skb) - napi->skb = skb; + napi->skb = skb; } return skb; } @@ -3993,12 +3985,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * { switch (ret) { case GRO_NORMAL: - case GRO_HELD: - skb->protocol = eth_type_trans(skb, skb->dev); - - if (ret == GRO_HELD) - skb_gro_pull(skb, -ETH_HLEN); - else if (netif_receive_skb(skb)) + if (netif_receive_skb(skb)) ret = GRO_DROP; break; @@ -4007,6 +3994,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * napi_reuse_skb(napi, skb); break; + case GRO_HELD: case GRO_MERGED: break; } @@ -4017,36 +4005,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; - struct ethhdr *eth; - unsigned int hlen; - unsigned int off; napi->skb = NULL; - skb_reset_mac_header(skb); - skb_gro_reset_offset(skb); - - off = skb_gro_offset(skb); - hlen = off + sizeof(*eth); - eth = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - eth = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!eth)) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) { + napi_reuse_skb(napi, skb); + return NULL; } + skb->protocol = eth_type_trans(skb, skb->dev); - skb_gro_pull(skb, sizeof(*eth)); - - /* - * This works because the only protocols we care about don't require - * special handling. We'll fix it up properly at the end. - */ - skb->protocol = eth->h_proto; - -out: return skb; } @@ -4062,7 +4029,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) EXPORT_SYMBOL(napi_gro_frags); /* - * net_rps_action sends any pending IPI's for rps. + * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. */ static void net_rps_action_and_irq_enable(struct softnet_data *sd) @@ -4267,17 +4234,10 @@ EXPORT_SYMBOL(netif_napi_add); void netif_napi_del(struct napi_struct *napi) { - struct sk_buff *skb, *next; - list_del_init(&napi->dev_list); napi_free_frags(napi); - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; - skb->next = NULL; - kfree_skb(skb); - } - + kfree_skb_list(napi->gro_list); napi->gro_list = NULL; napi->gro_count = 0; } @@ -4394,19 +4354,6 @@ struct netdev_adjacent { struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *adj_list) -{ - struct netdev_adjacent *adj; - - list_for_each_entry_rcu(adj, adj_list, list) { - if (adj->dev == adj_dev) - return adj; - } - return NULL; -} - static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, struct net_device *adj_dev, struct list_head *adj_list) @@ -4445,13 +4392,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ -bool netdev_has_any_upper_dev(struct net_device *dev) +static bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->all_adj_list.upper); } -EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device @@ -4500,7 +4446,7 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, { struct netdev_adjacent *upper; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); @@ -4571,6 +4517,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); /** + * netdev_lower_get_first_private_rcu - Get the first ->private from the + * lower neighbour list, RCU + * variant + * @dev: device + * + * Gets the first netdev_adjacent->private from the dev's lower neighbour + * list. The caller must hold RCU read lock. + */ +void *netdev_lower_get_first_private_rcu(struct net_device *dev) +{ + struct netdev_adjacent *lower; + + lower = list_first_or_null_rcu(&dev->adj_list.lower, + struct netdev_adjacent, list); + if (lower) + return lower->private; + return NULL; +} +EXPORT_SYMBOL(netdev_lower_get_first_private_rcu); + +/** * netdev_master_upper_dev_get_rcu - Get master upper device * @dev: device * @@ -4662,9 +4629,9 @@ free_adj: return ret; } -void __netdev_adjacent_dev_remove(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *dev_list) +static void __netdev_adjacent_dev_remove(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) { struct netdev_adjacent *adj; char linkname[IFNAMSIZ+7]; @@ -4702,11 +4669,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev, kfree_rcu(adj, rcu); } -int __netdev_adjacent_dev_link_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list, - void *private, bool master) +static int __netdev_adjacent_dev_link_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list, + void *private, bool master) { int ret; @@ -4725,8 +4692,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev, return 0; } -int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) +static int __netdev_adjacent_dev_link(struct net_device *dev, + struct net_device *upper_dev) { return __netdev_adjacent_dev_link_lists(dev, upper_dev, &dev->all_adj_list.upper, @@ -4734,26 +4701,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev, NULL, false); } -void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list) +static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list) { __netdev_adjacent_dev_remove(dev, upper_dev, up_list); __netdev_adjacent_dev_remove(upper_dev, dev, down_list); } -void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink_lists(dev, upper_dev, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } -int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, - struct net_device *upper_dev, - void *private, bool master) +static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, + struct net_device *upper_dev, + void *private, bool master) { int ret = __netdev_adjacent_dev_link(dev, upper_dev); @@ -4772,8 +4739,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, return 0; } -void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink(dev, upper_dev); __netdev_adjacent_dev_unlink_lists(dev, upper_dev, @@ -4962,21 +4929,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); -void *netdev_lower_dev_get_private_rcu(struct net_device *dev, - struct net_device *lower_dev) -{ - struct netdev_adjacent *lower; - - if (!lower_dev) - return NULL; - lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); - if (!lower) - return NULL; - - return lower->private; -} -EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); - void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev) { diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index ec40a84..bb504a9 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -186,47 +186,6 @@ static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, return err; } -int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, unsigned char addr_type) -{ - int err; - struct netdev_hw_addr *ha, *ha2; - unsigned char type; - - list_for_each_entry(ha, &from_list->list, list) { - type = addr_type ? addr_type : ha->type; - err = __hw_addr_add(to_list, ha->addr, addr_len, type); - if (err) - goto unroll; - } - return 0; - -unroll: - list_for_each_entry(ha2, &from_list->list, list) { - if (ha2 == ha) - break; - type = addr_type ? addr_type : ha2->type; - __hw_addr_del(to_list, ha2->addr, addr_len, type); - } - return err; -} -EXPORT_SYMBOL(__hw_addr_add_multiple); - -void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, unsigned char addr_type) -{ - struct netdev_hw_addr *ha; - unsigned char type; - - list_for_each_entry(ha, &from_list->list, list) { - type = addr_type ? addr_type : ha->type; - __hw_addr_del(to_list, ha->addr, addr_len, type); - } -} -EXPORT_SYMBOL(__hw_addr_del_multiple); - /* This function only works where there is a strict 1-1 relationship * between source and destionation of they synch. If you ever need to * sync addresses to more then 1 destination, you need to use @@ -264,7 +223,7 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_unsync); -void __hw_addr_flush(struct netdev_hw_addr_list *list) +static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; @@ -274,7 +233,6 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list) } list->count = 0; } -EXPORT_SYMBOL(__hw_addr_flush); void __hw_addr_init(struct netdev_hw_addr_list *list) { @@ -400,59 +358,6 @@ int dev_addr_del(struct net_device *dev, const unsigned char *addr, } EXPORT_SYMBOL(dev_addr_del); -/** - * dev_addr_add_multiple - Add device addresses from another device - * @to_dev: device to which addresses will be added - * @from_dev: device from which addresses will be added - * @addr_type: address type - 0 means type will be used from from_dev - * - * Add device addresses of the one device to another. - ** - * The caller must hold the rtnl_mutex. - */ -int dev_addr_add_multiple(struct net_device *to_dev, - struct net_device *from_dev, - unsigned char addr_type) -{ - int err; - - ASSERT_RTNL(); - - if (from_dev->addr_len != to_dev->addr_len) - return -EINVAL; - err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, - to_dev->addr_len, addr_type); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); - return err; -} -EXPORT_SYMBOL(dev_addr_add_multiple); - -/** - * dev_addr_del_multiple - Delete device addresses by another device - * @to_dev: device where the addresses will be deleted - * @from_dev: device supplying the addresses to be deleted - * @addr_type: address type - 0 means type will be used from from_dev - * - * Deletes addresses in to device by the list of addresses in from device. - * - * The caller must hold the rtnl_mutex. - */ -int dev_addr_del_multiple(struct net_device *to_dev, - struct net_device *from_dev, - unsigned char addr_type) -{ - ASSERT_RTNL(); - - if (from_dev->addr_len != to_dev->addr_len) - return -EINVAL; - __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, - to_dev->addr_len, addr_type); - call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); - return 0; -} -EXPORT_SYMBOL(dev_addr_del_multiple); - /* * Unicast list handling functions */ diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 5b7d0e1..cf999e0 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -327,6 +327,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCBRADDIF || cmd == SIOCBRDELIF || cmd == SIOCSHWTSTAMP || + cmd == SIOCGHWTSTAMP || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; if (ops->ndo_do_ioctl) { @@ -546,6 +547,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) */ default: if (cmd == SIOCWANDEV || + cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { dev_load(net, ifr.ifr_name); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 9589718..e70301e 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -64,7 +64,6 @@ static struct genl_family net_drop_monitor_family = { .hdrsize = 0, .name = "NET_DM", .version = 2, - .maxattr = NET_DM_CMD_MAX, }; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d6ef173..b324bfa 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -202,12 +202,12 @@ static __always_inline u32 __flow_hash_1word(u32 a) } /* - * __skb_get_rxhash: calculate a flow hash based on src/dst addresses + * __skb_get_hash: calculate a flow hash based on src/dst addresses * and src/dst port numbers. Sets rxhash in skb to non-zero hash value * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb * if hash is a canonical 4-tuple hash over transport ports. */ -void __skb_get_rxhash(struct sk_buff *skb) +void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; u32 hash; @@ -234,7 +234,7 @@ void __skb_get_rxhash(struct sk_buff *skb) skb->rxhash = hash; } -EXPORT_SYMBOL(__skb_get_rxhash); +EXPORT_SYMBOL(__skb_get_hash); /* * Returns a Tx hash based on the given packet descriptor a Tx queues' number diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ca15f32..ea97361 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -38,6 +38,8 @@ #include <linux/random.h> #include <linux/string.h> #include <linux/log2.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> #define DEBUG #define NEIGH_DEBUG 1 @@ -497,7 +499,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, goto out_neigh_release; } - n->confirmed = jiffies - (n->parms->base_reachable_time << 1); + n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, @@ -776,7 +778,7 @@ static void neigh_periodic_work(struct work_struct *work) tbl->last_rand = jiffies; for (p = &tbl->parms; p; p = p->next) p->reachable_time = - neigh_rand_reach_time(p->base_reachable_time); + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } for (i = 0 ; i < (1 << nht->hash_shift); i++) { @@ -799,7 +801,7 @@ static void neigh_periodic_work(struct work_struct *work) if (atomic_read(&n->refcnt) == 1 && (state == NUD_FAILED || - time_after(jiffies, n->used + n->parms->gc_staletime))) { + time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { *np = n->next; n->dead = 1; write_unlock(&n->lock); @@ -822,12 +824,12 @@ next_elt: lockdep_is_held(&tbl->lock)); } out: - /* Cycle through all hash buckets every base_reachable_time/2 ticks. - * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 - * base_reachable_time. + /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks. + * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2 + * BASE_REACHABLE_TIME. */ schedule_delayed_work(&tbl->gc_work, - tbl->parms.base_reachable_time >> 1); + NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1); write_unlock_bh(&tbl->lock); } @@ -835,8 +837,9 @@ static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; return (n->nud_state & NUD_PROBE) ? - p->ucast_probes : - p->ucast_probes + p->app_probes + p->mcast_probes; + NEIGH_VAR(p, UCAST_PROBES) : + NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + + NEIGH_VAR(p, MCAST_PROBES); } static void neigh_invalidate(struct neighbour *neigh) @@ -901,12 +904,13 @@ static void neigh_timer_handler(unsigned long arg) neigh_dbg(2, "neigh %p is still alive\n", neigh); next = neigh->confirmed + neigh->parms->reachable_time; } else if (time_before_eq(now, - neigh->used + neigh->parms->delay_probe_time)) { + neigh->used + + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_suspect(neigh); - next = now + neigh->parms->delay_probe_time; + next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); } else { neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->nud_state = NUD_STALE; @@ -916,7 +920,8 @@ static void neigh_timer_handler(unsigned long arg) } } else if (state & NUD_DELAY) { if (time_before_eq(now, - neigh->confirmed + neigh->parms->delay_probe_time)) { + neigh->confirmed + + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is now reachable\n", neigh); neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; @@ -928,11 +933,11 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_PROBE; neigh->updated = jiffies; atomic_set(&neigh->probes, 0); - next = now + neigh->parms->retrans_time; + next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); } } else { /* NUD_PROBE|NUD_INCOMPLETE */ - next = now + neigh->parms->retrans_time; + next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); } if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && @@ -973,13 +978,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) goto out_unlock_bh; if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { - if (neigh->parms->mcast_probes + neigh->parms->app_probes) { + if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + + NEIGH_VAR(neigh->parms, APP_PROBES)) { unsigned long next, now = jiffies; - atomic_set(&neigh->probes, neigh->parms->ucast_probes); + atomic_set(&neigh->probes, + NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; - next = now + max(neigh->parms->retrans_time, HZ/2); + next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), + HZ/2); neigh_add_timer(neigh, next); immediate_probe = true; } else { @@ -994,14 +1002,14 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; - neigh_add_timer(neigh, - jiffies + neigh->parms->delay_probe_time); + neigh_add_timer(neigh, jiffies + + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); } if (neigh->nud_state == NUD_INCOMPLETE) { if (skb) { while (neigh->arp_queue_len_bytes + skb->truesize > - neigh->parms->queue_len_bytes) { + NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) { struct sk_buff *buff; buff = __skb_dequeue(&neigh->arp_queue); @@ -1161,6 +1169,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh->parms->reachable_time : 0))); neigh->nud_state = new; + notify = 1; } if (lladdr != neigh->ha) { @@ -1170,7 +1179,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_update_hhs(neigh); if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - - (neigh->parms->base_reachable_time << 1); + (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1); notify = 1; } if (new == old) @@ -1230,6 +1239,21 @@ out: } EXPORT_SYMBOL(neigh_update); +/* Update the neigh to listen temporarily for probe responses, even if it is + * in a NUD_FAILED state. The caller has to hold neigh->lock for writing. + */ +void __neigh_set_probe_once(struct neighbour *neigh) +{ + neigh->updated = jiffies; + if (!(neigh->nud_state & NUD_FAILED)) + return; + neigh->nud_state = NUD_PROBE; + atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); + neigh_add_timer(neigh, + jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); +} +EXPORT_SYMBOL(__neigh_set_probe_once); + struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, struct net_device *dev) @@ -1274,7 +1298,7 @@ int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 && - dev->header_ops->rebuild(skb)) + dev_rebuild_header(skb)) return 0; return dev_queue_xmit(skb); @@ -1391,9 +1415,10 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb) { unsigned long now = jiffies; - unsigned long sched_next = now + (net_random() % p->proxy_delay); + unsigned long sched_next = now + (net_random() % + NEIGH_VAR(p, PROXY_DELAY)); - if (tbl->proxy_queue.qlen > p->proxy_qlen) { + if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); return; } @@ -1440,7 +1465,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p->tbl = tbl; atomic_set(&p->refcnt, 1); p->reachable_time = - neigh_rand_reach_time(p->base_reachable_time); + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); dev_hold(dev); p->dev = dev; write_pnet(&p->net, hold_net(net)); @@ -1457,6 +1482,8 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p->next = tbl->parms.next; tbl->parms.next = p; write_unlock_bh(&tbl->lock); + + neigh_parms_data_state_cleanall(p); } return p; } @@ -1509,7 +1536,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) write_pnet(&tbl->parms.net, &init_net); atomic_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = - neigh_rand_reach_time(tbl->parms.base_reachable_time); + neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) @@ -1777,24 +1804,32 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) if ((parms->dev && nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || - nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || + nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, + NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || /* approximative value for deprecated QUEUE_LEN (in packets) */ nla_put_u32(skb, NDTPA_QUEUE_LEN, - parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) || - nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || - nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || - nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || - nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) || + NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) || + nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) || + nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) || + nla_put_u32(skb, NDTPA_UCAST_PROBES, + NEIGH_VAR(parms, UCAST_PROBES)) || + nla_put_u32(skb, NDTPA_MCAST_PROBES, + NEIGH_VAR(parms, MCAST_PROBES)) || nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, - parms->base_reachable_time) || - nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) || + NEIGH_VAR(parms, BASE_REACHABLE_TIME)) || + nla_put_msecs(skb, NDTPA_GC_STALETIME, + NEIGH_VAR(parms, GC_STALETIME)) || nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, - parms->delay_probe_time) || - nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) || - nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) || - nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) || - nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime)) + NEIGH_VAR(parms, DELAY_PROBE_TIME)) || + nla_put_msecs(skb, NDTPA_RETRANS_TIME, + NEIGH_VAR(parms, RETRANS_TIME)) || + nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, + NEIGH_VAR(parms, ANYCAST_DELAY)) || + nla_put_msecs(skb, NDTPA_PROXY_DELAY, + NEIGH_VAR(parms, PROXY_DELAY)) || + nla_put_msecs(skb, NDTPA_LOCKTIME, + NEIGH_VAR(parms, LOCKTIME))) goto nla_put_failure; return nla_nest_end(skb, nest); @@ -2010,44 +2045,54 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) switch (i) { case NDTPA_QUEUE_LEN: - p->queue_len_bytes = nla_get_u32(tbp[i]) * - SKB_TRUESIZE(ETH_FRAME_LEN); + NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, + nla_get_u32(tbp[i]) * + SKB_TRUESIZE(ETH_FRAME_LEN)); break; case NDTPA_QUEUE_LENBYTES: - p->queue_len_bytes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, + nla_get_u32(tbp[i])); break; case NDTPA_PROXY_QLEN: - p->proxy_qlen = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, PROXY_QLEN, + nla_get_u32(tbp[i])); break; case NDTPA_APP_PROBES: - p->app_probes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, APP_PROBES, + nla_get_u32(tbp[i])); break; case NDTPA_UCAST_PROBES: - p->ucast_probes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, UCAST_PROBES, + nla_get_u32(tbp[i])); break; case NDTPA_MCAST_PROBES: - p->mcast_probes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, MCAST_PROBES, + nla_get_u32(tbp[i])); break; case NDTPA_BASE_REACHABLE_TIME: - p->base_reachable_time = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, + nla_get_msecs(tbp[i])); break; case NDTPA_GC_STALETIME: - p->gc_staletime = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, GC_STALETIME, + nla_get_msecs(tbp[i])); break; case NDTPA_DELAY_PROBE_TIME: - p->delay_probe_time = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, DELAY_PROBE_TIME, + nla_get_msecs(tbp[i])); break; case NDTPA_RETRANS_TIME: - p->retrans_time = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, RETRANS_TIME, + nla_get_msecs(tbp[i])); break; case NDTPA_ANYCAST_DELAY: - p->anycast_delay = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, ANYCAST_DELAY, nla_get_msecs(tbp[i])); break; case NDTPA_PROXY_DELAY: - p->proxy_delay = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, PROXY_DELAY, nla_get_msecs(tbp[i])); break; case NDTPA_LOCKTIME: - p->locktime = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, LOCKTIME, nla_get_msecs(tbp[i])); break; } } @@ -2788,133 +2833,167 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write, return ret; } -enum { - NEIGH_VAR_MCAST_PROBE, - NEIGH_VAR_UCAST_PROBE, - NEIGH_VAR_APP_PROBE, - NEIGH_VAR_RETRANS_TIME, - NEIGH_VAR_BASE_REACHABLE_TIME, - NEIGH_VAR_DELAY_PROBE_TIME, - NEIGH_VAR_GC_STALETIME, - NEIGH_VAR_QUEUE_LEN, - NEIGH_VAR_QUEUE_LEN_BYTES, - NEIGH_VAR_PROXY_QLEN, - NEIGH_VAR_ANYCAST_DELAY, - NEIGH_VAR_PROXY_DELAY, - NEIGH_VAR_LOCKTIME, - NEIGH_VAR_RETRANS_TIME_MS, - NEIGH_VAR_BASE_REACHABLE_TIME_MS, - NEIGH_VAR_GC_INTERVAL, - NEIGH_VAR_GC_THRESH1, - NEIGH_VAR_GC_THRESH2, - NEIGH_VAR_GC_THRESH3, - NEIGH_VAR_MAX -}; +static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, + int family) +{ + switch (family) { + case AF_INET: + return __in_dev_arp_parms_get_rcu(dev); + case AF_INET6: + return __in6_dev_nd_parms_get_rcu(dev); + } + return NULL; +} + +static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, + int index) +{ + struct net_device *dev; + int family = neigh_parms_family(p); + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + struct neigh_parms *dst_p = + neigh_get_dev_parms_rcu(dev, family); + + if (dst_p && !test_bit(index, dst_p->data_state)) + dst_p->data[index] = p->data[index]; + } + rcu_read_unlock(); +} + +static void neigh_proc_update(struct ctl_table *ctl, int write) +{ + struct net_device *dev = ctl->extra1; + struct neigh_parms *p = ctl->extra2; + struct net *net = neigh_parms_net(p); + int index = (int *) ctl->data - p->data; + + if (!write) + return; + + set_bit(index, p->data_state); + if (!dev) /* NULL dev means this is default value */ + neigh_copy_dflt_parms(net, p, index); +} + +static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + struct ctl_table tmp = *ctl; + int ret; + + tmp.extra1 = &zero; + tmp.extra2 = &int_max; + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + neigh_proc_update(ctl, write); + return ret; +} + +int neigh_proc_dointvec(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} +EXPORT_SYMBOL(neigh_proc_dointvec); + +int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} +EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); + +static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} + +int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} +EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); + +static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} + +#define NEIGH_PARMS_DATA_OFFSET(index) \ + (&((struct neigh_parms *) 0)->data[index]) + +#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ + [NEIGH_VAR_ ## attr] = { \ + .procname = name, \ + .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ + .maxlen = sizeof(int), \ + .mode = mval, \ + .proc_handler = proc, \ + } + +#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) + +#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) + +#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) + +#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies) + +#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) + +#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; } neigh_sysctl_template __read_mostly = { .neigh_vars = { - [NEIGH_VAR_MCAST_PROBE] = { - .procname = "mcast_solicit", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_UCAST_PROBE] = { - .procname = "ucast_solicit", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_APP_PROBE] = { - .procname = "app_solicit", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_RETRANS_TIME] = { - .procname = "retrans_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_userhz_jiffies, - }, - [NEIGH_VAR_BASE_REACHABLE_TIME] = { - .procname = "base_reachable_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NEIGH_VAR_DELAY_PROBE_TIME] = { - .procname = "delay_first_probe_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NEIGH_VAR_GC_STALETIME] = { - .procname = "gc_stale_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NEIGH_VAR_QUEUE_LEN] = { - .procname = "unres_qlen", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_unres_qlen, - }, - [NEIGH_VAR_QUEUE_LEN_BYTES] = { - .procname = "unres_qlen_bytes", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_PROXY_QLEN] = { - .procname = "proxy_qlen", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_ANYCAST_DELAY] = { - .procname = "anycast_delay", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_userhz_jiffies, - }, - [NEIGH_VAR_PROXY_DELAY] = { - .procname = "proxy_delay", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_userhz_jiffies, - }, - [NEIGH_VAR_LOCKTIME] = { - .procname = "locktime", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_userhz_jiffies, - }, - [NEIGH_VAR_RETRANS_TIME_MS] = { - .procname = "retrans_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = { - .procname = "base_reachable_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), + NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), + NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), + NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), + NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), + NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), + NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), [NEIGH_VAR_GC_INTERVAL] = { .procname = "gc_interval", .maxlen = sizeof(int), @@ -2950,31 +3029,23 @@ static struct neigh_sysctl_table { }; int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, - char *p_name, proc_handler *handler) + proc_handler *handler) { + int i; struct neigh_sysctl_table *t; - const char *dev_name_source = NULL; + const char *dev_name_source; char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; + char *p_name; t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); if (!t) goto err; - t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes; - t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes; - t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes; - t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time; - t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time; - t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time; - t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime; - t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes; - t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes; - t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen; - t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay; - t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay; - t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime; - t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time; - t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time; + for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) { + t->neigh_vars[i].data += (long) p; + t->neigh_vars[i].extra1 = dev; + t->neigh_vars[i].extra2 = p; + } if (dev) { dev_name_source = dev->name; @@ -2989,26 +3060,32 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; } - if (handler) { /* RetransTime */ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; - t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev; /* ReachableTime */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; - t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev; /* RetransTime (in milliseconds)*/ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; - t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev; /* ReachableTime (in milliseconds) */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; - t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; } /* Don't export sysctls to unprivileged users */ if (neigh_parms_net(p)->user_ns != &init_user_ns) t->neigh_vars[0].procname = NULL; + switch (neigh_parms_family(p)) { + case AF_INET: + p_name = "ipv4"; + break; + case AF_INET6: + p_name = "ipv6"; + break; + default: + BUG(); + } + snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", p_name, dev_name_source); t->sysctl_header = diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f3edf96..49843bf 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -676,8 +676,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, while ((mask | (mask >> 1)) != mask) mask |= (mask >> 1); /* On 64 bit arches, must check mask fits in table->mask (u32), - * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) - * doesnt overflow. + * and on 32bit arches, must check + * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow. */ #if BITS_PER_LONG > 32 if (mask > (unsigned long)(u32)mask) @@ -1358,7 +1358,7 @@ void netdev_class_remove_file_ns(struct class_attribute *class_attr, } EXPORT_SYMBOL(netdev_class_remove_file_ns); -int netdev_kobject_init(void) +int __init netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); return class_register(&net_class); diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index bd7751e..2745a1b 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -1,7 +1,7 @@ #ifndef __NET_SYSFS_H__ #define __NET_SYSFS_H__ -int netdev_kobject_init(void); +int __init netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c new file mode 100644 index 0000000..719efd5 --- /dev/null +++ b/net/core/netclassid_cgroup.c @@ -0,0 +1,120 @@ +/* + * net/core/netclassid_cgroup.c Classid Cgroupfs Handling + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf <tgraf@suug.ch> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/cgroup.h> +#include <linux/fdtable.h> +#include <net/cls_cgroup.h> +#include <net/sock.h> + +static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct cgroup_cls_state, css) : NULL; +} + +struct cgroup_cls_state *task_cls_state(struct task_struct *p) +{ + return css_cls_state(task_css(p, net_cls_subsys_id)); +} +EXPORT_SYMBOL_GPL(task_cls_state); + +static struct cgroup_subsys_state * +cgrp_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct cgroup_cls_state *cs; + + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) + return ERR_PTR(-ENOMEM); + + return &cs->css; +} + +static int cgrp_css_online(struct cgroup_subsys_state *css) +{ + struct cgroup_cls_state *cs = css_cls_state(css); + struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); + + if (parent) + cs->classid = parent->classid; + + return 0; +} + +static void cgrp_css_free(struct cgroup_subsys_state *css) +{ + kfree(css_cls_state(css)); +} + +static int update_classid(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + + if (sock) + sock->sk->sk_classid = (u32)(unsigned long)v; + + return 0; +} + +static void cgrp_attach(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset) +{ + struct cgroup_cls_state *cs = css_cls_state(css); + void *v = (void *)(unsigned long)cs->classid; + struct task_struct *p; + + cgroup_taskset_for_each(p, css, tset) { + task_lock(p); + iterate_fd(p->files, 0, update_classid, v); + task_unlock(p); + } +} + +static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) +{ + return css_cls_state(css)->classid; +} + +static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, + u64 value) +{ + css_cls_state(css)->classid = (u32) value; + + return 0; +} + +static struct cftype ss_files[] = { + { + .name = "classid", + .read_u64 = read_classid, + .write_u64 = write_classid, + }, + { } /* terminate */ +}; + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .css_alloc = cgrp_css_alloc, + .css_online = cgrp_css_online, + .css_free = cgrp_css_free, + .attach = cgrp_attach, + .subsys_id = net_cls_subsys_id, + .base_cftypes = ss_files, + .module = THIS_MODULE, +}; + +static int __init init_netclassid_cgroup(void) +{ + return cgroup_load_subsys(&net_cls_subsys); +} +__initcall(init_netclassid_cgroup); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8f97199..3030978 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, !vlan_hw_offload_capable(netif_skb_features(skb), skb->vlan_proto)) { skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); - if (unlikely(!skb)) - break; + if (unlikely(!skb)) { + /* This is actually a packet drop, but we + * don't want the code at the end of this + * function to try and re-queue a NULL skb. + */ + status = NETDEV_TX_OK; + goto unlock_txq; + } skb->vlan_tci = 0; } @@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (status == NETDEV_TX_OK) txq_trans_update(txq); } + unlock_txq: __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 9b7cf6c..1dda50c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -30,7 +30,7 @@ #define PRIOMAP_MIN_SZ 128 /* - * Extend @dev->priomap so that it's large enough to accomodate + * Extend @dev->priomap so that it's large enough to accommodate * @target_idx. @dev->priomap.priomap_len > @target_idx after successful * return. Must be called under rtnl lock. */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 261357a..a797fff 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2527,6 +2527,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (x) { int ret; __u8 *eth; + struct iphdr *iph; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2548,6 +2550,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); *(u16 *) ð[12] = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf67144..e6e7d58 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -403,34 +403,16 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) } /** - * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. - * @ops: struct rtnl_af_ops * to register - * - * The caller must hold the rtnl_mutex. - * - * Returns 0 on success or a negative error code. - */ -int __rtnl_af_register(struct rtnl_af_ops *ops) -{ - list_add_tail(&ops->list, &rtnl_af_ops); - return 0; -} -EXPORT_SYMBOL_GPL(__rtnl_af_register); - -/** * rtnl_af_register - Register rtnl_af_ops with rtnetlink. * @ops: struct rtnl_af_ops * to register * * Returns 0 on success or a negative error code. */ -int rtnl_af_register(struct rtnl_af_ops *ops) +void rtnl_af_register(struct rtnl_af_ops *ops) { - int err; - rtnl_lock(); - err = __rtnl_af_register(ops); + list_add_tail(&ops->list, &rtnl_af_ops); rtnl_unlock(); - return err; } EXPORT_SYMBOL_GPL(rtnl_af_register); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 55859cb..1d641e7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -712,9 +712,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->inner_network_header = old->inner_network_header; new->inner_mac_header = old->inner_mac_header; skb_dst_copy(new, old); - new->rxhash = old->rxhash; + skb_copy_hash(new, old); new->ooo_okay = old->ooo_okay; - new->l4_rxhash = old->l4_rxhash; new->no_fcs = old->no_fcs; new->encapsulation = old->encapsulation; #ifdef CONFIG_XFRM @@ -3067,10 +3066,7 @@ perform_csum_check: return segs; err: - while ((skb = segs)) { - segs = skb->next; - kfree_skb(skb); - } + kfree_skb_list(segs); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(skb_segment); @@ -3669,6 +3665,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; + skb->local_df = 0; skb_dst_drop(skb); skb->mark = 0; secpath_reset(skb); diff --git a/net/core/sock.c b/net/core/sock.c index ab20ed9..85ad6f0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -882,7 +882,7 @@ set_rcvbuf: case SO_PEEK_OFF: if (sock->ops->set_peek_off) - sock->ops->set_peek_off(sk, val); + ret = sock->ops->set_peek_off(sk, val); else ret = -EOPNOTSUPP; break; @@ -925,8 +925,8 @@ set_rcvbuf: EXPORT_SYMBOL(sock_setsockopt); -void cred_to_ucred(struct pid *pid, const struct cred *cred, - struct ucred *ucred) +static void cred_to_ucred(struct pid *pid, const struct cred *cred, + struct ucred *ucred) { ucred->pid = pid_vnr(pid); ucred->uid = ucred->gid = -1; @@ -937,7 +937,6 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred, ucred->gid = from_kgid_munged(current_ns, cred->egid); } } -EXPORT_SYMBOL_GPL(cred_to_ucred); int sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) @@ -1308,19 +1307,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk) -{ - u32 classid; - - classid = task_cls_classid(current); - if (classid != sk->sk_classid) - sk->sk_classid = classid; -} -EXPORT_SYMBOL(sock_update_classid); -#endif - -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) @@ -1666,22 +1653,6 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, EXPORT_SYMBOL(sock_wmalloc); /* - * Allocate a skb from the socket's receive buffer. - */ -struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - gfp_t priority) -{ - if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { - struct sk_buff *skb = alloc_skb(size, priority); - if (skb) { - skb_set_owner_r(skb, sk); - return skb; - } - } - return NULL; -} - -/* * Allocate a memory block from the socket's option memory buffer. */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cca4441..cf9cd13 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -122,7 +122,8 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, synchronize_rcu(); kfree(cur); } else if (!cur && cpumask_test_cpu(i, mask)) { - cur = kzalloc(len, GFP_KERNEL); + cur = kzalloc_node(len, GFP_KERNEL, + cpu_to_node(i)); if (!cur) { /* not unwinding previous changes */ ret = -ENOMEM; diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c index 4f72fc4..a520d80 100644 --- a/net/dcb/dcbevent.c +++ b/net/dcb/dcbevent.c @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see <http://www.gnu.org/licenses/>. * * Author: John Fastabend <john.r.fastabend@intel.com> */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 40d5829..66fbe19 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see <http://www.gnu.org/licenses/>. * * Author: Lucy Liu <lucy.liu@intel.com> */ diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 62b5828..c073b81 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -8,7 +8,7 @@ #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -bool tfrc_debug; +static bool tfrc_debug; module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 40ee7d6..a3d8f7c 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -21,7 +21,6 @@ #include "packet_history.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -extern bool tfrc_debug; #define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) #else #define tfrc_pr_debug(format, a...) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 3094878..c678166 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -479,7 +479,6 @@ void dccp_feat_list_purge(struct list_head *fn_list); int dccp_insert_options(struct sock *sk, struct sk_buff *skb); int dccp_insert_options_rsk(struct dccp_request_sock *, struct sk_buff *); -int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); u32 dccp_timestamp(void); void dccp_timestamping_init(void); int dccp_insert_option(struct sk_buff *skb, unsigned char option, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d9f65fc..88299c2 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -75,7 +75,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, - orig_sport, orig_dport, sk, true); + orig_sport, orig_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4ac71ff..4db3c2a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -141,6 +141,9 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; + if (!ip6_sk_accept_pmtu(sk)) + goto out; + if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) @@ -237,7 +240,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) final_p = fl6_update_dst(&fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -301,7 +304,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. */ - dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); @@ -512,7 +515,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, fl6.fl6_sport = htons(ireq->ir_num); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) goto out; } @@ -851,7 +854,6 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } @@ -932,7 +934,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, final_p = fl6_update_dst(&fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; diff --git a/net/dccp/options.c b/net/dccp/options.c index a58e0b6..9bce318 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -343,38 +343,6 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time) return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; } -/* FIXME: This function is currently not used anywhere */ -int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) -{ - const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - const int len = 2 + elapsed_time_len; - unsigned char *to; - - if (elapsed_time_len == 0) - return 0; - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_ELAPSED_TIME; - *to++ = len; - - if (elapsed_time_len == 2) { - const __be16 var16 = htons((u16)elapsed_time); - memcpy(to, &var16, 2); - } else { - const __be32 var32 = htonl(elapsed_time); - memcpy(to, &var32, 4); - } - - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); - static int dccp_insert_option_timestamp(struct sk_buff *skb) { __be32 now = htonl(dccp_timestamp()); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 4c6bdf9..595ddf0 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -152,17 +152,6 @@ static const struct file_operations dccpprobe_fops = { .llseek = noop_llseek, }; -static __init int setup_jprobe(void) -{ - int ret = register_jprobe(&dccp_send_probe); - - if (ret) { - request_module("dccp"); - ret = register_jprobe(&dccp_send_probe); - } - return ret; -} - static __init int dccpprobe_init(void) { int ret = -ENOMEM; @@ -174,7 +163,13 @@ static __init int dccpprobe_init(void) if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) goto err0; - ret = setup_jprobe(); + ret = register_jprobe(&dccp_send_probe); + if (ret) { + ret = request_module("dccp"); + if (!ret) + ret = register_jprobe(&dccp_send_probe); + } + if (ret) goto err1; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index dd0dfb2..a603823 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -561,6 +561,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U16 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [IFA_FLAGS] = { .type = NLA_U32 }, }; static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -648,7 +649,8 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]); ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]); - ifa->ifa_flags = ifm->ifa_flags; + ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : + ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_dev = dn_db; @@ -669,7 +671,8 @@ static inline size_t dn_ifaddr_nlmsg_size(void) return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ + nla_total_size(2) /* IFA_ADDRESS */ - + nla_total_size(2); /* IFA_LOCAL */ + + nla_total_size(2) /* IFA_LOCAL */ + + nla_total_size(4); /* IFA_FLAGS */ } static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, @@ -677,6 +680,7 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + u32 ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) @@ -685,7 +689,7 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_DECnet; ifm->ifa_prefixlen = 16; - ifm->ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT; + ifm->ifa_flags = ifa_flags; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; @@ -694,7 +698,8 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, (ifa->ifa_local && nla_put_le16(skb, IFA_LOCAL, ifa->ifa_local)) || (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) + nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + nla_put_u32(skb, IFA_FLAGS, ifa_flags)) goto nla_put_failure; return nlmsg_end(skb, nlh); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index f8637f9..c8121ce 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -102,19 +102,21 @@ struct neigh_table dn_neigh_table = { .id = "dn_neigh_cache", .parms ={ .tbl = &dn_neigh_table, - .base_reachable_time = 30 * HZ, - .retrans_time = 1 * HZ, - .gc_staletime = 60 * HZ, - .reachable_time = 30 * HZ, - .delay_probe_time = 5 * HZ, - .queue_len_bytes = 64*1024, - .ucast_probes = 0, - .app_probes = 0, - .mcast_probes = 0, - .anycast_delay = 0, - .proxy_delay = 0, - .proxy_qlen = 0, - .locktime = 1 * HZ, + .reachable_time = 30 * HZ, + .data = { + [NEIGH_VAR_MCAST_PROBES] = 0, + [NEIGH_VAR_UCAST_PROBES] = 0, + [NEIGH_VAR_APP_PROBES] = 0, + [NEIGH_VAR_RETRANS_TIME] = 1 * HZ, + [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, + [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, + [NEIGH_VAR_GC_STALETIME] = 60 * HZ, + [NEIGH_VAR_QUEUE_LEN_BYTES] = 64*1024, + [NEIGH_VAR_PROXY_QLEN] = 0, + [NEIGH_VAR_ANYCAST_DELAY] = 0, + [NEIGH_VAR_PROXY_DELAY] = 0, + [NEIGH_VAR_LOCKTIME] = 1 * HZ, + }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index fe32388..ad2efa5 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1288,8 +1288,6 @@ int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, stru err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); if (err == 0 && fl->flowidn_proto) { - if (!(flags & MSG_DONTWAIT)) - fl->flowidn_flags |= FLOWI_FLAG_CAN_SLEEP; *pprt = xfrm_lookup(&init_net, *pprt, flowidn_to_flowi(fl), sk, 0); if (IS_ERR(*pprt)) { diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index f347a2c..bf85843 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -19,8 +19,7 @@ * the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> #include <linux/moduleparam.h> diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index c32be29..e7b6d53 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -32,8 +32,7 @@ * the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h index 17c7886..7af1ed3 100644 --- a/net/dns_resolver/internal.h +++ b/net/dns_resolver/internal.h @@ -15,8 +15,7 @@ * the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/compiler.h> diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 003f5bb..327060c 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -127,11 +127,6 @@ int hsr_create_self_node(struct list_head *self_node_db, return 0; } -static void node_entry_reclaim(struct rcu_head *rh) -{ - kfree(container_of(rh, struct node_entry, rcu_head)); -} - /* Add/merge node to the database of nodes. 'skb' must contain an HSR * supervision frame. @@ -175,7 +170,7 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, if (node && !ether_addr_equal(node->MacAddressA, hsr_sp->MacAddressA)) { /* Node has changed its AddrA, frame was received from SlaveB */ list_del_rcu(&node->mac_list); - call_rcu(&node->rcu_head, node_entry_reclaim); + kfree_rcu(node, rcu_head); node = NULL; } @@ -183,7 +178,7 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, !ether_addr_equal(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) { /* Cables have been swapped */ list_del_rcu(&node->mac_list); - call_rcu(&node->rcu_head, node_entry_reclaim); + kfree_rcu(node, rcu_head); node = NULL; } @@ -192,7 +187,7 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, !ether_addr_equal(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) { /* Cables have been swapped */ list_del_rcu(&node->mac_list); - call_rcu(&node->rcu_head, node_entry_reclaim); + kfree_rcu(node, rcu_head); node = NULL; } @@ -288,7 +283,8 @@ void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, static bool seq_nr_after(u16 a, u16 b) { /* Remove inconsistency where - * seq_nr_after(a, b) == seq_nr_before(a, b) */ + * seq_nr_after(a, b) == seq_nr_before(a, b) + */ if ((int) b - a == 32768) return false; @@ -416,7 +412,7 @@ void hsr_prune_nodes(struct hsr_priv *hsr_priv) hsr_nl_nodedown(hsr_priv, node->MacAddressA); list_del_rcu(&node->mac_list); /* Note that we need to free this entry later: */ - call_rcu(&node->rcu_head, node_entry_reclaim); + kfree_rcu(node, rcu_head); } } rcu_read_unlock(); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 5325af8..01a5261 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,6 +23,8 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, + [IFLA_HSR_SUPERVISION_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, }; @@ -59,6 +61,31 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return hsr_dev_finalize(dev, link, multicast_spec); } +static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(dev); + + if (hsr_priv->slave[0]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE1, hsr_priv->slave[0]->ifindex)) + goto nla_put_failure; + + if (hsr_priv->slave[1]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE2, hsr_priv->slave[1]->ifindex)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, + hsr_priv->sup_multicast_addr) || + nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr_priv->sequence_nr)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static struct rtnl_link_ops hsr_link_ops __read_mostly = { .kind = "hsr", .maxtype = IFLA_HSR_MAX, @@ -66,6 +93,7 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { .priv_size = sizeof(struct hsr_priv), .setup = hsr_dev_setup, .newlink = hsr_newlink, + .fill_info = hsr_fill_info, }; diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 459e200..a2d2456 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -547,7 +547,7 @@ static int lowpan_header_create(struct sk_buff *skb, hc06_ptr += 3; } else { /* compress nothing */ - memcpy(hc06_ptr, &hdr, 4); + memcpy(hc06_ptr, hdr, 4); /* replace the top byte with new ECN | DSCP format */ *hc06_ptr = tmp; hc06_ptr += 4; diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index ef56ab5..4dd3761 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -46,7 +46,7 @@ MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB", ((signed char) (phy->transmit_power << 2)) >> 2, - (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1 ); + (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1); MASTER_SHOW(cca_mode, "%d"); static ssize_t channels_supported_show(struct device *dev, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 70011e0..b8bc1a3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -126,9 +126,6 @@ static struct list_head inetsw[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw_lock); -struct ipv4_config ipv4_config; -EXPORT_SYMBOL(ipv4_config); - /* New destruction routine */ void inet_sock_destruct(struct sock *sk) @@ -342,7 +339,7 @@ lookup_protocol: inet->hdrincl = 1; } - if (ipv4_config.no_pmtu_disc) + if (net->ipv4.sysctl_ip_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -1133,7 +1130,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_protocol, - inet->inet_sport, inet->inet_dport, sk, false); + inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1377,8 +1374,12 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (!NAPI_GRO_CB(p)->same_flow) continue; - iph2 = ip_hdr(p); - + iph2 = (struct iphdr *)(p->data + off); + /* The above works because, with the exception of the top + * (inner most) layer, we only aggregate pkts with the same + * hdr length so all the hdrs we'll need to verify will start + * at the same offset. + */ if ((iph->protocol ^ iph2->protocol) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { @@ -1397,6 +1398,11 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, } NAPI_GRO_CB(skb)->flush |= flush; + skb_set_network_header(skb, off); + /* The above will be needed by the transport layer if there is one + * immediately following this IP hdr. + */ + skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); @@ -1411,10 +1417,10 @@ out: return pp; } -static int inet_gro_complete(struct sk_buff *skb) +static int inet_gro_complete(struct sk_buff *skb, int nhoff) { - __be16 newlen = htons(skb->len - skb_network_offset(skb)); - struct iphdr *iph = ip_hdr(skb); + __be16 newlen = htons(skb->len - nhoff); + struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); const struct net_offload *ops; int proto = iph->protocol; int err = -ENOSYS; @@ -1427,7 +1433,11 @@ static int inet_gro_complete(struct sk_buff *skb) if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->callbacks.gro_complete(skb); + /* Only need to add sizeof(*iph) to get to the next hdr below + * because any hdr with option will have been flushed in + * inet_gro_receive(). + */ + err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph)); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7808093..1a9b99e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -166,18 +166,20 @@ struct neigh_table arp_tbl = { .id = "arp_cache", .parms = { .tbl = &arp_tbl, - .base_reachable_time = 30 * HZ, - .retrans_time = 1 * HZ, - .gc_staletime = 60 * HZ, .reachable_time = 30 * HZ, - .delay_probe_time = 5 * HZ, - .queue_len_bytes = 64*1024, - .ucast_probes = 3, - .mcast_probes = 3, - .anycast_delay = 1 * HZ, - .proxy_delay = (8 * HZ) / 10, - .proxy_qlen = 64, - .locktime = 1 * HZ, + .data = { + [NEIGH_VAR_MCAST_PROBES] = 3, + [NEIGH_VAR_UCAST_PROBES] = 3, + [NEIGH_VAR_RETRANS_TIME] = 1 * HZ, + [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, + [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, + [NEIGH_VAR_GC_STALETIME] = 60 * HZ, + [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024, + [NEIGH_VAR_PROXY_QLEN] = 64, + [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, + [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, + [NEIGH_VAR_LOCKTIME] = 1 * HZ, + }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, @@ -359,14 +361,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) if (!saddr) saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); - probes -= neigh->parms->ucast_probes; + probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(neigh->nud_state & NUD_VALID)) pr_debug("trying to ucast probe in NUD_INVALID\n"); neigh_ha_snapshot(dst_ha, neigh, dev); dst_hw = dst_ha; } else { - probes -= neigh->parms->app_probes; + probes -= NEIGH_VAR(neigh->parms, APP_PROBES); if (probes < 0) { neigh_app_ns(neigh); return; @@ -379,6 +381,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) { + struct net *net = dev_net(in_dev->dev); int scope; switch (IN_DEV_ARP_IGNORE(in_dev)) { @@ -397,6 +400,7 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) case 3: /* Do not reply for scope host addresses */ sip = 0; scope = RT_SCOPE_LINK; + in_dev = NULL; break; case 4: /* Reserved */ case 5: @@ -408,7 +412,7 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) default: return 0; } - return !inet_confirm_addr(in_dev, sip, tip, scope); + return !inet_confirm_addr(net, in_dev, sip, tip, scope); } static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) @@ -728,6 +732,7 @@ static int arp_process(struct sk_buff *skb) int addr_type; struct neighbour *n; struct net *net = dev_net(dev); + bool is_garp = false; /* arp_rcv below verifies the ARP header and verifies the device * is ARP'able. @@ -871,7 +876,7 @@ static int arp_process(struct sk_buff *skb) if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST || - in_dev->arp_parms->proxy_delay == 0) { + NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) { arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha); @@ -894,10 +899,12 @@ static int arp_process(struct sk_buff *skb) It is possible, that this option should be enabled for some devices (strip is candidate) */ + is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && + inet_addr_type(net, sip) == RTN_UNICAST; + if (n == NULL && - (arp->ar_op == htons(ARPOP_REPLY) || - (arp->ar_op == htons(ARPOP_REQUEST) && tip == sip)) && - inet_addr_type(net, sip) == RTN_UNICAST) + ((arp->ar_op == htons(ARPOP_REPLY) && + inet_addr_type(net, sip) == RTN_UNICAST) || is_garp)) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } @@ -910,7 +917,10 @@ static int arp_process(struct sk_buff *skb) agents are active. Taking the first reply prevents arp trashing and chooses the fastest router. */ - override = time_after(jiffies, n->updated + n->parms->locktime); + override = time_after(jiffies, + n->updated + + NEIGH_VAR(n->parms, LOCKTIME)) || + is_garp; /* Broadcast replies and request packets do not assert neighbour reachability. @@ -1107,7 +1117,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } -int arp_invalidate(struct net_device *dev, __be32 ip) +static int arp_invalidate(struct net_device *dev, __be32 ip) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; @@ -1122,7 +1132,6 @@ int arp_invalidate(struct net_device *dev, __be32 ip) return err; } -EXPORT_SYMBOL(arp_invalidate); static int arp_req_delete_public(struct net *net, struct arpreq *r, struct net_device *dev) @@ -1284,7 +1293,7 @@ void __init arp_init(void) dev_add_pack(&arp_packet_type); arp_proc_init(); #ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL); + neigh_sysctl_register(NULL, &arp_tbl.parms, NULL); #endif register_netdevice_notifier(&arp_netdev_notifier); } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 667c1d4..69e77c8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -31,8 +31,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ @@ -1336,8 +1335,7 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = - netlbl_secattr_catmap_alloc(GFP_ATOMIC); + secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->attr.mls.cat == NULL) return -ENOMEM; @@ -1432,8 +1430,7 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = - netlbl_secattr_catmap_alloc(GFP_ATOMIC); + secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->attr.mls.cat == NULL) return -ENOMEM; @@ -1527,8 +1524,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = - netlbl_secattr_catmap_alloc(GFP_ATOMIC); + secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->attr.mls.cat == NULL) return -ENOMEM; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 19e3637..8b5134c 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -53,7 +53,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, sk->sk_protocol, - inet->inet_sport, usin->sin_port, sk, true); + inet->inet_sport, usin->sin_port, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a1b5bcb..0feebd5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, + [IFA_FLAGS] = { .type = NLA_U32 }, }; #define IN4_ADDR_HSIZE_SHIFT 8 @@ -500,6 +501,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) return -ENOBUFS; } ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); if (ifa->ifa_dev != in_dev) { WARN_ON(ifa->ifa_dev); in_dev_hold(in_dev); @@ -747,6 +749,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, goto errout; ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); in_dev_hold(in_dev); if (tb[IFA_ADDRESS] == NULL) @@ -755,7 +758,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, INIT_HLIST_NODE(&ifa->hash); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); - ifa->ifa_flags = ifm->ifa_flags; + ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : + ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_dev = in_dev; @@ -1236,22 +1240,21 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, /* * Confirm that local IP address exists using wildcards: - * - in_dev: only on this interface, 0=any interface + * - net: netns to check, cannot be NULL + * - in_dev: only on this interface, NULL=any interface * - dst: only in the same subnet as dst, 0=any dst * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ -__be32 inet_confirm_addr(struct in_device *in_dev, +__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope) { __be32 addr = 0; struct net_device *dev; - struct net *net; - if (scope != RT_SCOPE_LINK) + if (in_dev != NULL) return confirm_addr_indev(in_dev, dst, local, scope); - net = dev_net(in_dev->dev); rcu_read_lock(); for_each_netdev_rcu(net, dev) { in_dev = __in_dev_get_rcu(dev); @@ -1435,7 +1438,8 @@ static size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_ADDRESS */ + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ - + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ + + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ + + nla_total_size(4); /* IFA_FLAGS */ } static inline u32 cstamp_delta(unsigned long cstamp) @@ -1503,6 +1507,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, preferred, valid)) goto nla_put_failure; @@ -1691,6 +1696,8 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_PROXY_NEIGH) + size += nla_total_size(4); return size; } @@ -1727,6 +1734,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_MC_FORWARDING, IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, + IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1764,6 +1775,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, + [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -1945,6 +1957,19 @@ static void inet_forward_change(struct net *net) } } +static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf) +{ + if (cnf == net->ipv4.devconf_dflt) + return NETCONFA_IFINDEX_DEFAULT; + else if (cnf == net->ipv4.devconf_all) + return NETCONFA_IFINDEX_ALL; + else { + struct in_device *idev + = container_of(cnf, struct in_device, cnf); + return idev->dev->ifindex; + } +} + static int devinet_conf_proc(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1957,6 +1982,7 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, struct ipv4_devconf *cnf = ctl->extra1; struct net *net = ctl->extra2; int i = (int *)ctl->data - cnf->data; + int ifindex; set_bit(i, cnf->state); @@ -1966,23 +1992,19 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net); + if (i == IPV4_DEVCONF_RP_FILTER - 1 && new_value != old_value) { - int ifindex; - - if (cnf == net->ipv4.devconf_dflt) - ifindex = NETCONFA_IFINDEX_DEFAULT; - else if (cnf == net->ipv4.devconf_all) - ifindex = NETCONFA_IFINDEX_ALL; - else { - struct in_device *idev = - container_of(cnf, struct in_device, - cnf); - ifindex = idev->dev->ifindex; - } + ifindex = devinet_conf_ifindex(net, cnf); inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, ifindex, cnf); } + if (i == IPV4_DEVCONF_PROXY_ARP - 1 && + new_value != old_value) { + ifindex = devinet_conf_ifindex(net, cnf); + inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + ifindex, cnf); + } } return ret; @@ -2160,7 +2182,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) static void devinet_sysctl_register(struct in_device *idev) { - neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); + neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, &idev->cnf); } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 388d113..1e4f660 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -33,8 +33,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); -int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx, int dflt); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 523be38..f2e1573 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -104,7 +104,10 @@ errout: static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; - struct net_device *dev = result->fi->fib_dev; + struct net_device *dev = NULL; + + if (result->fi) + dev = result->fi->fib_dev; /* do not accept result if the route does * not meet the required prefix length diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e63f47a..b53f0bf 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -426,8 +426,9 @@ struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) return NULL; } -int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx, int dflt) +static int fib_detect_death(struct fib_info *fi, int order, + struct fib_info **last_resort, int *last_idx, + int dflt) { struct neighbour *n; int state = NUD_NONE; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e5d4361..2cd02f3 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -28,6 +28,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t enc_features; int ghl = GRE_HEADER_SECTION; struct gre_base_hdr *greh; + u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; __be16 protocol = skb->protocol; int tnl_hlen; @@ -58,13 +59,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } else csum = false; + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + /* setup inner skb. */ skb->protocol = greh->protocol; skb->encapsulation = 0; - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - __skb_pull(skb, ghl); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); @@ -73,8 +74,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); goto out; + } skb = segs; tnl_hlen = skb_tnl_header_len(skb); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5c0e8bc..fb3c563 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -705,7 +705,9 @@ static void icmp_unreach(struct sk_buff *skb) case ICMP_PORT_UNREACH: break; case ICMP_FRAG_NEEDED: - if (ipv4_config.no_pmtu_disc) { + if (net->ipv4.sysctl_ip_no_pmtu_disc == 2) { + goto out; + } else if (net->ipv4.sysctl_ip_no_pmtu_disc) { LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), &iph->daddr); } else { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7defdc9..84c4329 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -310,7 +310,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) struct ip_sf_list *psf; int scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (!is_in(pmc, psf, type, gdeleted, sdeleted)) continue; scount++; @@ -463,7 +463,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, } first = 1; psf_prev = NULL; - for (psf=*psf_list; psf; psf=psf_next) { + for (psf = *psf_list; psf; psf = psf_next) { __be32 *psrc; psf_next = psf->sf_next; @@ -520,7 +520,7 @@ empty_source: return skb; if (pmc->crcount || isquery) { /* make sure we have room for group header */ - if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) { + if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)) { igmpv3_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } @@ -576,7 +576,7 @@ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf) struct ip_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; - for (psf=*ppsf; psf; psf = psf_next) { + for (psf = *ppsf; psf; psf = psf_next) { psf_next = psf->sf_next; if (psf->sf_crcount == 0) { if (psf_prev) @@ -600,7 +600,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) /* deleted MCA's */ pmc_prev = NULL; - for (pmc=in_dev->mc_tomb; pmc; pmc=pmc_next) { + for (pmc = in_dev->mc_tomb; pmc; pmc = pmc_next) { pmc_next = pmc->next; if (pmc->sfmode == MCAST_INCLUDE) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -764,7 +764,7 @@ static void igmp_ifc_event(struct in_device *in_dev) static void igmp_timer_expire(unsigned long data) { - struct ip_mc_list *im=(struct ip_mc_list *)data; + struct ip_mc_list *im = (struct ip_mc_list *)data; struct in_device *in_dev = im->interface; spin_lock(&im->lock); @@ -794,10 +794,10 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) int i, scount; scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) { + for (i = 0; i < nsrcs; i++) { /* skip inactive filters */ if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != @@ -825,10 +825,10 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) /* mark INCLUDE-mode sources */ scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) + for (i = 0; i < nsrcs; i++) if (srcs[i] == psf->sf_inaddr) { psf->sf_gsresp = 1; scount++; @@ -1103,7 +1103,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->tomb = im->tomb; pmc->sources = im->sources; im->tomb = im->sources = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = pmc->crcount; } spin_unlock_bh(&im->lock); @@ -1121,7 +1121,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) spin_lock_bh(&in_dev->mc_tomb_lock); pmc_prev = NULL; - for (pmc=in_dev->mc_tomb; pmc; pmc=pmc->next) { + for (pmc = in_dev->mc_tomb; pmc; pmc = pmc->next) { if (pmc->multiaddr == multiaddr) break; pmc_prev = pmc; @@ -1134,7 +1134,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) } spin_unlock_bh(&in_dev->mc_tomb_lock); if (pmc) { - for (psf=pmc->tomb; psf; psf=psf_next) { + for (psf = pmc->tomb; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -1167,7 +1167,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) psf = pmc->tomb; pmc->tomb = NULL; spin_unlock_bh(&pmc->lock); - for (; psf; psf=psf_next) { + for (; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -1557,7 +1557,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, int rv = 0; psf_prev = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; @@ -1630,7 +1630,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[sfmode]--; } err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { int rv = ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); changerec |= rv > 0; @@ -1650,7 +1650,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; in_dev->mr_ifc_count = pmc->crcount; - for (psf=pmc->sources; psf; psf = psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); } else if (sf_setstate(pmc) || changerec) { @@ -1671,7 +1671,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, struct ip_sf_list *psf, *psf_prev; psf_prev = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; @@ -1699,7 +1699,7 @@ static void sf_markstate(struct ip_mc_list *pmc) struct ip_sf_list *psf; int mca_xcount = pmc->sfcount[MCAST_EXCLUDE]; - for (psf=pmc->sources; psf; psf=psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) if (pmc->sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && @@ -1716,7 +1716,7 @@ static int sf_setstate(struct ip_mc_list *pmc) int new_in, rv; rv = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (pmc->sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; @@ -1726,7 +1726,7 @@ static int sf_setstate(struct ip_mc_list *pmc) if (!psf->sf_oldin) { struct ip_sf_list *prev = NULL; - for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) { + for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) { if (dpsf->sf_inaddr == psf->sf_inaddr) break; prev = dpsf; @@ -1748,7 +1748,7 @@ static int sf_setstate(struct ip_mc_list *pmc) * add or update "delete" records if an active filter * is now inactive */ - for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) + for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) if (dpsf->sf_inaddr == psf->sf_inaddr) break; if (!dpsf) { @@ -1800,7 +1800,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!delta) pmc->sfcount[sfmode]++; err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; @@ -1810,7 +1810,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!delta) pmc->sfcount[sfmode]--; - for (j=0; j<i; j++) + for (j = 0; j < i; j++) (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST @@ -1829,7 +1829,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; in_dev->mr_ifc_count = pmc->crcount; - for (psf=pmc->sources; psf; psf = psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); } else if (sf_setstate(pmc)) { @@ -1844,12 +1844,12 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) { struct ip_sf_list *psf, *nextpsf; - for (psf=pmc->tomb; psf; psf=nextpsf) { + for (psf = pmc->tomb; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } pmc->tomb = NULL; - for (psf=pmc->sources; psf; psf=nextpsf) { + for (psf = pmc->sources; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } @@ -2043,7 +2043,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!psl) goto done; /* err = -EADDRNOTAVAIL */ rv = !0; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, sizeof(__be32)); if (rv == 0) @@ -2062,7 +2062,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); - for (j=i+1; j<psl->sl_count; j++) + for (j = i+1; j < psl->sl_count; j++) psl->sl_addr[j-1] = psl->sl_addr[j]; psl->sl_count--; err = 0; @@ -2088,7 +2088,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct newpsl->sl_max = count; newpsl->sl_count = count - IP_SFBLOCK; if (psl) { - for (i=0; i<psl->sl_count; i++) + for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); @@ -2098,7 +2098,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, sizeof(__be32)); if (rv == 0) @@ -2106,7 +2106,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } if (rv == 0) /* address already there is an error */ goto done; - for (j=psl->sl_count-1; j>=i; j--) + for (j = psl->sl_count-1; j >= i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = mreqs->imr_sourceaddr; psl->sl_count++; @@ -2305,7 +2305,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { return -EFAULT; } - for (i=0; i<copycount; i++) { + for (i = 0; i < copycount; i++) { struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; @@ -2350,7 +2350,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) if (!psl) goto unlock; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } @@ -2423,7 +2423,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u rv = 1; } else if (im) { if (src_addr) { - for (psf=im->sources; psf; psf=psf->sf_next) { + for (psf = im->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == src_addr) break; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 56a964a..a0f52da 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -106,6 +106,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = inet->inet_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; @@ -240,12 +244,19 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_family = tw->tw_family; r->idiag_retrans = 0; + r->id.idiag_if = tw->tw_bound_dev_if; sock_diag_save_cookie(tw, r->id.idiag_cookie); + r->id.idiag_sport = tw->tw_sport; r->id.idiag_dport = tw->tw_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = tw->tw_rcv_saddr; r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; r->idiag_timer = 3; r->idiag_expires = jiffies_to_msecs(tmo); @@ -726,8 +737,13 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = ireq->ir_rmt_port; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = ireq->ir_loc_addr; r->id.idiag_dst[0] = ireq->ir_rmt_addr; + r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 1975f52..f17ea49 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -230,29 +230,6 @@ static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, lro_desc->last_skb = skb; } -static void lro_add_frags(struct net_lro_desc *lro_desc, - int len, int hlen, int truesize, - struct skb_frag_struct *skb_frags, - struct iphdr *iph, struct tcphdr *tcph) -{ - struct sk_buff *skb = lro_desc->parent; - int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); - - lro_add_common(lro_desc, iph, tcph, tcp_data_len); - - skb->truesize += truesize; - - skb_frags[0].page_offset += hlen; - skb_frag_size_sub(&skb_frags[0], hlen); - - while (tcp_data_len > 0) { - *(lro_desc->next_frag) = *skb_frags; - tcp_data_len -= skb_frag_size(skb_frags); - lro_desc->next_frag++; - skb_frags++; - skb_shinfo(skb)->nr_frags++; - } -} static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, struct iphdr *iph, @@ -371,128 +348,6 @@ out: return 1; } - -static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, - void *mac_hdr, - int hlen, __wsum sum, - u32 ip_summed) -{ - struct sk_buff *skb; - struct skb_frag_struct *skb_frags; - int data_len = len; - int hdr_len = min(len, hlen); - - skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad); - if (!skb) - return NULL; - - skb_reserve(skb, lro_mgr->frag_align_pad); - skb->len = len; - skb->data_len = len - hdr_len; - skb->truesize += true_size; - skb->tail += hdr_len; - - memcpy(skb->data, mac_hdr, hdr_len); - - skb_frags = skb_shinfo(skb)->frags; - while (data_len > 0) { - *skb_frags = *frags; - data_len -= skb_frag_size(frags); - skb_frags++; - frags++; - skb_shinfo(skb)->nr_frags++; - } - - skb_shinfo(skb)->frags[0].page_offset += hdr_len; - skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len); - - skb->ip_summed = ip_summed; - skb->csum = sum; - skb->protocol = eth_type_trans(skb, lro_mgr->dev); - return skb; -} - -static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, - void *priv, __wsum sum) -{ - struct net_lro_desc *lro_desc; - struct iphdr *iph; - struct tcphdr *tcph; - struct sk_buff *skb; - u64 flags; - void *mac_hdr; - int mac_hdr_len; - int hdr_len = LRO_MAX_PG_HLEN; - int vlan_hdr_len = 0; - - if (!lro_mgr->get_frag_header || - lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, - (void *)&tcph, &flags, priv)) { - mac_hdr = skb_frag_address(frags); - goto out1; - } - - if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) - goto out1; - - hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr); - mac_hdr_len = (int)((void *)(iph) - mac_hdr); - - lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); - if (!lro_desc) - goto out1; - - if (!lro_desc->active) { /* start new lro session */ - if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL)) - goto out1; - - skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, - hdr_len, 0, lro_mgr->ip_summed_aggr); - if (!skb) - goto out; - - if ((skb->protocol == htons(ETH_P_8021Q)) && - !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) - vlan_hdr_len = VLAN_HLEN; - - iph = (void *)(skb->data + vlan_hdr_len); - tcph = (void *)((u8 *)skb->data + vlan_hdr_len - + IP_HDR_LEN(iph)); - - lro_init_desc(lro_desc, skb, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - return NULL; - } - - if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) - goto out2; - - if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc)) - goto out2; - - lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - - if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) || - lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) - lro_flush(lro_mgr, lro_desc); - - return NULL; - -out2: /* send aggregated packets to the stack */ - lro_flush(lro_mgr, lro_desc); - -out1: /* Original packet has to be posted to the stack */ - skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, - hdr_len, sum, lro_mgr->ip_summed); -out: - return skb; -} - void lro_receive_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, void *priv) @@ -506,23 +361,6 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr, } EXPORT_SYMBOL(lro_receive_skb); -void lro_receive_frags(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, void *priv, __wsum sum) -{ - struct sk_buff *skb; - - skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum); - if (!skb) - return; - - if (lro_mgr->features & LRO_F_NAPI) - netif_receive_skb(skb); - else - netif_rx(skb); -} -EXPORT_SYMBOL(lro_receive_frags); - void lro_flush_all(struct net_lro_mgr *lro_mgr) { int i; @@ -534,14 +372,3 @@ void lro_flush_all(struct net_lro_mgr *lro_mgr) } } EXPORT_SYMBOL(lro_flush_all); - -void lro_flush_pkt(struct net_lro_mgr *lro_mgr, - struct iphdr *iph, struct tcphdr *tcph) -{ - struct net_lro_desc *lro_desc; - - lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); - if (lro_desc->active) - lro_flush(lro_mgr, lro_desc); -} -EXPORT_SYMBOL(lro_flush_pkt); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 33d5537..48f4244 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -109,13 +109,6 @@ static inline void flush_check(struct inet_peer_base *base, int family) } } -void inetpeer_invalidate_family(int family) -{ - atomic_t *fp = inetpeer_seq_ptr(family); - - atomic_inc(fp); -} - #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -227,7 +220,7 @@ static int addr_compare(const struct inetpeer_addr *a, stackptr = _stack; \ *stackptr++ = &_base->root; \ for (u = rcu_deref_locked(_base->root, _base); \ - u != peer_avl_empty; ) { \ + u != peer_avl_empty;) { \ int cmp = addr_compare(_daddr, &u->daddr); \ if (cmp == 0) \ break; \ @@ -282,7 +275,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, *stackptr++ = &start->avl_left; \ v = &start->avl_left; \ for (u = rcu_deref_locked(*v, base); \ - u->avl_right != peer_avl_empty_rcu; ) { \ + u->avl_right != peer_avl_empty_rcu;) { \ v = &u->avl_right; \ *stackptr++ = v; \ u = rcu_deref_locked(*v, base); \ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 2481993..c10a3ce 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -704,7 +704,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(skb, user)) return NULL; - skb->rxhash = 0; + skb_clear_hash(skb); } } return skb; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d7aea4c..e560ef3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -217,6 +217,7 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) iph->saddr, iph->daddr, tpi->key); if (tunnel) { + skb_pop_mac_header(skb); ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error); return PACKET_RCVD; } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ec72645..f4ab72e 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -167,7 +167,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) soffset -= 4; if (soffset > 3) { memcpy(&faddr, &start[soffset-1], 4); - for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4) + for (soffset -= 4, doffset = 4; soffset > 3; soffset -= 4, doffset += 4) memcpy(&dptr[doffset-1], &start[soffset-1], 4); /* * RFC1812 requires to fix illegal source routes. @@ -227,7 +227,7 @@ void ip_options_fragment(struct sk_buff *skb) continue; } optlen = optptr[1]; - if (optlen<2 || optlen>l) + if (optlen < 2 || optlen > l) return; if (!IPOPT_COPIED(*optptr)) memset(optptr, IPOPT_NOOP, optlen); @@ -275,27 +275,27 @@ int ip_options_compile(struct net *net, for (l = opt->optlen; l > 0; ) { switch (*optptr) { - case IPOPT_END: - for (optptr++, l--; l>0; optptr++, l--) { + case IPOPT_END: + for (optptr++, l--; l > 0; optptr++, l--) { if (*optptr != IPOPT_END) { *optptr = IPOPT_END; opt->is_changed = 1; } } goto eol; - case IPOPT_NOOP: + case IPOPT_NOOP: l--; optptr++; continue; } optlen = optptr[1]; - if (optlen<2 || optlen>l) { + if (optlen < 2 || optlen > l) { pp_ptr = optptr; goto error; } switch (*optptr) { - case IPOPT_SSRR: - case IPOPT_LSRR: + case IPOPT_SSRR: + case IPOPT_LSRR: if (optlen < 3) { pp_ptr = optptr + 1; goto error; @@ -321,7 +321,7 @@ int ip_options_compile(struct net *net, opt->is_strictroute = (optptr[0] == IPOPT_SSRR); opt->srr = optptr - iph; break; - case IPOPT_RR: + case IPOPT_RR: if (opt->rr) { pp_ptr = optptr; goto error; @@ -349,7 +349,7 @@ int ip_options_compile(struct net *net, } opt->rr = optptr - iph; break; - case IPOPT_TIMESTAMP: + case IPOPT_TIMESTAMP: if (opt->ts) { pp_ptr = optptr; goto error; @@ -369,13 +369,13 @@ int ip_options_compile(struct net *net, goto error; } switch (optptr[3]&0xF) { - case IPOPT_TS_TSONLY: + case IPOPT_TS_TSONLY: if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; - case IPOPT_TS_TSANDADDR: + case IPOPT_TS_TSANDADDR: if (optptr[2]+7 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -389,7 +389,7 @@ int ip_options_compile(struct net *net, opt->ts_needtime = 1; optptr[2] += 8; break; - case IPOPT_TS_PRESPEC: + case IPOPT_TS_PRESPEC: if (optptr[2]+7 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -405,7 +405,7 @@ int ip_options_compile(struct net *net, opt->ts_needtime = 1; optptr[2] += 8; break; - default: + default: if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) { pp_ptr = optptr + 3; goto error; @@ -433,7 +433,7 @@ int ip_options_compile(struct net *net, } opt->ts = optptr - iph; break; - case IPOPT_RA: + case IPOPT_RA: if (optlen < 4) { pp_ptr = optptr + 1; goto error; @@ -441,7 +441,7 @@ int ip_options_compile(struct net *net, if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; - case IPOPT_CIPSO: + case IPOPT_CIPSO: if ((!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) || opt->cipso) { pp_ptr = optptr; goto error; @@ -452,9 +452,9 @@ int ip_options_compile(struct net *net, goto error; } break; - case IPOPT_SEC: - case IPOPT_SID: - default: + case IPOPT_SEC: + case IPOPT_SID: + default: if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) { pp_ptr = optptr; goto error; @@ -572,7 +572,7 @@ void ip_forward_options(struct sk_buff *skb) optptr = raw + opt->srr; - for ( srrptr=optptr[2], srrspace = optptr[1]; + for ( srrptr = optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4 ) { @@ -628,7 +628,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) if (rt->rt_type != RTN_LOCAL) return -EINVAL; - for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { + for (srrptr = optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { if (srrptr + 3 > srrspace) { icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24)); return -EINVAL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9124027..df18461 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -828,7 +828,7 @@ static int __ip_append_data(struct sock *sk, if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, - mtu-exthdrlen); + mtu - (opt ? opt->optlen : 0)); return -EMSGSIZE; } @@ -1151,7 +1151,8 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, mtu : 0xFFFF; if (cork->length + size > maxnonfragsize - fragheaderlen) { - ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu); + ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, + mtu - (opt ? opt->optlen : 0)); return -EMSGSIZE; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3f85826..a9fc435 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -386,7 +386,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; @@ -423,6 +423,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -1050,7 +1051,7 @@ e_inval: * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. - * This way, receiver doesnt make cache line misses to read rtable. + * This way, receiver doesn't make cache line misses to read rtable. */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) { diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 90ff957..07a5ed3 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -68,6 +68,63 @@ static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, IP_TNL_HASH_BITS); } +static inline void __tunnel_dst_set(struct ip_tunnel_dst *idst, + struct dst_entry *dst) +{ + struct dst_entry *old_dst; + + if (dst && (dst->flags & DST_NOCACHE)) + dst = NULL; + + spin_lock_bh(&idst->lock); + old_dst = rcu_dereference(idst->dst); + rcu_assign_pointer(idst->dst, dst); + dst_release(old_dst); + spin_unlock_bh(&idst->lock); +} + +static inline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst) +{ + __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst); +} + +static inline void tunnel_dst_reset(struct ip_tunnel *t) +{ + tunnel_dst_set(t, NULL); +} + +static void tunnel_dst_reset_all(struct ip_tunnel *t) +{ + int i; + + for_each_possible_cpu(i) + __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); +} + +static inline struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) +{ + struct dst_entry *dst; + + rcu_read_lock(); + dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); + if (dst) + dst_hold(dst); + rcu_read_unlock(); + return dst; +} + +struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie) +{ + struct dst_entry *dst = tunnel_dst_get(t); + + if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + tunnel_dst_reset(t); + return NULL; + } + + return dst; +} + /* Often modified stats are per cpu, other are shared (netdev->stats) */ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot) @@ -75,7 +132,8 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, int i; for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); u64 rx_packets, rx_bytes, tx_packets, tx_bytes; unsigned int start; @@ -318,11 +376,10 @@ failed: return ERR_PTR(err); } -static inline struct rtable *ip_route_output_tunnel(struct net *net, - struct flowi4 *fl4, - int proto, - __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif) +static inline void init_tunnel_flow(struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = oif; @@ -331,7 +388,6 @@ static inline struct rtable *ip_route_output_tunnel(struct net *net, fl4->flowi4_tos = tos; fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; - return ip_route_output_key(net, fl4); } static int ip_tunnel_bind_dev(struct net_device *dev) @@ -350,14 +406,14 @@ static int ip_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_tunnel(tunnel->net, &fl4, - tunnel->parms.iph.protocol, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); + init_tunnel_flow(&fl4, iph->protocol, iph->daddr, + iph->saddr, tunnel->parms.o_key, + RT_TOS(iph->tos), tunnel->parms.link); + rt = ip_route_output_key(tunnel->net, &fl4); + if (!IS_ERR(rt)) { tdev = rt->dst.dev; + tunnel_dst_set(tunnel, dst_clone(&rt->dst)); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) @@ -405,7 +461,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, bool log_ecn_error) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; const struct iphdr *iph = ip_hdr(skb); int err; @@ -528,10 +584,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi4 fl4; u8 tos, ttl; __be16 df; - struct rtable *rt; /* Route to the other host */ + struct rtable *rt = NULL; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; + bool connected = true; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -581,27 +638,39 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, #endif else goto tx_error; + + connected = false; } tos = tnl_params->tos; if (tos & 0x1) { tos &= ~0x1; - if (skb->protocol == htons(ETH_P_IP)) + if (skb->protocol == htons(ETH_P_IP)) { tos = inner_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + connected = false; + } else if (skb->protocol == htons(ETH_P_IPV6)) { tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + connected = false; + } } - rt = ip_route_output_tunnel(tunnel->net, &fl4, - protocol, - dst, tnl_params->saddr, - tunnel->parms.o_key, - RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); + + if (connected) + rt = (struct rtable *)tunnel_dst_check(tunnel, 0); + + if (!rt) { + rt = ip_route_output_key(tunnel->net, &fl4); + + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (connected) + tunnel_dst_set(tunnel, dst_clone(&rt->dst)); } + if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; @@ -696,6 +765,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, if (set_mtu) dev->mtu = mtu; } + tunnel_dst_reset_all(t); netdev_state_change(dev); } @@ -811,6 +881,7 @@ static void ip_tunnel_dev_free(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); gro_cells_destroy(&tunnel->gro_cells); + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -979,18 +1050,31 @@ int ip_tunnel_init(struct net_device *dev) int i, err; dev->destructor = ip_tunnel_dev_free; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ipt_stats; + struct pcpu_sw_netstats *ipt_stats; ipt_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ipt_stats->syncp); } + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; + } + + for_each_possible_cpu(i) { + struct ip_tunnel_dst *idst = per_cpu_ptr(tunnel->dst_cache, i); + idst-> dst = NULL; + spin_lock_init(&idst->lock); + } + err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); return err; } @@ -1015,6 +1099,8 @@ void ip_tunnel_uninit(struct net_device *dev) /* fb_tunnel_dev will be unregisted in net-exit call. */ if (itn->fb_tunnel_dev != dev) ip_tunnel_del(netdev_priv(dev)); + + tunnel_dst_reset_all(tunnel); } EXPORT_SYMBOL_GPL(ip_tunnel_uninit); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 42ffbc8..6156f4e 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -56,7 +56,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, skb_scrub_packet(skb, xnet); - skb->rxhash = 0; + skb_clear_hash(skb); skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); @@ -107,8 +107,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) nf_reset(skb); secpath_reset(skb); - if (!skb->l4_rxhash) - skb->rxhash = 0; + skb_clear_hash_if_not_l4(skb); skb_dst_drop(skb); skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 52b802a..0783200 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -60,7 +60,7 @@ static int vti_rcv(struct sk_buff *skb) tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; u32 oldmark = skb->mark; int ret; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 62212c7..421a249 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -425,6 +425,7 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) goto failure; ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; if (dev_open(dev)) @@ -517,6 +518,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) } ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; rcu_read_unlock(); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 40d5607..81c6910 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -39,23 +39,33 @@ config NF_CONNTRACK_PROC_COMPAT config NF_TABLES_IPV4 depends on NF_TABLES tristate "IPv4 nf_tables support" - -config NFT_REJECT_IPV4 - depends on NF_TABLES_IPV4 - tristate "nf_tables IPv4 reject support" + help + This option enables the IPv4 support for nf_tables. config NFT_CHAIN_ROUTE_IPV4 depends on NF_TABLES_IPV4 tristate "IPv4 nf_tables route chain support" + help + This option enables the "route" chain for IPv4 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, type of service and + the packet mark. config NFT_CHAIN_NAT_IPV4 depends on NF_TABLES_IPV4 depends on NF_NAT_IPV4 && NFT_NAT tristate "IPv4 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv4 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. config NF_TABLES_ARP depends on NF_TABLES tristate "ARP nf_tables support" + help + This option enables the ARP support for nf_tables. config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 19df72b..c16be9d 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -28,7 +28,6 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o -obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b969131..5b6e0df 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -17,10 +17,6 @@ #include <linux/udp.h> #include <linux/icmp.h> #include <net/icmp.h> -#include <net/ip.h> -#include <net/tcp.h> -#include <net/route.h> -#include <net/dst.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_REJECT.h> @@ -28,128 +24,12 @@ #include <linux/netfilter_bridge.h> #endif +#include <net/netfilter/ipv4/nf_reject.h> + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4"); -/* Send RST reply */ -static void send_reset(struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; - const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; - - /* IP header checks: fragment. */ - if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; - - oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); - if (oth == NULL) - return; - - /* No RST for RST. */ - if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; - - /* Check checksum */ - if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); - - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; - - skb_reserve(nskb, LL_MAX_HEADER); - - skb_reset_network_header(nskb); - niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); - niph->version = 4; - niph->ihl = sizeof(struct iphdr) / 4; - niph->tos = 0; - niph->id = 0; - niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; - niph->check = 0; - niph->saddr = oiph->daddr; - niph->daddr = oiph->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - memset(tcph, 0, sizeof(*tcph)); - tcph->source = oth->dest; - tcph->dest = oth->source; - tcph->doff = sizeof(struct tcphdr) / 4; - - if (oth->ack) - tcph->seq = oth->ack_seq; - else { - tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + - oldskb->len - ip_hdrlen(oldskb) - - (oth->doff << 2)); - tcph->ack = 1; - } - - tcph->rst = 1; - tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, - niph->daddr, 0); - nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum_start = (unsigned char *)tcph - nskb->head; - nskb->csum_offset = offsetof(struct tcphdr, check); - - /* ip_route_me_harder expects skb->dst to be set */ - skb_dst_set_noref(nskb, skb_dst(oldskb)); - - nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) - goto free_nskb; - - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - - /* "Never happens" */ - if (nskb->len > dst_mtu(skb_dst(nskb))) - goto free_nskb; - - nf_ct_attach(nskb, oldskb); - -#ifdef CONFIG_BRIDGE_NETFILTER - /* If we use ip_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. - */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - niph->tot_len = htons(nskb->len); - ip_send_check(niph); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - goto free_nskb; - dev_queue_xmit(nskb); - } else -#endif - ip_local_out(nskb); - - return; - - free_nskb: - kfree_skb(nskb); -} - -static inline void send_unreach(struct sk_buff *skb_in, int code) -{ - icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); -} - static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { @@ -157,28 +37,28 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - send_unreach(skb, ICMP_NET_UNREACH); + nf_send_unreach(skb, ICMP_NET_UNREACH); break; case IPT_ICMP_HOST_UNREACHABLE: - send_unreach(skb, ICMP_HOST_UNREACH); + nf_send_unreach(skb, ICMP_HOST_UNREACH); break; case IPT_ICMP_PROT_UNREACHABLE: - send_unreach(skb, ICMP_PROT_UNREACH); + nf_send_unreach(skb, ICMP_PROT_UNREACH); break; case IPT_ICMP_PORT_UNREACHABLE: - send_unreach(skb, ICMP_PORT_UNREACH); + nf_send_unreach(skb, ICMP_PORT_UNREACH); break; case IPT_ICMP_NET_PROHIBITED: - send_unreach(skb, ICMP_NET_ANO); + nf_send_unreach(skb, ICMP_NET_ANO); break; case IPT_ICMP_HOST_PROHIBITED: - send_unreach(skb, ICMP_HOST_ANO); + nf_send_unreach(skb, ICMP_HOST_ANO); break; case IPT_ICMP_ADMIN_PROHIBITED: - send_unreach(skb, ICMP_PKT_FILTERED); + nf_send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(skb, par->hooknum); + nf_send_reset(skb, par->hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index f13bd91..a313c3f 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -423,6 +423,7 @@ static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) static struct xt_target synproxy_tg4_reg __read_mostly = { .name = "SYNPROXY", .family = NFPROTO_IPV4, + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), .target = synproxy_tg4, .targetsize = sizeof(struct xt_synproxy_info), .checkentry = synproxy_tg4_check, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ecd8bec..8127dc8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -548,9 +548,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) module_init(nf_conntrack_l3proto_ipv4_init); module_exit(nf_conntrack_l3proto_ipv4_fini); - -void need_ipv4_conntrack(void) -{ - return; -} -EXPORT_SYMBOL_GPL(need_ipv4_conntrack); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5f011cc..d551e31 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -34,8 +34,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Author: James Morris <jmorris@intercode.com.au> * @@ -462,14 +461,14 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, } if (subid < 40) { - optr [0] = 0; - optr [1] = subid; + optr[0] = 0; + optr[1] = subid; } else if (subid < 80) { - optr [0] = 1; - optr [1] = subid - 40; + optr[0] = 1; + optr[1] = subid - 40; } else { - optr [0] = 2; - optr [1] = subid - 80; + optr[0] = 2; + optr[1] = subid - 80; } *len = 2; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 876c6ca..cae5262 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -53,8 +53,12 @@ #include <net/transp_v6.h> #endif +struct ping_table { + struct hlist_nulls_head hash[PING_HTABLE_SIZE]; + rwlock_t lock; +}; -struct ping_table ping_table; +static struct ping_table ping_table; struct pingv6_ops pingv6_ops; EXPORT_SYMBOL_GPL(pingv6_ops); @@ -668,8 +672,8 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, } EXPORT_SYMBOL_GPL(ping_common_sendmsg); -int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) { struct net *net = sock_net(sk); struct flowi4 fl4; @@ -772,7 +776,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } @@ -841,10 +845,11 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_ERRQUEUE) { if (family == AF_INET) { - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - return pingv6_ops.ipv6_recv_error(sk, msg, len); + return pingv6_ops.ipv6_recv_error(sk, msg, len, + addr_len); #endif } } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4a03358..a6c8a80 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -279,6 +279,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), + SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING), SNMP_MIB_SENTINEL }; @@ -332,22 +333,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_printf(seq, " OutMsgs OutErrors"); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index ce84846..46d6a1c 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -31,10 +31,6 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; -/* - * Add a protocol handler to the hash tables - */ - int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { if (!prot->netns_ok) { @@ -55,10 +51,6 @@ int inet_add_offload(const struct net_offload *prot, unsigned char protocol) } EXPORT_SYMBOL(inet_add_offload); -/* - * Remove a protocol from the hash tables. - */ - int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { int ret; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5cb8ddb..81e6cfd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -575,7 +575,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | + inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); @@ -697,7 +697,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len); + err = ip_recv_error(sk, msg, len, addr_len); goto out; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b95331e..f2ed13c 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -121,7 +121,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ - diff = (count - (cookie >> COOKIEBITS)) & ((__u32) - 1 >> COOKIEBITS); + diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3d69ec8..1d2480a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -286,13 +286,6 @@ static struct ctl_table ipv4_table[] = { .extra2 = &ip_ttl_max, }, { - .procname = "ip_no_pmtu_disc", - .data = &ipv4_config.no_pmtu_disc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "ip_nonlocal_bind", .data = &sysctl_ip_nonlocal_bind, .maxlen = sizeof(int), @@ -707,7 +700,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { + { .procname = "tcp_thin_dupack", .data = &sysctl_tcp_thin_dupack, .maxlen = sizeof(int), @@ -733,6 +726,15 @@ static struct ctl_table ipv4_table[] = { .extra2 = &gso_max_segs, }, { + .procname = "tcp_autocorking", + .data = &sysctl_tcp_autocorking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), @@ -822,6 +824,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_local_port_range, }, + { + .procname = "ip_no_pmtu_disc", + .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c4638e6..e2a4051 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -285,6 +285,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; int sysctl_tcp_min_tso_segs __read_mostly = 2; +int sysctl_tcp_autocorking __read_mostly = 1; + struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -379,7 +381,7 @@ void tcp_init_sock(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - skb_queue_head_init(&tp->out_of_order_queue); + __skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); @@ -619,19 +621,58 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) tp->snd_up = tp->write_seq; } -static inline void tcp_push(struct sock *sk, int flags, int mss_now, - int nonagle) +/* If a not yet filled skb is pushed, do not send it if + * we have data packets in Qdisc or NIC queues : + * Because TX completion will happen shortly, it gives a chance + * to coalesce future sendmsg() payload into this skb, without + * need for a timer, and with no latency trade off. + * As packets containing data payload have a bigger truesize + * than pure acks (dataless) packets, the last checks prevent + * autocorking if we only have an ACK in Qdisc/NIC queues, + * or if TX completion was delayed after we processed ACK packet. + */ +static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, + int size_goal) { - if (tcp_send_head(sk)) { - struct tcp_sock *tp = tcp_sk(sk); + return skb->len < size_goal && + sysctl_tcp_autocorking && + skb != tcp_write_queue_head(sk) && + atomic_read(&sk->sk_wmem_alloc) > skb->truesize; +} + +static void tcp_push(struct sock *sk, int flags, int mss_now, + int nonagle, int size_goal) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; - if (!(flags & MSG_MORE) || forced_push(tp)) - tcp_mark_push(tp, tcp_write_queue_tail(sk)); + if (!tcp_send_head(sk)) + return; + + skb = tcp_write_queue_tail(sk); + if (!(flags & MSG_MORE) || forced_push(tp)) + tcp_mark_push(tp, skb); + + tcp_mark_urg(tp, flags); + + if (tcp_should_autocork(sk, skb, size_goal)) { - tcp_mark_urg(tp, flags); - __tcp_push_pending_frames(sk, mss_now, - (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); + /* avoid atomic op if TSQ_THROTTLED bit is already set */ + if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); + set_bit(TSQ_THROTTLED, &tp->tsq_flags); + } + /* It is possible TX completion already happened + * before we set TSQ_THROTTLED. + */ + if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize) + return; } + + if (flags & MSG_MORE) + nonagle = TCP_NAGLE_CORK; + + __tcp_push_pending_frames(sk, mss_now, nonagle); } static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, @@ -934,7 +975,8 @@ new_segment: wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -944,7 +986,7 @@ wait_for_memory: out: if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) - tcp_push(sk, flags, mss_now, tp->nonagle); + tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); return copied; do_error: @@ -1225,7 +1267,8 @@ wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -1236,7 +1279,7 @@ wait_for_memory: out: if (copied) - tcp_push(sk, flags, mss_now, tp->nonagle); + tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); release_sock(sk); return copied + copied_syn; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c53b7f3..65cf90e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -766,7 +766,7 @@ static void tcp_update_pacing_rate(struct sock *sk) /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -void tcp_set_rto(struct sock *sk) +static void tcp_set_rto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) @@ -3686,7 +3686,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) int opcode = *ptr++; int opsize; - switch(opcode) { + switch (opcode) { case TCPOPT_EOL: return NULL; case TCPOPT_NOP: @@ -4046,7 +4046,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) WARN_ON(before(tp->rcv_nxt, sp->end_seq)); /* Zap this SACK, by moving forward any other SACKS. */ - for (i=this_sack+1; i < num_sacks; i++) + for (i = this_sack+1; i < num_sacks; i++) tp->selective_acks[i-1] = tp->selective_acks[i]; num_sacks--; continue; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59a6f8b..7297b56 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -173,11 +173,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, - orig_sport, orig_dport, sk, true); + orig_sport, orig_dport, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } @@ -827,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; int err = -1; - struct sk_buff * skb; + struct sk_buff *skb; /* First, grab a route. */ if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 03e9154..f7e522c 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -6,13 +6,6 @@ #include <linux/memcontrol.h> #include <linux/module.h> -static void memcg_tcp_enter_memory_pressure(struct sock *sk) -{ - if (sk->sk_cgrp->memory_pressure) - sk->sk_cgrp->memory_pressure = 1; -} -EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); - int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { /* @@ -60,7 +53,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) { struct cg_proto *cg_proto; - u64 old_lim; int i; int ret; @@ -71,7 +63,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (val > RES_COUNTER_MAX) val = RES_COUNTER_MAX; - old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT); ret = res_counter_set_limit(&cg_proto->memory_allocated, val); if (ret) return ret; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 97b6841..3aa9e32 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -425,7 +425,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); - skb_queue_head_init(&newtp->out_of_order_queue); + __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index a2b68a1..2658a27 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -240,7 +240,7 @@ int tcp_gro_complete(struct sk_buff *skb) { struct tcphdr *th = tcp_hdr(skb); - skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_start = (unsigned char *)th - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; @@ -272,45 +272,45 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb) static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { + /* Use the IP hdr immediately proceeding for this transport */ const struct iphdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + + wsum = skb->csum; switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + 0); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } +skip_csum: return tcp_gro_receive(head, skb); } -static int tcp4_gro_complete(struct sk_buff *skb) +static int tcp4_gro_complete(struct sk_buff *skb, int thoff) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), - iph->saddr, iph->daddr, 0); + th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, + iph->daddr, 0); skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; return tcp_gro_complete(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7820f3a..03d26b8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -363,15 +363,17 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, */ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; TCP_SKB_CB(skb)->tcp_flags = flags; TCP_SKB_CB(skb)->sacked = 0; - skb_shinfo(skb)->gso_segs = 1; - skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; + shinfo->gso_segs = 1; + shinfo->gso_size = 0; + shinfo->gso_type = 0; TCP_SKB_CB(skb)->seq = seq; if (flags & (TCPHDR_SYN | TCPHDR_FIN)) @@ -406,7 +408,7 @@ struct tcp_out_options { * Beware: Something in the Internet is very sensitive to the ordering of * TCP options, we learned this through the hard way, so be careful here. * Luckily we can at least blame others for their non-compliance but from - * inter-operatibility perspective it seems that we're somewhat stuck with + * inter-operability perspective it seems that we're somewhat stuck with * the ordering which we have been using if we want to keep working with * those broken things (not that it currently hurts anybody as there isn't * particular reason why the ordering would need to be changed). @@ -679,7 +681,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb * * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb * needs to be reallocated in a driver. - * The invariant being skb->truesize substracted from sk->sk_wmem_alloc + * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc * * Since transmit from skb destructor is forbidden, we use a tasklet * to process all sockets that eventually need to send more skbs. @@ -699,9 +701,9 @@ static void tcp_tsq_handler(struct sock *sk) tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); } /* - * One tasklest per cpu tries to send more skbs. + * One tasklet per cpu tries to send more skbs. * We run in tasklet context but need to disable irqs when - * transfering tsq->head because tcp_wfree() might + * transferring tsq->head because tcp_wfree() might * interrupt us (non NAPI drivers) */ static void tcp_tasklet_func(unsigned long data) @@ -795,7 +797,7 @@ void __init tcp_tasklet_init(void) /* * Write buffer destructor automatically called from kfree_skb. - * We cant xmit new skbs from this context, as we might already + * We can't xmit new skbs from this context, as we might already * hold qdisc lock. */ void tcp_wfree(struct sk_buff *skb) @@ -986,6 +988,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* Make sure we own this skb before messing gso_size/gso_segs */ WARN_ON_ONCE(skb_cloned(skb)); @@ -993,13 +997,13 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, /* Avoid the costly divide in the normal * non-TSO case. */ - skb_shinfo(skb)->gso_segs = 1; - skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; + shinfo->gso_segs = 1; + shinfo->gso_size = 0; + shinfo->gso_type = 0; } else { - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); - skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type = sk->sk_gso_type; + shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now); + shinfo->gso_size = mss_now; + shinfo->gso_type = sk->sk_gso_type; } } @@ -1146,6 +1150,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, */ static void __pskb_trim_head(struct sk_buff *skb, int len) { + struct skb_shared_info *shinfo; int i, k, eat; eat = min_t(int, len, skb_headlen(skb)); @@ -1157,23 +1162,24 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) } eat = len; k = 0; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + shinfo = skb_shinfo(skb); + for (i = 0; i < shinfo->nr_frags; i++) { + int size = skb_frag_size(&shinfo->frags[i]); if (size <= eat) { skb_frag_unref(skb, i); eat -= size; } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; + shinfo->frags[k] = shinfo->frags[i]; if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); + shinfo->frags[k].page_offset += eat; + skb_frag_size_sub(&shinfo->frags[k], eat); eat = 0; } k++; } } - skb_shinfo(skb)->nr_frags = k; + shinfo->nr_frags = k; skb_reset_tail_pointer(skb); skb->data_len -= len; @@ -1378,23 +1384,51 @@ static void tcp_cwnd_validate(struct sock *sk) } } -/* Returns the portion of skb which can be sent right away without - * introducing MSS oddities to segment boundaries. In rare cases where - * mss_now != mss_cache, we will request caller to create a small skb - * per input skb which could be mostly avoided here (if desired). - * - * We explicitly want to create a request for splitting write queue tail - * to a small skb for Nagle purposes while avoiding unnecessary modulos, - * thus all the complexity (cwnd_len is always MSS multiple which we - * return whenever allowed by the other factors). Basically we need the - * modulo only when the receiver window alone is the limiting factor or - * when we would be allowed to send the split-due-to-Nagle skb fully. +/* Minshall's variant of the Nagle send check. */ +static bool tcp_minshall_check(const struct tcp_sock *tp) +{ + return after(tp->snd_sml, tp->snd_una) && + !after(tp->snd_sml, tp->snd_nxt); +} + +/* Update snd_sml if this skb is under mss + * Note that a TSO packet might end with a sub-mss segment + * The test is really : + * if ((skb->len % mss) != 0) + * tp->snd_sml = TCP_SKB_CB(skb)->end_seq; + * But we can avoid doing the divide again given we already have + * skb_pcount = skb->len / mss_now + */ +static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, + const struct sk_buff *skb) +{ + if (skb->len < tcp_skb_pcount(skb) * mss_now) + tp->snd_sml = TCP_SKB_CB(skb)->end_seq; +} + +/* Return false, if packet can be sent now without violation Nagle's rules: + * 1. It is full sized. (provided by caller in %partial bool) + * 2. Or it contains FIN. (already checked by caller) + * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. + * 4. Or TCP_CORK is not set, and all sent packets are ACKed. + * With Minshall's modification: all sent small packets are ACKed. */ -static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, - unsigned int mss_now, unsigned int max_segs) +static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, + unsigned int mss_now, int nonagle) +{ + return partial && + ((nonagle & TCP_NAGLE_CORK) || + (!nonagle && tp->packets_out && tcp_minshall_check(tp))); +} +/* Returns the portion of skb which can be sent right away */ +static unsigned int tcp_mss_split_point(const struct sock *sk, + const struct sk_buff *skb, + unsigned int mss_now, + unsigned int max_segs, + int nonagle) { const struct tcp_sock *tp = tcp_sk(sk); - u32 needed, window, max_len; + u32 partial, needed, window, max_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; max_len = mss_now * max_segs; @@ -1407,7 +1441,15 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_b if (max_len <= needed) return max_len; - return needed - needed % mss_now; + partial = needed % mss_now; + /* If last segment is not a full MSS, check if Nagle rules allow us + * to include this last segment in this skb. + * Otherwise, we'll split the skb at last MSS boundary + */ + if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle)) + return needed - partial; + + return needed; } /* Can at least one segment of SKB be sent right now, according to the @@ -1447,28 +1489,6 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, return tso_segs; } -/* Minshall's variant of the Nagle send check. */ -static inline bool tcp_minshall_check(const struct tcp_sock *tp) -{ - return after(tp->snd_sml, tp->snd_una) && - !after(tp->snd_sml, tp->snd_nxt); -} - -/* Return false, if packet can be sent now without violation Nagle's rules: - * 1. It is full sized. - * 2. Or it contains FIN. (already checked by caller) - * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. - * 4. Or TCP_CORK is not set, and all sent packets are ACKed. - * With Minshall's modification: all sent small packets are ACKed. - */ -static inline bool tcp_nagle_check(const struct tcp_sock *tp, - const struct sk_buff *skb, - unsigned int mss_now, int nonagle) -{ - return skb->len < mss_now && - ((nonagle & TCP_NAGLE_CORK) || - (!nonagle && tp->packets_out && tcp_minshall_check(tp))); -} /* Return true if the Nagle test allows this packet to be * sent now. @@ -1489,7 +1509,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return true; - if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) + if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle)) return true; return false; @@ -1892,7 +1912,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, limit = tcp_mss_split_point(sk, skb, mss_now, min_t(unsigned int, cwnd_quota, - sk->sk_gso_max_segs)); + sk->sk_gso_max_segs), + nonagle); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) @@ -2756,7 +2777,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, EXPORT_SYMBOL(tcp_make_synack); /* Do all connect socket setups that can be done AF independent. */ -void tcp_connect_init(struct sock *sk) +static void tcp_connect_init(struct sock *sk) { const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 8b97d71e..1f2d376 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -38,7 +38,7 @@ MODULE_DESCRIPTION("TCP cwnd snooper"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.1"); -static int port __read_mostly = 0; +static int port __read_mostly; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); @@ -46,7 +46,7 @@ static unsigned int bufsize __read_mostly = 4096; MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); module_param(bufsize, uint, 0); -static unsigned int fwmark __read_mostly = 0; +static unsigned int fwmark __read_mostly; MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); module_param(fwmark, uint, 0); diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index a347a07..1a8d271 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -3,7 +3,7 @@ * YeAH TCP * * For further details look at: - * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf + * https://web.archive.org/web/20080316215752/http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf * */ #include <linux/mm.h> @@ -15,13 +15,13 @@ #include "tcp_vegas.h" -#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck -#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt -#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss -#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion -#define TCP_YEAH_PHY 8 //lin maximum delta from base -#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss -#define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count +#define TCP_YEAH_ALPHA 80 /* number of packets queued at the bottleneck */ +#define TCP_YEAH_GAMMA 1 /* fraction of queue to be removed per rtt */ +#define TCP_YEAH_DELTA 3 /* log minimum fraction of cwnd to be removed on loss */ +#define TCP_YEAH_EPSILON 1 /* log maximum fraction to be removed on early decongestion */ +#define TCP_YEAH_PHY 8 /* maximum delta from base */ +#define TCP_YEAH_RHO 16 /* minimum number of consecutive rtt to consider competition on loss */ +#define TCP_YEAH_ZETA 50 /* minimum number of state switches to reset reno_count */ #define TCP_SCALABLE_AI_CNT 100U @@ -214,9 +214,9 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { if (yeah->doing_reno_now < TCP_YEAH_RHO) { reduction = yeah->lastQ; - reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) ); + reduction = min(reduction, max(tp->snd_cwnd>>1, 2U)); - reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); + reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); } else reduction = max(tp->snd_cwnd>>1, 2U); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5944d7d..80f649f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -560,15 +560,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { - struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) - return sk; - else - return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - udptable); + return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, @@ -990,7 +986,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl4 = &fl4_stack; flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, + inet_sk_flowi_flags(sk), faddr, saddr, dport, inet->inet_sport); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); @@ -999,7 +995,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } @@ -1098,6 +1094,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, struct udp_sock *up = udp_sk(sk); int ret; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + if (!up->pending) { struct msghdr msg = { .msg_flags = flags|MSG_MORE }; @@ -1236,7 +1235,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, bool slow; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), @@ -1600,12 +1599,16 @@ static void flush_stack(struct sock **stack, unsigned int count, kfree_skb(skb1); } -static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +/* For TCP sockets, sk_rx_dst is protected by socket lock + * For UDP, we use xchg() to guard against concurrent changes. + */ +static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *old; dst_hold(dst); - sk->sk_rx_dst = dst; + old = xchg(&sk->sk_rx_dst, dst); + dst_release(old); } /* @@ -1736,15 +1739,16 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - if (skb->sk) { + sk = skb_steal_sock(skb); + if (sk) { + struct dst_entry *dst = skb_dst(skb); int ret; - sk = skb->sk; - if (unlikely(sk->sk_rx_dst == NULL)) - udp_sk_rx_dst_set(sk, skb); + if (unlikely(sk->sk_rx_dst != dst)) + udp_sk_rx_dst_set(sk, dst); ret = udp_queue_rcv_skb(sk, skb); - + sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 */ @@ -1910,17 +1914,20 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, void udp_v4_early_demux(struct sk_buff *skb) { - const struct iphdr *iph = ip_hdr(skb); - const struct udphdr *uh = udp_hdr(skb); + struct net *net = dev_net(skb->dev); + const struct iphdr *iph; + const struct udphdr *uh; struct sock *sk; struct dst_entry *dst; - struct net *net = dev_net(skb->dev); int dif = skb->dev->ifindex; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) return; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, @@ -2471,6 +2478,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; @@ -2490,8 +2498,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, + mac_len); goto out; + } outer_hlen = skb_tnl_header_len(skb); skb = segs; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 83206de..79c62bd 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -41,6 +41,14 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; + int offset; + __wsum csum; + + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { + segs = skb_udp_tunnel_segment(skb, features); + goto out; + } mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -63,27 +71,20 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() */ - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) - segs = skb_udp_tunnel_segment(skb, features); - else { - int offset; - __wsum csum; - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - segs = skb_segment(skb, features); - } + segs = skb_segment(skb, features); out: return segs; } diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index e3db3f9..71acd00 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -48,7 +48,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); skb_set_network_header(skb, -x->props.header_len - - hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); + hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); if (x->sel.family != AF_INET6) skb->network_header += IPV4_BEET_PHMAXLEN; skb->mac_header = skb->network_header + diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 0b2a064..542074c 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -16,7 +16,7 @@ static int xfrm4_init_flags(struct xfrm_state *x) { - if (ipv4_config.no_pmtu_disc) + if (xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) x->props.flags |= XFRM_STATE_NOPMTUDISC; return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 12c97d8..31f75ea 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -442,6 +442,8 @@ static int inet6_netconf_msgsize_devconf(int type) if (type == -1 || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); #endif + if (type == -1 || type == NETCONFA_PROXY_NEIGH) + size += nla_total_size(4); return size; } @@ -475,6 +477,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, devconf->mc_forwarding) < 0) goto nla_put_failure; #endif + if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -509,6 +515,7 @@ errout: static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, + [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, @@ -834,6 +841,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out; } + neigh_parms_data_state_setall(idev->nd_parms); + ifa->addr = *addr; if (peer_addr) ifa->peer_addr = *peer_addr; @@ -986,12 +995,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) * --yoshfuji */ if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { - struct in6_addr prefix; struct rt6_info *rt; - ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - - rt = addrconf_get_prefix_route(&prefix, + rt = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, 0, RTF_GATEWAY | RTF_DEFAULT); @@ -1024,7 +1030,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i u32 addr_flags; unsigned long now = jiffies; - write_lock(&idev->lock); + write_lock_bh(&idev->lock); if (ift) { spin_lock_bh(&ift->lock); memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8); @@ -1036,7 +1042,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i retry: in6_dev_hold(idev); if (idev->cnf.use_tempaddr <= 0) { - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); pr_info("%s: use_tempaddr is disabled\n", __func__); in6_dev_put(idev); ret = -1; @@ -1046,7 +1052,7 @@ retry: if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { idev->cnf.use_tempaddr = -1; /*XXX*/ spin_unlock_bh(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); pr_warn("%s: regeneration time exceeded - disabled temporary address support\n", __func__); in6_dev_put(idev); @@ -1071,8 +1077,8 @@ retry: regen_advance = idev->cnf.regen_max_retry * idev->cnf.dad_transmits * - idev->nd_parms->retrans_time / HZ; - write_unlock(&idev->lock); + NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; + write_unlock_bh(&idev->lock); /* A temporary address is created only if this calculated Preferred * Lifetime is greater than REGEN_ADVANCE time units. In particular, @@ -1099,7 +1105,7 @@ retry: in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); tmpaddr = &addr; - write_lock(&idev->lock); + write_lock_bh(&idev->lock); goto retry; } @@ -1407,7 +1413,7 @@ try_nextdev: EXPORT_SYMBOL(ipv6_dev_get_saddr); int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, - unsigned char banned_flags) + u32 banned_flags) { struct inet6_ifaddr *ifp; int err = -EADDRNOTAVAIL; @@ -1424,7 +1430,7 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, } int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, - unsigned char banned_flags) + u32 banned_flags) { struct inet6_dev *idev; int err = -EADDRNOTAVAIL; @@ -1671,7 +1677,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) @@ -1682,7 +1688,7 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) @@ -1888,7 +1894,8 @@ static void ipv6_regen_rndid(unsigned long data) expires = jiffies + idev->cnf.temp_prefered_lft * HZ - - idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - + idev->cnf.regen_max_retry * idev->cnf.dad_transmits * + NEIGH_VAR(idev->nd_parms, RETRANS_TIME) - idev->cnf.max_desync_factor * HZ; if (time_before(expires, jiffies)) { pr_warn("%s: too short regeneration interval; timer disabled for %s\n", @@ -2016,6 +2023,73 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) return idev; } +static void manage_tempaddrs(struct inet6_dev *idev, + struct inet6_ifaddr *ifp, + __u32 valid_lft, __u32 prefered_lft, + bool create, unsigned long now) +{ + u32 flags; + struct inet6_ifaddr *ift; + + read_lock_bh(&idev->lock); + /* update all temporary addresses in the list */ + list_for_each_entry(ift, &idev->tempaddr_list, tmp_list) { + int age, max_valid, max_prefered; + + if (ifp != ift->ifpub) + continue; + + /* RFC 4941 section 3.3: + * If a received option will extend the lifetime of a public + * address, the lifetimes of temporary addresses should + * be extended, subject to the overall constraint that no + * temporary addresses should ever remain "valid" or "preferred" + * for a time longer than (TEMP_VALID_LIFETIME) or + * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively. + */ + age = (now - ift->cstamp) / HZ; + max_valid = idev->cnf.temp_valid_lft - age; + if (max_valid < 0) + max_valid = 0; + + max_prefered = idev->cnf.temp_prefered_lft - + idev->cnf.max_desync_factor - age; + if (max_prefered < 0) + max_prefered = 0; + + if (valid_lft > max_valid) + valid_lft = max_valid; + + if (prefered_lft > max_prefered) + prefered_lft = max_prefered; + + spin_lock(&ift->lock); + flags = ift->flags; + ift->valid_lft = valid_lft; + ift->prefered_lft = prefered_lft; + ift->tstamp = now; + if (prefered_lft > 0) + ift->flags &= ~IFA_F_DEPRECATED; + + spin_unlock(&ift->lock); + if (!(flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify(0, ift); + } + + if ((create || list_empty(&idev->tempaddr_list)) && + idev->cnf.use_tempaddr > 0) { + /* When a new public address is created as described + * in [ADDRCONF], also create a new temporary address. + * Also create a temporary address if it's enabled but + * no temporary address currently exists. + */ + read_unlock_bh(&idev->lock); + ipv6_create_tempaddr(ifp, NULL); + } else { + read_unlock_bh(&idev->lock); + } +} + void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; @@ -2170,6 +2244,7 @@ ok: return; } + ifp->flags |= IFA_F_MANAGETEMPADDR; update_lft = 0; create = 1; ifp->cstamp = jiffies; @@ -2178,9 +2253,8 @@ ok: } if (ifp) { - int flags; + u32 flags; unsigned long now; - struct inet6_ifaddr *ift; u32 stored_lft; /* update lifetime (RFC2462 5.5.3 e) */ @@ -2221,70 +2295,8 @@ ok: } else spin_unlock(&ifp->lock); - read_lock_bh(&in6_dev->lock); - /* update all temporary addresses in the list */ - list_for_each_entry(ift, &in6_dev->tempaddr_list, - tmp_list) { - int age, max_valid, max_prefered; - - if (ifp != ift->ifpub) - continue; - - /* - * RFC 4941 section 3.3: - * If a received option will extend the lifetime - * of a public address, the lifetimes of - * temporary addresses should be extended, - * subject to the overall constraint that no - * temporary addresses should ever remain - * "valid" or "preferred" for a time longer than - * (TEMP_VALID_LIFETIME) or - * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), - * respectively. - */ - age = (now - ift->cstamp) / HZ; - max_valid = in6_dev->cnf.temp_valid_lft - age; - if (max_valid < 0) - max_valid = 0; - - max_prefered = in6_dev->cnf.temp_prefered_lft - - in6_dev->cnf.max_desync_factor - - age; - if (max_prefered < 0) - max_prefered = 0; - - if (valid_lft > max_valid) - valid_lft = max_valid; - - if (prefered_lft > max_prefered) - prefered_lft = max_prefered; - - spin_lock(&ift->lock); - flags = ift->flags; - ift->valid_lft = valid_lft; - ift->prefered_lft = prefered_lft; - ift->tstamp = now; - if (prefered_lft > 0) - ift->flags &= ~IFA_F_DEPRECATED; - - spin_unlock(&ift->lock); - if (!(flags&IFA_F_TENTATIVE)) - ipv6_ifa_notify(0, ift); - } - - if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) { - /* - * When a new public address is created as - * described in [ADDRCONF], also create a new - * temporary address. Also create a temporary - * address if it's enabled but no temporary - * address currently exists. - */ - read_unlock_bh(&in6_dev->lock); - ipv6_create_tempaddr(ifp, NULL); - } else { - read_unlock_bh(&in6_dev->lock); - } + manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft, + create, now); in6_ifa_put(ifp); addrconf_verify(0); @@ -2363,10 +2375,11 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, +static int inet6_addr_add(struct net *net, int ifindex, + const struct in6_addr *pfx, const struct in6_addr *peer_pfx, - unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, - __u32 valid_lft) + unsigned int plen, __u32 ifa_flags, + __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -2385,6 +2398,9 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; + if (ifa_flags & IFA_F_MANAGETEMPADDR && plen != 64) + return -EINVAL; + dev = __dev_get_by_index(net, ifindex); if (!dev) return -ENODEV; @@ -2425,6 +2441,9 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p * manually configured addresses */ addrconf_dad_start(ifp); + if (ifa_flags & IFA_F_MANAGETEMPADDR) + manage_tempaddrs(idev, ifp, valid_lft, prefered_lft, + true, jiffies); in6_ifa_put(ifp); addrconf_verify(0); return 0; @@ -2613,7 +2632,7 @@ static void init_loopback(struct net_device *dev) if (sp_ifa->rt) continue; - sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false); /* Failure cases are ignored */ if (!IS_ERR(sp_rt)) { @@ -3176,7 +3195,8 @@ static void addrconf_dad_timer(unsigned long data) } ifp->dad_probes--; - addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time); + addrconf_mod_dad_timer(ifp, + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); spin_unlock(&ifp->lock); write_unlock(&idev->lock); @@ -3356,7 +3376,7 @@ static int if6_seq_show(struct seq_file *seq, void *v) ifp->idev->dev->ifindex, ifp->prefix_len, ifp->scope, - ifp->flags, + (u8) ifp->flags, ifp->idev->dev->name); return 0; } @@ -3456,7 +3476,12 @@ restart: &inet6_addr_lst[i], addr_lst) { unsigned long age; - if (ifp->flags & IFA_F_PERMANENT) + /* When setting preferred_lft to a value not zero or + * infinity, while valid_lft is infinity + * IFA_F_PERMANENT has a non-infinity life time. + */ + if ((ifp->flags & IFA_F_PERMANENT) && + (ifp->prefered_lft == INFINITY_LIFE_TIME)) continue; spin_lock(&ifp->lock); @@ -3481,7 +3506,8 @@ restart: ifp->flags |= IFA_F_DEPRECATED; } - if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)) + if ((ifp->valid_lft != INFINITY_LIFE_TIME) && + (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))) next = ifp->tstamp + ifp->valid_lft * HZ; spin_unlock(&ifp->lock); @@ -3497,7 +3523,7 @@ restart: !(ifp->flags&IFA_F_TENTATIVE)) { unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * ifp->idev->cnf.dad_transmits * - ifp->idev->nd_parms->retrans_time / HZ; + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME) / HZ; if (age >= ifp->prefered_lft - regen_advance) { struct inet6_ifaddr *ifpub = ifp->ifpub; @@ -3572,6 +3598,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, + [IFA_FLAGS] = { .len = sizeof(u32) }, }; static int @@ -3595,16 +3622,21 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); } -static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, +static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, u32 prefered_lft, u32 valid_lft) { u32 flags; clock_t expires; unsigned long timeout; + bool was_managetempaddr; if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; + if (ifa_flags & IFA_F_MANAGETEMPADDR && + (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64)) + return -EINVAL; + timeout = addrconf_timeout_fixup(valid_lft, HZ); if (addrconf_finite_timeout(timeout)) { expires = jiffies_to_clock_t(timeout * HZ); @@ -3624,7 +3656,10 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, } spin_lock_bh(&ifp->lock); - ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; + was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR; + ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | + IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR); + ifp->flags |= ifa_flags; ifp->tstamp = jiffies; ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; @@ -3635,6 +3670,14 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, expires, flags); + + if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) { + if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR)) + valid_lft = prefered_lft = 0; + manage_tempaddrs(ifp->idev, ifp, valid_lft, prefered_lft, + !was_managetempaddr, jiffies); + } + addrconf_verify(0); return 0; @@ -3650,7 +3693,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct inet6_ifaddr *ifa; struct net_device *dev; u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; - u8 ifa_flags; + u32 ifa_flags; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3677,8 +3720,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) if (dev == NULL) return -ENODEV; + ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; + /* We ignore other flags so far. */ - ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); + ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR; ifa = ipv6_get_ifaddr(net, pfx, dev, 1); if (ifa == NULL) { @@ -3702,7 +3747,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } -static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, +static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u32 flags, u8 scope, int ifindex) { struct ifaddrmsg *ifm; @@ -3745,7 +3790,8 @@ static inline int inet6_ifaddr_msgsize(void) return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(16) /* IFA_LOCAL */ + nla_total_size(16) /* IFA_ADDRESS */ - + nla_total_size(sizeof(struct ifa_cacheinfo)); + + nla_total_size(sizeof(struct ifa_cacheinfo)) + + nla_total_size(4) /* IFA_FLAGS */; } static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, @@ -3761,7 +3807,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); - if (!(ifa->flags&IFA_F_PERMANENT)) { + if (!((ifa->flags&IFA_F_PERMANENT) && + (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; valid = ifa->valid_lft; if (preferred != INFINITY_LIFE_TIME) { @@ -3793,6 +3840,9 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) goto error; + if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) + goto error; + return nlmsg_end(skb, nlh); error: @@ -4196,7 +4246,7 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) ci.max_reasm_len = IPV6_MAXPLEN; ci.tstamp = cstamp_delta(idev->tstamp); ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); - ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time); + ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME)); if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); @@ -4689,6 +4739,46 @@ int addrconf_sysctl_disable(struct ctl_table *ctl, int write, return ret; } +static +int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int *valp = ctl->data; + int ret; + int old, new; + + old = *valp; + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + new = *valp; + + if (write && old != new) { + struct net *net = ctl->extra2; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (valp == &net->ipv6.devconf_dflt->proxy_ndp) + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); + else if (valp == &net->ipv6.devconf_all->proxy_ndp) + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); + else { + struct inet6_dev *idev = ctl->extra1; + + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + idev->dev->ifindex, + &idev->cnf); + } + rtnl_unlock(); + } + + return ret; +} + + static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; @@ -4875,7 +4965,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_proxy_ndp, }, { .procname = "accept_source_route", @@ -4991,7 +5081,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) static void addrconf_sysctl_register(struct inet6_dev *idev) { - neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6", + neigh_sysctl_register(idev->dev, idev->nd_parms, &ndisc_ifinfo_sysctl_change); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev, &idev->cnf); @@ -5124,9 +5214,7 @@ int __init addrconf_init(void) addrconf_verify(0); - err = rtnl_af_register(&inet6_ops); - if (err < 0) - goto errout_af; + rtnl_af_register(&inet6_ops); err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, NULL); @@ -5150,7 +5238,6 @@ int __init addrconf_init(void) return 0; errout: rtnl_af_unregister(&inet6_ops); -errout_af: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4fbdb70..c921d5d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -213,7 +213,7 @@ lookup_protocol: inet->mc_list = NULL; inet->rcv_tos = 0; - if (ipv4_config.no_pmtu_disc) + if (net->ipv4.sysctl_ip_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -661,7 +661,7 @@ int inet6_sk_rebuild_header(struct sock *sk) final_p = fl6_update_dst(&fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; sk->sk_err_soft = -PTR_ERR(dst); @@ -683,8 +683,7 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb) if (np->rxopt.all) { if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || - ((IPV6_FLOWINFO_MASK & - *(__be32 *)skb_network_header(skb)) && + (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) && np->rxopt.bits.rxflow) || (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 82e1da3..81e496a 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -12,8 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors * diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a454b0f..6983058 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -73,7 +73,6 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; } } @@ -171,7 +170,7 @@ ipv4_connected: opt = flowlabel ? flowlabel->opt : np->opt; final_p = fl6_update_dst(&fl6, opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); err = 0; if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -318,7 +317,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -369,6 +368,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) &sin->sin6_addr); sin->sin6_scope_id = 0; } + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -377,6 +377,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; + sin->sin6_port = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -457,6 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; + *addr_len = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b8719df..6eef8a7 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -12,8 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors * diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e275916..3fd0a57 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -122,7 +122,11 @@ out: static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct rt6_info *rt = (struct rt6_info *) arg->result; - struct net_device *dev = rt->rt6i_idev->dev; + struct net_device *dev = NULL; + + if (rt->rt6i_idev) + dev = rt->rt6i_idev->dev; + /* do not accept result if the route does * not meet the required prefix length */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index eef8d94..5d42009 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -315,8 +315,10 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +static struct dst_entry *icmpv6_route_lookup(struct net *net, + struct sk_buff *skb, + struct sock *sk, + struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -984,7 +986,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -struct ctl_table ipv6_icmp_table_template[] = { +static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 77bb8af..c913818 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -86,7 +86,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, fl6->fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(fl6)); - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (IS_ERR(dst)) return NULL; @@ -216,7 +216,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (!IS_ERR(dst)) __inet6_csk_dst_store(sk, dst, NULL, NULL); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5550a81..075602f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1530,7 +1530,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, } void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) + void *arg) { struct fib6_table *table; struct hlist_head *head; @@ -1542,7 +1542,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, prune, arg); + func, 0, arg); write_unlock_bh(&table->tb6_lock); } } @@ -1636,7 +1636,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) gc_args.more = icmp6_dst_gc(); - fib6_clean_all(net, fib6_age, 0, NULL); + fib6_clean_all(net, fib6_age, NULL); now = jiffies; net->ipv6.ip6_rt_last_gc = now; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8acb286..e7a440d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -61,7 +61,6 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 #define HASH_SIZE_SHIFT 5 @@ -499,7 +498,7 @@ static int ip6gre_rcv(struct sk_buff *skb) &ipv6h->saddr, &ipv6h->daddr, key, gre_proto); if (tunnel) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; @@ -846,7 +845,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; @@ -1266,12 +1265,12 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6gre_tunnel_stats; + struct pcpu_sw_netstats *ip6gre_tunnel_stats; ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ip6gre_tunnel_stats->syncp); } @@ -1467,12 +1466,12 @@ static int ip6gre_tap_init(struct net_device *dev) ip6gre_tnl_link_config(tunnel, 1); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6gre_tap_stats; + struct pcpu_sw_netstats *ip6gre_tap_stats; ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ip6gre_tap_stats->syncp); } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 4b85169..6fb4162 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -154,6 +154,32 @@ out: return segs; } +/* Return the total length of all the extension hdrs, following the same + * logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs. + */ +static int ipv6_exthdrs_len(struct ipv6hdr *iph, + const struct net_offload **opps) +{ + struct ipv6_opt_hdr *opth = (void *)iph; + int len = 0, proto, optlen = sizeof(*iph); + + proto = iph->nexthdr; + for (;;) { + if (proto != NEXTHDR_HOP) { + *opps = rcu_dereference(inet6_offloads[proto]); + if (unlikely(!(*opps))) + break; + if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + opth = (void *)opth + optlen; + optlen = ipv6_optlen(opth); + len += optlen; + proto = opth->nexthdr; + } + return len; +} + static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -177,6 +203,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, goto out; } + skb_set_network_header(skb, off); skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); @@ -211,12 +238,16 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, if (!NAPI_GRO_CB(p)->same_flow) continue; - iph2 = ipv6_hdr(p); + iph2 = (struct ipv6hdr *)(p->data + off); first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; - /* All fields must match except length and Traffic Class. */ - if (nlen != skb_network_header_len(p) || - (first_word & htonl(0xF00FFFFF)) || + /* All fields must match except length and Traffic Class. + * XXX skbs on the gro_list have all been parsed and pulled + * already so we don't need to compare nlen + * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops))) + * memcmp() alone below is suffcient, right? + */ + if ((first_word & htonl(0xF00FFFFF)) || memcmp(&iph->nexthdr, &iph2->nexthdr, nlen - offsetof(struct ipv6hdr, nexthdr))) { NAPI_GRO_CB(p)->same_flow = 0; @@ -245,21 +276,21 @@ out: return pp; } -static int ipv6_gro_complete(struct sk_buff *skb) +static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); int err = -ENOSYS; - iph->payload_len = htons(skb->len - skb_network_offset(skb) - - sizeof(*iph)); + iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); rcu_read_lock(); - ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); + + nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->callbacks.gro_complete(skb); + err = ops->callbacks.gro_complete(skb, nhoff); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 59df872..d1de956 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } @@ -336,7 +336,8 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } @@ -370,8 +371,8 @@ int ip6_forward(struct sk_buff *skb) /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -384,14 +385,15 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb_dst(skb); @@ -448,16 +450,17 @@ int ip6_forward(struct sk_buff *skb) /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -938,7 +941,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * @sk: socket which provides route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup - * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow. * @@ -946,8 +948,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * error code. */ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep) + const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; int err; @@ -957,8 +958,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (can_sleep) - fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -969,7 +968,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * @sk: socket which provides the dst cache and route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup - * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. @@ -980,8 +978,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * error code. */ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep) + const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); int err; @@ -993,8 +990,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (can_sleep) - fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -1162,10 +1157,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, np->cork.hop_limit = hlimit; np->cork.tclass = tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(&rt->dst); else - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) @@ -1193,11 +1188,35 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); + unsigned int maxnonfragsize, headersize; + + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->tot_len : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ? + mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + + /* dontfrag active */ + if ((cork->length + length > mtu - headersize) && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } + + if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); return -EMSGSIZE; } } @@ -1222,12 +1241,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || @@ -1267,7 +1280,7 @@ alloc_new_skb: if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, - np->pmtudisc == + np->pmtudisc >= IPV6_PMTUDISC_PROBE); skb_prev = skb; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index d606232..1e5e240 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -29,7 +29,6 @@ #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/if_tunnel.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> @@ -70,7 +69,6 @@ MODULE_ALIAS_NETDEV("ip6tnl0"); #define IP6_TNL_TRACE(x...) do {;} while(0) #endif -#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 #define HASH_SIZE_SHIFT 5 @@ -103,16 +101,26 @@ struct ip6_tnl_net { static struct net_device_stats *ip6_get_stats(struct net_device *dev) { - struct pcpu_tstats sum = { 0 }; + struct pcpu_sw_netstats tmp, sum = { 0 }; int i; for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; + unsigned int start; + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + tmp.rx_packets = tstats->rx_packets; + tmp.rx_bytes = tstats->rx_bytes; + tmp.tx_packets = tstats->tx_packets; + tmp.tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + sum.rx_packets += tmp.rx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.tx_bytes += tmp.tx_bytes; } dev->stats.rx_packets = sum.rx_packets; dev->stats.rx_bytes = sum.rx_bytes; @@ -785,7 +793,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (t->parms.proto != ipproto && t->parms.proto != 0) { rcu_read_unlock(); @@ -824,8 +832,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); @@ -1131,7 +1141,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; @@ -1498,12 +1508,12 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6_tnl_stats; + struct pcpu_sw_netstats *ip6_tnl_stats; ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ip6_tnl_stats->syncp); } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ed94ba6..b50acd5 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -24,7 +24,6 @@ #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/if_tunnel.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> @@ -75,26 +74,6 @@ struct vti6_net { struct ip6_tnl __rcu **tnls[2]; }; -static struct net_device_stats *vti6_get_stats(struct net_device *dev) -{ - struct pcpu_tstats sum = { 0 }; - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; - } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; -} - #define for_each_vti6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) @@ -312,7 +291,7 @@ static int vti6_rcv(struct sk_buff *skb) if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); @@ -331,8 +310,10 @@ static int vti6_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); skb->mark = 0; secpath_reset(skb); @@ -716,7 +697,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, .ndo_change_mtu = vti6_change_mtu, - .ndo_get_stats = vti6_get_stats, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; /** @@ -753,7 +734,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index ce507d9..da9becb 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -16,8 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ /* * [Memo] diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1c6ce31..af0ecb9 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -722,7 +722,7 @@ done: case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE) goto e_inval; np->pmtudisc = val; retv = 0; @@ -1019,7 +1019,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { - int tclass = np->rcv_tclass; + int tclass = ntohl(np->rcv_flowinfo & IPV6_TCLASS_MASK) >> 20; put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } if (np->rxopt.bits.rxoinfo) { @@ -1034,6 +1034,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxflow) { + __be32 flowinfo = np->rcv_flowinfo; + + put_cmsg(&msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); + } } len -= msg.msg_controllen; return put_user(len, optlen); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 9ac01dc..db9b6cb 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -13,8 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ /* * Authors: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3512177..09a22f4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -125,17 +125,19 @@ struct neigh_table nd_tbl = { .id = "ndisc_cache", .parms = { .tbl = &nd_tbl, - .base_reachable_time = ND_REACHABLE_TIME, - .retrans_time = ND_RETRANS_TIMER, - .gc_staletime = 60 * HZ, .reachable_time = ND_REACHABLE_TIME, - .delay_probe_time = 5 * HZ, - .queue_len_bytes = 64*1024, - .ucast_probes = 3, - .mcast_probes = 3, - .anycast_delay = 1 * HZ, - .proxy_delay = (8 * HZ) / 10, - .proxy_qlen = 64, + .data = { + [NEIGH_VAR_MCAST_PROBES] = 3, + [NEIGH_VAR_UCAST_PROBES] = 3, + [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER, + [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, + [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, + [NEIGH_VAR_GC_STALETIME] = 60 * HZ, + [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024, + [NEIGH_VAR_PROXY_QLEN] = 64, + [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, + [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, + }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, @@ -656,14 +658,14 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; - if ((probes -= neigh->parms->ucast_probes) < 0) { + if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) { if (!(neigh->nud_state & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); - } else if ((probes -= neigh->parms->app_probes) < 0) { + } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); @@ -790,7 +792,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc && - idev->nd_parms->proxy_delay != 0) { + NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) { /* * for anycast or proxy, * sender should delay its response @@ -1210,7 +1212,7 @@ skip_defrtr: rtime = (rtime*HZ)/1000; if (rtime < HZ/10) rtime = HZ/10; - in6_dev->nd_parms->retrans_time = rtime; + NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime); in6_dev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); } @@ -1222,9 +1224,11 @@ skip_defrtr: if (rtime < HZ/10) rtime = HZ/10; - if (rtime != in6_dev->nd_parms->base_reachable_time) { - in6_dev->nd_parms->base_reachable_time = rtime; - in6_dev->nd_parms->gc_staletime = 3 * rtime; + if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) { + NEIGH_VAR_SET(in6_dev->nd_parms, + BASE_REACHABLE_TIME, rtime); + NEIGH_VAR_SET(in6_dev->nd_parms, + GC_STALETIME, 3 * rtime); in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime); in6_dev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); @@ -1277,6 +1281,9 @@ skip_linkparms: ri->prefix_len == 0) continue; #endif + if (ri->prefix_len == 0 && + !in6_dev->cnf.accept_ra_defrtr) + continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3, @@ -1648,22 +1655,23 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *bu ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); if (strcmp(ctl->procname, "retrans_time") == 0) - ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time") == 0) - ret = proc_dointvec_jiffies(ctl, write, - buffer, lenp, ppos); + ret = neigh_proc_dointvec_jiffies(ctl, write, + buffer, lenp, ppos); else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) - ret = proc_dointvec_ms_jiffies(ctl, write, - buffer, lenp, ppos); + ret = neigh_proc_dointvec_ms_jiffies(ctl, write, + buffer, lenp, ppos); else ret = -1; if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { - if (ctl->data == &idev->nd_parms->base_reachable_time) - idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time); + if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) + idev->nd_parms->reachable_time = + neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)); idev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, idev); in6_dev_put(idev); @@ -1722,7 +1730,7 @@ int __init ndisc_init(void) neigh_table_init(&nd_tbl); #ifdef CONFIG_SYSCTL - err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6", + err = neigh_sysctl_register(NULL, &nd_tbl.parms, &ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 7702f9e..35750df 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -28,15 +28,27 @@ config NF_CONNTRACK_IPV6 config NF_TABLES_IPV6 depends on NF_TABLES tristate "IPv6 nf_tables support" + help + This option enables the IPv6 support for nf_tables. config NFT_CHAIN_ROUTE_IPV6 depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" + help + This option enables the "route" chain for IPv6 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, flowlabel, hop-limit and + the packet mark. config NFT_CHAIN_NAT_IPV6 depends on NF_TABLES_IPV6 depends on NF_NAT_IPV6 && NFT_NAT tristate "IPv6 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv6 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index da00a2e..544b0a9 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -23,181 +23,18 @@ #include <linux/skbuff.h> #include <linux/icmpv6.h> #include <linux/netdevice.h> -#include <net/ipv6.h> -#include <net/tcp.h> #include <net/icmp.h> -#include <net/ip6_checksum.h> -#include <net/ip6_fib.h> -#include <net/ip6_route.h> #include <net/flow.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_REJECT.h> +#include <net/netfilter/ipv6/nf_reject.h> + MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); -/* Send RST reply */ -static void send_reset(struct net *net, struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; - const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; - u8 proto; - __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - pr_debug("addr is not unicast.\n"); - return; - } - - proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); - - if ((tcphoff < 0) || (tcphoff > oldskb->len)) { - pr_debug("Cannot get TCP header.\n"); - return; - } - - otcplen = oldskb->len - tcphoff; - - /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; - } - - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); - - /* No RST for RST. */ - if (otcph.rst) { - pr_debug("RST is set\n"); - return; - } - - /* Check checksum. */ - if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { - pr_debug("TCP checksum is invalid\n"); - return; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; - } - - skb_dst_set(nskb, dst); - - skb_reserve(nskb, hh_len + dst->header_len); - - skb_put(nskb, sizeof(struct ipv6hdr)); - skb_reset_network_header(nskb); - ip6h = ipv6_hdr(nskb); - ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; - ip6h->saddr = oip6h->daddr; - ip6h->daddr = oip6h->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - /* Truncate to length (no data) */ - tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; - - if (otcph.ack) { - needs_ack = 0; - tcph->seq = otcph.ack_seq; - tcph->ack_seq = 0; - } else { - needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); - tcph->seq = 0; - } - - /* Reset flags */ - ((u_int8_t *)tcph)[13] = 0; - tcph->rst = 1; - tcph->ack = needs_ack; - tcph->window = 0; - tcph->urg_ptr = 0; - tcph->check = 0; - - /* Adjust TCP checksum */ - tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, - &ipv6_hdr(nskb)->daddr, - sizeof(struct tcphdr), IPPROTO_TCP, - csum_partial(tcph, - sizeof(struct tcphdr), 0)); - - nf_ct_attach(nskb, oldskb); - -#ifdef CONFIG_BRIDGE_NETFILTER - /* If we use ip6_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. - */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - nskb->protocol = htons(ETH_P_IPV6); - ip6h->payload_len = htons(sizeof(struct tcphdr)); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - return; - dev_queue_xmit(nskb); - } else -#endif - ip6_local_out(nskb); -} - -static inline void -send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, - unsigned int hooknum) -{ - if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = net->loopback_dev; - - icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); -} static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) @@ -208,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) pr_debug("%s: medium point\n", __func__); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(net, skb, par->hooknum); + nf_send_reset6(net, skb, par->hooknum); break; default: net_info_ratelimited("case %u not handled yet\n", reject->with); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index f78f41a..a0d1727 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -446,6 +446,7 @@ static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) static struct xt_target synproxy_tg6_reg __read_mostly = { .name = "SYNPROXY", .family = NFPROTO_IPV6, + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), .target = synproxy_tg6, .targetsize = sizeof(struct xt_synproxy_info), .checkentry = synproxy_tg6_check, diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 8815e31..15d23b8 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -57,7 +57,8 @@ static struct inet_protosw pingv6_protosw = { /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { return -EAFNOSUPPORT; } @@ -144,7 +145,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr); if (IS_ERR(dst)) return PTR_ERR(dst); rt = (struct rt6_info *) dst; diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 22d1bd4..e048cf1 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -36,10 +36,6 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol } EXPORT_SYMBOL(inet6_add_protocol); -/* - * Remove a protocol from the hash tables. - */ - int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { int ret; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e24ff1d..5f10b7e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -466,10 +466,10 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -792,7 +792,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } @@ -865,7 +864,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7faa9d5..11dac21 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -66,8 +66,9 @@ #endif enum rt6_nud_state { - RT6_NUD_FAIL_HARD = -2, - RT6_NUD_FAIL_SOFT = -1, + RT6_NUD_FAIL_HARD = -3, + RT6_NUD_FAIL_PROBE = -2, + RT6_NUD_FAIL_DO_RR = -1, RT6_NUD_SUCCEED = 1 }; @@ -84,6 +85,8 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_prohibit(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -101,6 +104,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif +static void rt6_bind_peer(struct rt6_info *rt, int create) +{ + struct inet_peer_base *base; + struct inet_peer *peer; + + base = inetpeer_base_ptr(rt->_rt6i_peer); + if (!base) + return; + + peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); + if (peer) { + if (!rt6_set_peer(rt, peer)) + inet_putpeer(peer); + } +} + +static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) +{ + if (rt6_has_peer(rt)) + return rt6_peer_ptr(rt); + + rt6_bind_peer(rt, create); + return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); +} + +static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +{ + return __rt6_get_peer(rt, 1); +} + static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { struct rt6_info *rt = (struct rt6_info *) dst; @@ -234,9 +267,6 @@ static const struct rt6_info ip6_null_entry_template = { #ifdef CONFIG_IPV6_MULTIPLE_TABLES -static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); - static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -312,22 +342,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } -void rt6_bind_peer(struct rt6_info *rt, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; - - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) { @@ -522,7 +536,7 @@ static void rt6_probe(struct rt6_info *rt) work = kmalloc(sizeof(*work), GFP_ATOMIC); if (neigh && work) - neigh->updated = jiffies; + __neigh_set_probe_once(neigh); if (neigh) write_unlock(&neigh->lock); @@ -578,11 +592,13 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) ret = RT6_NUD_SUCCEED; + else + ret = RT6_NUD_FAIL_PROBE; #endif read_unlock(&neigh->lock); } else { ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? - RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; + RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; } rcu_read_unlock_bh(); @@ -619,16 +635,17 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, goto out; m = rt6_score_route(rt, oif, strict); - if (m == RT6_NUD_FAIL_SOFT) { + if (m == RT6_NUD_FAIL_DO_RR) { match_do_rr = true; m = 0; /* lowest valid score */ - } else if (m < 0) { + } else if (m == RT6_NUD_FAIL_HARD) { goto out; } if (strict & RT6_LOOKUP_F_REACHABLE) rt6_probe(rt); + /* note that m can be RT6_NUD_FAIL_PROBE at this point */ if (m > *mpri) { *do_rr = match_do_rr; *mpri = m; @@ -1565,21 +1582,24 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; + rt->dst.output = dst_discard; + rt->dst.input = dst_discard; break; case RTN_PROHIBIT: rt->dst.error = -EACCES; + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; break; case RTN_THROW: - rt->dst.error = -EAGAIN; - break; default: - rt->dst.error = -ENETUNREACH; + rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN + : -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; break; } goto install_route; @@ -1903,9 +1923,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, else rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; - if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == - (RTF_DEFAULT | RTF_ADDRCONF)) - rt6_set_from(rt, ort); + rt6_set_from(rt, ort); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -2144,8 +2162,6 @@ static int ip6_pkt_discard_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - static int ip6_pkt_prohibit(struct sk_buff *skb) { return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); @@ -2157,8 +2173,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } -#endif - /* * Allocate a dst for local (unicast / anycast) address. */ @@ -2168,12 +2182,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - - if (!rt) { - net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, + DST_NOCOUNT, NULL); + if (!rt) return ERR_PTR(-ENOMEM); - } in6_dev_hold(idev); @@ -2244,7 +2256,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) .net = net, .addr = &ifp->addr, }; - fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni); + fib6_clean_all(net, fib6_remove_prefsrc, &adni); } struct arg_dev_net { @@ -2271,7 +2283,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) .net = net, }; - fib6_clean_all(net, fib6_ifdown, 0, &adn); + fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); } @@ -2326,7 +2338,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu) .mtu = mtu, }; - fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1b4a4a95..3dfbcf1 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -478,14 +478,44 @@ static void ipip6_tunnel_uninit(struct net_device *dev) dev_put(dev); } +/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH + * if sufficient data bytes are available + */ +static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct rt6_info *rt; + struct sk_buff *skb2; + + if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8)) + return 1; + + skb2 = skb_clone(skb, GFP_ATOMIC); + + if (!skb2) + return 1; + + skb_dst_drop(skb2); + skb_pull(skb2, iph->ihl * 4); + skb_reset_network_header(skb2); + + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + + if (rt && rt->dst.dev) + skb2->dev = rt->dst.dev; + + icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + + if (rt) + ip6_rt_put(rt); + + kfree_skb(skb2); + + return 0; +} static int ipip6_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; @@ -500,7 +530,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; default: @@ -545,6 +574,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info) goto out; err = 0; + if (!ipip6_err_gen_icmpv6_unreach(skb)) + goto out; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; @@ -639,7 +671,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && tunnel->parms.iph.protocol != 0) @@ -670,8 +702,10 @@ static int ipip6_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); @@ -892,7 +926,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (tunnel->parms.iph.daddr && skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -919,7 +953,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (!new_skb) { ip_rt_put(rt); dev->stats.tx_dropped++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } if (skb->sk) @@ -934,8 +968,10 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); - if (IS_ERR(skb)) + if (IS_ERR(skb)) { + ip_rt_put(rt); goto out; + } err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); @@ -945,7 +981,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tx_error_icmp: dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); out: dev->stats.tx_errors++; return NETDEV_TX_OK; @@ -985,7 +1021,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, tx_err: dev->stats.tx_errors++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } @@ -1329,12 +1365,12 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ipip6_tunnel_stats; + struct pcpu_sw_netstats *ipip6_tunnel_stats; ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ipip6_tunnel_stats->syncp); } @@ -1359,12 +1395,12 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ipip6_fb_stats; + struct pcpu_sw_netstats *ipip6_fb_stats; ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ipip6_fb_stats->syncp); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 535a3ad..bb53a5e7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -247,7 +247,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) goto out_free; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0740f93..ffd5fa8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -156,7 +156,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } @@ -165,12 +164,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * connect() to INADDR_ANY means loopback (BSD'ism). */ - if(ipv6_addr_any(&usin->sin6_addr)) + if (ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 0x1; addr_type = ipv6_addr_type(&usin->sin6_addr); - if(addr_type & IPV6_ADDR_MULTICAST) + if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type&IPV6_ADDR_LINKLOCAL) { @@ -258,7 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; @@ -337,7 +336,7 @@ static void tcp_v6_mtu_reduced(struct sock *sk) static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data; + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; @@ -398,6 +397,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_LISTEN) goto out; + if (!ip6_sk_accept_pmtu(sk)) + goto out; + tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); @@ -467,7 +469,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); - struct sk_buff * skb; + struct sk_buff *skb; int err = -ENOMEM; /* First, grab a route. */ @@ -801,7 +803,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * Underlying function will use this to retrieve the network * namespace */ - dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); @@ -910,7 +912,7 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, } -static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) +static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) { struct request_sock *req, **prev; const struct tcphdr *th = tcp_hdr(skb); @@ -1083,9 +1085,9 @@ drop: return 0; /* don't send reset */ } -static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) +static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); @@ -1135,7 +1137,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1215,7 +1217,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1380,7 +1382,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * otherwise we just shortcircuit this and continue with * the new socket.. */ - if(nsk != sk) { + if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; @@ -1425,8 +1427,8 @@ ipv6_pktoptions: np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; - if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb)); + if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) + np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); @@ -1740,7 +1742,7 @@ static void get_openreq6(struct seq_file *seq, dest->s6_addr32[2], dest->s6_addr32[3], ntohs(inet_rsk(req)->ir_rmt_port), TCP_SYN_RECV, - 0,0, /* could print option size, but that is af dependent. */ + 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->num_timeout, @@ -1799,7 +1801,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) atomic_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh ); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index c1097c7..0d78132 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -37,44 +37,42 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, { const struct ipv6hdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + + wsum = skb->csum; switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + wsum); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), - IPPROTO_TCP, 0)); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } +skip_csum: return tcp_gro_receive(head, skb); } -static int tcp6_gro_complete(struct sk_buff *skb) +static int tcp6_gro_complete(struct sk_buff *skb, int thoff) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), - &iph->saddr, &iph->daddr, 0); + th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, + &iph->daddr, 0); skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; return tcp_gro_complete(skb); diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 4b0f50d..2c4e4c5 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -12,8 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors Mitsuru KANDA <mk@linux-ipv6.org> * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81eb8cf..fa9d988 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -393,10 +393,10 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, bool slow; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), @@ -538,8 +538,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; - if (type == ICMPV6_PKT_TOOBIG) + if (type == ICMPV6_PKT_TOOBIG) { + if (!ip6_sk_accept_pmtu(sk)) + goto out; ip6_sk_update_pmtu(skb, sk, info); + } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); goto out; @@ -1140,7 +1143,6 @@ do_udp_sendmsg: flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } @@ -1221,7 +1223,7 @@ do_udp_sendmsg: security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 63d5d49..0e01590 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -15,8 +15,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ /* * Authors: diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index de2bcfa..1c66465 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -12,8 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors Mitsuru KANDA <mk@linux-ipv6.org> * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index de7db23..73baf9b 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -25,9 +25,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Linux-IrDA now supports four different types of IrDA sockets: * diff --git a/net/irda/discovery.c b/net/irda/discovery.c index b0b56a3..6786e7f 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index b797daa..4490a67 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_event.c b/net/irda/ircomm/ircomm_event.c index d78554f..b172c65 100644 --- a/net/irda/ircomm/ircomm_event.c +++ b/net/irda/ircomm/ircomm_event.c @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c index 3b8095c..6536114 100644 --- a/net/irda/ircomm/ircomm_lmp.c +++ b/net/irda/ircomm/ircomm_lmp.c @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c index 3089391..f80b1a6 100644 --- a/net/irda/ircomm/ircomm_param.c +++ b/net/irda/ircomm/ircomm_param.c @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_ttp.c b/net/irda/ircomm/ircomm_ttp.c index 6e6509f..d362d71 100644 --- a/net/irda/ircomm/ircomm_ttp.c +++ b/net/irda/ircomm/ircomm_ttp.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 41ac7938..2ba8b97 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index a2a508f..2ee87bf 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index b343f50..ce94385 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 14653b8..365b895d 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 005b424..a778df5 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/parameters.c b/net/irda/parameters.c index 71cd38c..6d08697 100644 --- a/net/irda/parameters.c +++ b/net/irda/parameters.c @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/qos.c b/net/irda/qos.c index 798ffd9..11a7cc0 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/key/af_key.c b/net/key/af_key.c index 545f047..1a04c13 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1340,6 +1340,12 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ max_spi = range->sadb_spirange_max; } + err = verify_spi_info(x->id.proto, min_spi, max_spi); + if (err) { + xfrm_state_put(x); + return err; + } + err = xfrm_alloc_spi(x, min_spi, max_spi); resp_skb = err ? ERR_PTR(err) : pfkey_xfrm_state2msg(x); @@ -1380,10 +1386,9 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb return 0; spin_lock_bh(&x->lock); - if (x->km.state == XFRM_STATE_ACQ) { + if (x->km.state == XFRM_STATE_ACQ) x->km.state = XFRM_STATE_ERROR; - wake_up(&net->xfrm.km_waitq); - } + spin_unlock_bh(&x->lock); xfrm_state_put(x); return 0; @@ -1785,7 +1790,9 @@ static int pfkey_dump_sa(struct pfkey_sock *pfk) static void pfkey_dump_sa_done(struct pfkey_sock *pfk) { - xfrm_state_walk_done(&pfk->dump.u.state); + struct net *net = sock_net(&pfk->sk); + + xfrm_state_walk_done(&pfk->dump.u.state, net); } static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) @@ -1861,7 +1868,7 @@ static u32 gen_reqid(struct net *net) reqid = IPSEC_MANUAL_REQID_MAX+1; xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); rc = xfrm_policy_walk(net, &walk, check_reqid, (void*)&reqid); - xfrm_policy_walk_done(&walk); + xfrm_policy_walk_done(&walk, net); if (rc != -EEXIST) return reqid; } while (reqid != start); @@ -2485,6 +2492,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; struct xfrm_kmaddress k; + struct net *net = sock_net(sk); if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || @@ -2558,7 +2566,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, - kma ? &k : NULL); + kma ? &k : NULL, net); out: return err; @@ -2659,7 +2667,9 @@ static int pfkey_dump_sp(struct pfkey_sock *pfk) static void pfkey_dump_sp_done(struct pfkey_sock *pfk) { - xfrm_policy_walk_done(&pfk->dump.u.policy); + struct net *net = sock_net((struct sock *)pfk); + + xfrm_policy_walk_done(&pfk->dump.u.policy, net); } static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) @@ -3569,6 +3579,7 @@ static int pfkey_sendmsg(struct kiocb *kiocb, struct sk_buff *skb = NULL; struct sadb_msg *hdr = NULL; int err; + struct net *net = sock_net(sk); err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) @@ -3591,9 +3602,9 @@ static int pfkey_sendmsg(struct kiocb *kiocb, if (!hdr) goto out; - mutex_lock(&xfrm_cfg_mutex); + mutex_lock(&net->xfrm.xfrm_cfg_mutex); err = pfkey_process(sk, skb, hdr); - mutex_unlock(&xfrm_cfg_mutex); + mutex_unlock(&net->xfrm.xfrm_cfg_mutex); out: if (err && hdr && pfkey_error(hdr, err, sk) == 0) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index cfd6530..29487a8 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -528,7 +528,6 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } @@ -598,7 +597,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; @@ -665,7 +664,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, *addr_len = sizeof(*lsa); if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7b01b9f..c71b699 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -715,7 +715,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned long cpu_flags; size_t copied = 0; u32 peek_seq = 0; - u32 *seq; + u32 *seq, skb_len; unsigned long used; int target; /* Read at least this many bytes */ long timeo; @@ -812,6 +812,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, } continue; found_ok_skb: + skb_len = skb->len; /* Ok so how much can we use? */ used = skb->len - offset; if (len < used) @@ -844,7 +845,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, } /* Partial read */ - if (used + offset < skb->len) + if (used + offset < skb_len) continue; } while (len > 0); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index cd87241..42dc2e4 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -753,7 +753,7 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) * * Sends received pdus to the connection state machine. */ -static int llc_conn_rcv(struct sock* sk, struct sk_buff *skb) +static int llc_conn_rcv(struct sock *sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -891,7 +891,7 @@ out_kfree_skb: * * Initializes a socket with default llc values. */ -static void llc_sk_init(struct sock* sk) +static void llc_sk_init(struct sock *sk) { struct llc_sock *llc = llc_sk(sk); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 2bb0ddf..842851c 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -23,7 +23,7 @@ #include <net/llc.h> LIST_HEAD(llc_sap_list); -DEFINE_SPINLOCK(llc_sap_list_lock); +static DEFINE_SPINLOCK(llc_sap_list_lock); /** * llc_sap_alloc - allocates and initializes sap. @@ -48,7 +48,7 @@ static struct llc_sap *llc_sap_alloc(void) static struct llc_sap *__llc_sap_find(unsigned char sap_value) { - struct llc_sap* sap; + struct llc_sap *sap; list_for_each_entry(sap, &llc_sap_list, node) if (sap->laddr.lsap == sap_value) @@ -159,7 +159,6 @@ module_init(llc_init); module_exit(llc_exit); EXPORT_SYMBOL(llc_sap_list); -EXPORT_SYMBOL(llc_sap_list_lock); EXPORT_SYMBOL(llc_sap_find); EXPORT_SYMBOL(llc_sap_open); EXPORT_SYMBOL(llc_sap_close); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index e585069..06033f6 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -66,7 +66,7 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, return skb; } -void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) +void llc_save_primitive(struct sock *sk, struct sk_buff *skb, u8 prim) { struct sockaddr_llc *addr; @@ -114,7 +114,7 @@ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb) * failure. */ static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, - struct sk_buff* skb) + struct sk_buff *skb) { int i = 0; struct llc_sap_state_trans *rc = NULL; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 95667b0..ac18528 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -133,7 +133,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, struct key_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; + const struct ieee80211_cipher_scheme *cs = NULL; struct ieee80211_key *key; int err; @@ -145,22 +147,28 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_WEP104: - if (IS_ERR(sdata->local->wep_tx_tfm)) + if (IS_ERR(local->wep_tx_tfm)) return -EINVAL; break; + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_GCMP: + break; default: + cs = ieee80211_cs_get(local, params->cipher, sdata->vif.type); break; } key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, - params->key, params->seq_len, params->seq); + params->key, params->seq_len, params->seq, + cs); if (IS_ERR(key)) return PTR_ERR(key); if (pairwise) key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; - mutex_lock(&sdata->local->sta_mtx); + mutex_lock(&local->sta_mtx); if (mac_addr) { if (ieee80211_vif_is_mesh(&sdata->vif)) @@ -216,10 +224,13 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, break; } + if (sta) + sta->cipher_scheme = cs; + err = ieee80211_key_link(key, sdata, sta); out_unlock: - mutex_unlock(&sdata->local->sta_mtx); + mutex_unlock(&local->sta_mtx); return err; } @@ -244,7 +255,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; if (pairwise) - key = key_mtx_dereference(local, sta->ptk); + key = key_mtx_dereference(local, sta->ptk[key_idx]); else key = key_mtx_dereference(local, sta->gtk[key_idx]); } else @@ -290,9 +301,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, if (!sta) goto out; - if (pairwise) - key = rcu_dereference(sta->ptk); - else if (key_idx < NUM_DEFAULT_KEYS) + if (pairwise && key_idx < NUM_DEFAULT_KEYS) + key = rcu_dereference(sta->ptk[key_idx]); + else if (!pairwise && + key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) key = rcu_dereference(sta->gtk[key_idx]); } else key = rcu_dereference(sdata->keys[key_idx]); @@ -521,8 +533,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_PEER_PM | STATION_INFO_NONPEER_PM; - sinfo->llid = le16_to_cpu(sta->llid); - sinfo->plid = le16_to_cpu(sta->plid); + sinfo->llid = sta->llid; + sinfo->plid = sta->plid; sinfo->plink_state = sta->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { sinfo->filled |= STATION_INFO_T_OFFSET; @@ -846,7 +858,7 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, if (!resp || !resp_len) return 1; - old = rtnl_dereference(sdata->u.ap.probe_resp); + old = sdata_dereference(sdata->u.ap.probe_resp, sdata); new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL); if (!new) @@ -862,15 +874,16 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, return 0; } -int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_beacon_data *params) +static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, + struct cfg80211_beacon_data *params) { struct beacon_data *new, *old; int new_head_len, new_tail_len; int size, err; u32 changed = BSS_CHANGED_BEACON; - old = rtnl_dereference(sdata->u.ap.beacon); + old = sdata_dereference(sdata->u.ap.beacon, sdata); + /* Need to have a beacon head if we don't have one yet */ if (!params->head && !old) @@ -947,7 +960,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_P2P_PS; int err; - old = rtnl_dereference(sdata->u.ap.beacon); + old = sdata_dereference(sdata->u.ap.beacon, sdata); if (old) return -EALREADY; @@ -968,11 +981,19 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, */ sdata->control_port_protocol = params->crypto.control_port_ethertype; sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt; + sdata->encrypt_headroom = ieee80211_cs_headroom(sdata->local, + ¶ms->crypto, + sdata->vif.type); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->control_port_protocol = params->crypto.control_port_ethertype; vlan->control_port_no_encrypt = params->crypto.control_port_no_encrypt; + vlan->encrypt_headroom = + ieee80211_cs_headroom(sdata->local, + ¶ms->crypto, + vlan->vif.type); } sdata->vif.bss_conf.beacon_int = params->beacon_interval; @@ -1001,7 +1022,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, err = drv_start_ap(sdata->local, sdata); if (err) { - old = rtnl_dereference(sdata->u.ap.beacon); + old = sdata_dereference(sdata->u.ap.beacon, sdata); + if (old) kfree_rcu(old, rcu_head); RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); @@ -1032,7 +1054,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.csa_active) return -EBUSY; - old = rtnl_dereference(sdata->u.ap.beacon); + old = sdata_dereference(sdata->u.ap.beacon, sdata); if (!old) return -ENOENT; @@ -1050,15 +1072,18 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) struct ieee80211_local *local = sdata->local; struct beacon_data *old_beacon; struct probe_resp *old_probe_resp; + struct cfg80211_chan_def chandef; - old_beacon = rtnl_dereference(sdata->u.ap.beacon); + old_beacon = sdata_dereference(sdata->u.ap.beacon, sdata); if (!old_beacon) return -ENOENT; - old_probe_resp = rtnl_dereference(sdata->u.ap.probe_resp); + old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata); /* abort any running channel switch */ sdata->vif.csa_active = false; - cancel_work_sync(&sdata->csa_finalize_work); + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; + cancel_work_sync(&sdata->u.ap.request_smps_work); /* turn off carrier for this interface and dependent VLANs */ @@ -1073,17 +1098,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - sta_info_flush_defer(vlan); - sta_info_flush_defer(sdata); - synchronize_net(); - rcu_barrier(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { - sta_info_flush_cleanup(vlan); - ieee80211_free_keys(vlan); - } - sta_info_flush_cleanup(sdata); - ieee80211_free_keys(sdata); + __sta_info_flush(sdata, true); + ieee80211_free_keys(sdata, true); sdata->vif.bss_conf.enable_beacon = false; sdata->vif.bss_conf.ssid_len = 0; @@ -1091,8 +1107,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); if (sdata->wdev.cac_started) { + chandef = sdata->vif.bss_conf.chandef; cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); - cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_ABORTED, + cfg80211_cac_event(sdata->dev, &chandef, + NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); } @@ -1368,7 +1386,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, changed |= ieee80211_mps_set_sta_local_pm(sta, params->local_pm); - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); #endif } @@ -1953,7 +1971,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, enum ieee80211_band band; u32 changed = 0; - if (!rtnl_dereference(sdata->u.ap.beacon)) + if (!sdata_dereference(sdata->u.ap.beacon, sdata)) return -ENOENT; band = ieee80211_get_sdata_band(sdata); @@ -2488,8 +2506,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) @@ -2562,8 +2579,8 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, int j; sdata->rc_rateidx_mask[i] = mask->control[i].legacy; - memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].mcs, - sizeof(mask->control[i].mcs)); + memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].ht_mcs, + sizeof(mask->control[i].ht_mcs)); sdata->rc_has_mcs_mask[i] = false; if (!sband) @@ -2964,27 +2981,33 @@ void ieee80211_csa_finalize_work(struct work_struct *work) struct ieee80211_local *local = sdata->local; int err, changed = 0; + sdata_lock(sdata); + /* AP might have been stopped while waiting for the lock. */ + if (!sdata->vif.csa_active) + goto unlock; + if (!ieee80211_sdata_running(sdata)) - return; + goto unlock; sdata->radar_required = sdata->csa_radar_required; - err = ieee80211_vif_change_channel(sdata, &local->csa_chandef, - &changed); + err = ieee80211_vif_change_channel(sdata, &changed); if (WARN_ON(err < 0)) - return; + goto unlock; if (!local->use_chanctx) { - local->_oper_chandef = local->csa_chandef; + local->_oper_chandef = sdata->csa_chandef; ieee80211_hw_config(local, 0); } ieee80211_bss_info_change_notify(sdata, changed); + sdata->vif.csa_active = false; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon); if (err < 0) - return; + goto unlock; + changed |= err; kfree(sdata->u.ap.next_beacon); sdata->u.ap.next_beacon = NULL; @@ -2998,24 +3021,26 @@ void ieee80211_csa_finalize_work(struct work_struct *work) case NL80211_IFTYPE_MESH_POINT: err = ieee80211_mesh_finish_csa(sdata); if (err < 0) - return; + goto unlock; break; #endif default: WARN_ON(1); - return; + goto unlock; } - sdata->vif.csa_active = false; ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - cfg80211_ch_switch_notify(sdata->dev, &local->csa_chandef); + cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); + +unlock: + sdata_unlock(sdata); } -static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_csa_settings *params) +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -3024,6 +3049,8 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_if_mesh __maybe_unused *ifmsh; int err, num_chanctx; + lockdep_assert_held(&sdata->wdev.mtx); + if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; @@ -3120,9 +3147,17 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, params->chandef.chan->band) return -EINVAL; + ifmsh->chsw_init = true; + if (!ifmsh->pre_value) + ifmsh->pre_value = 1; + else + ifmsh->pre_value++; + err = ieee80211_mesh_csa_beacon(sdata, params, true); - if (err < 0) + if (err < 0) { + ifmsh->chsw_init = false; return err; + } break; #endif default: @@ -3136,7 +3171,7 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - local->csa_chandef = params->chandef; + sdata->csa_chandef = params->chandef; sdata->vif.csa_active = true; ieee80211_bss_info_change_notify(sdata, err); @@ -3146,26 +3181,25 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, const u8 *buf, size_t len, - bool no_cck, bool dont_wait_for_ack, u64 *cookie) + struct cfg80211_mgmt_tx_params *params, + u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; - const struct ieee80211_mgmt *mgmt = (void *)buf; + const struct ieee80211_mgmt *mgmt = (void *)params->buf; bool need_offchan = false; u32 flags; int ret; - if (dont_wait_for_ack) + if (params->dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; else flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | IEEE80211_TX_CTL_REQ_TX_STATUS; - if (no_cck) + if (params->no_cck) flags |= IEEE80211_TX_CTL_NO_CCK_RATE; switch (sdata->vif.type) { @@ -3213,7 +3247,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, /* configurations requiring offchan cannot work if no channel has been * specified */ - if (need_offchan && !chan) + if (need_offchan && !params->chan) return -EINVAL; mutex_lock(&local->mtx); @@ -3226,8 +3260,10 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (chanctx_conf) { - need_offchan = chan && (chan != chanctx_conf->def.chan); - } else if (!chan) { + need_offchan = params->chan && + (params->chan != + chanctx_conf->def.chan); + } else if (!params->chan) { ret = -EINVAL; rcu_read_unlock(); goto out_unlock; @@ -3237,19 +3273,19 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, rcu_read_unlock(); } - if (need_offchan && !offchan) { + if (need_offchan && !params->offchan) { ret = -EBUSY; goto out_unlock; } - skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); + skb = dev_alloc_skb(local->hw.extra_tx_headroom + params->len); if (!skb) { ret = -ENOMEM; goto out_unlock; } skb_reserve(skb, local->hw.extra_tx_headroom); - memcpy(skb_put(skb, len), buf, len); + memcpy(skb_put(skb, params->len), params->buf, params->len); IEEE80211_SKB_CB(skb)->flags = flags; @@ -3269,8 +3305,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, local->hw.offchannel_tx_hw_queue; /* This will handle all kinds of coalescing and immediate TX */ - ret = ieee80211_start_roc_work(local, sdata, chan, - wait, cookie, skb, + ret = ieee80211_start_roc_work(local, sdata, params->chan, + params->wait, cookie, skb, IEEE80211_ROC_TYPE_MGMT_TX); if (ret) kfree_skb(skb); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 03ba6b5..a57d5d9 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -9,6 +9,140 @@ #include "ieee80211_i.h" #include "driver-ops.h" +static enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta) +{ + switch (sta->bandwidth) { + case IEEE80211_STA_RX_BW_20: + if (sta->ht_cap.ht_supported) + return NL80211_CHAN_WIDTH_20; + else + return NL80211_CHAN_WIDTH_20_NOHT; + case IEEE80211_STA_RX_BW_40: + return NL80211_CHAN_WIDTH_40; + case IEEE80211_STA_RX_BW_80: + return NL80211_CHAN_WIDTH_80; + case IEEE80211_STA_RX_BW_160: + /* + * This applied for both 160 and 80+80. since we use + * the returned value to consider degradation of + * ctx->conf.min_def, we have to make sure to take + * the bigger one (NL80211_CHAN_WIDTH_160). + * Otherwise we might try degrading even when not + * needed, as the max required sta_bw returned (80+80) + * might be smaller than the configured bw (160). + */ + return NL80211_CHAN_WIDTH_160; + default: + WARN_ON(1); + return NL80211_CHAN_WIDTH_20; + } +} + +static enum nl80211_chan_width +ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata) +{ + enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { + if (sdata != sta->sdata && + !(sta->sdata->bss && sta->sdata->bss == sdata->bss)) + continue; + + if (!sta->uploaded) + continue; + + max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta)); + } + rcu_read_unlock(); + + return max_bw; +} + +static enum nl80211_chan_width +ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, + struct ieee80211_chanctx_conf *conf) +{ + struct ieee80211_sub_if_data *sdata; + enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + struct ieee80211_vif *vif = &sdata->vif; + enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT; + + if (!ieee80211_sdata_running(sdata)) + continue; + + if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf) + continue; + + switch (vif->type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + width = ieee80211_get_max_required_bw(sdata); + break; + case NL80211_IFTYPE_P2P_DEVICE: + continue; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: + width = vif->bss_conf.chandef.width; + break; + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: + WARN_ON_ONCE(1); + } + max_bw = max(max_bw, width); + } + rcu_read_unlock(); + + return max_bw; +} + +/* + * recalc the min required chan width of the channel context, which is + * the max of min required widths of all the interfaces bound to this + * channel context. + */ +void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + enum nl80211_chan_width max_bw; + struct cfg80211_chan_def min_def; + + lockdep_assert_held(&local->chanctx_mtx); + + /* don't optimize 5MHz, 10MHz, and radar_enabled confs */ + if (ctx->conf.def.width == NL80211_CHAN_WIDTH_5 || + ctx->conf.def.width == NL80211_CHAN_WIDTH_10 || + ctx->conf.radar_enabled) { + ctx->conf.min_def = ctx->conf.def; + return; + } + + max_bw = ieee80211_get_chanctx_max_required_bw(local, &ctx->conf); + + /* downgrade chandef up to max_bw */ + min_def = ctx->conf.def; + while (min_def.width > max_bw) + ieee80211_chandef_downgrade(&min_def); + + if (cfg80211_chandef_identical(&ctx->conf.min_def, &min_def)) + return; + + ctx->conf.min_def = min_def; + if (!ctx->driver_present) + return; + + drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_MIN_WIDTH); +} + static void ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *chandef) @@ -20,6 +154,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, ctx->conf.def = *chandef; drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH); + ieee80211_recalc_chanctx_min_def(local, ctx); if (!local->use_chanctx) { local->_oper_chandef = *chandef; @@ -93,6 +228,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; ctx->conf.radar_enabled = ieee80211_is_radar_required(local); + ieee80211_recalc_chanctx_min_def(local, ctx); if (!local->use_chanctx) local->hw.conf.radar_enabled = ctx->conf.radar_enabled; @@ -179,6 +315,7 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ctx->refcount++; ieee80211_recalc_txpower(sdata); + ieee80211_recalc_chanctx_min_def(local, ctx); sdata->vif.bss_conf.idle = false; if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && @@ -243,6 +380,7 @@ static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_chanctx_chantype(sdata->local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); + ieee80211_recalc_chanctx_min_def(local, ctx); } } @@ -411,12 +549,12 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, } int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, - const struct cfg80211_chan_def *chandef, u32 *changed) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; + const struct cfg80211_chan_def *chandef = &sdata->csa_chandef; int ret; u32 chanctx_changed = 0; @@ -456,6 +594,7 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_chanctx_chantype(local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); + ieee80211_recalc_chanctx_min_def(local, ctx); ret = 0; out: diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 5c090e4..fa16e54 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -17,6 +17,172 @@ #define DEBUGFS_FORMAT_BUFFER_SIZE 100 +#define TX_LATENCY_BIN_DELIMTER_C ',' +#define TX_LATENCY_BIN_DELIMTER_S "," +#define TX_LATENCY_BINS_DISABLED "enable(bins disabled)\n" +#define TX_LATENCY_DISABLED "disable\n" + + +/* + * Display if Tx latency statistics & bins are enabled/disabled + */ +static ssize_t sta_tx_latency_stat_read(struct file *file, + char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + char *buf; + int bufsz, i, ret; + int pos = 0; + + rcu_read_lock(); + + tx_latency = rcu_dereference(local->tx_latency); + + if (tx_latency && tx_latency->n_ranges) { + bufsz = tx_latency->n_ranges * 15; + buf = kzalloc(bufsz, GFP_ATOMIC); + if (!buf) + goto err; + + for (i = 0; i < tx_latency->n_ranges; i++) + pos += scnprintf(buf + pos, bufsz - pos, "%d,", + tx_latency->ranges[i]); + pos += scnprintf(buf + pos, bufsz - pos, "\n"); + } else if (tx_latency) { + bufsz = sizeof(TX_LATENCY_BINS_DISABLED) + 1; + buf = kzalloc(bufsz, GFP_ATOMIC); + if (!buf) + goto err; + + pos += scnprintf(buf + pos, bufsz - pos, "%s\n", + TX_LATENCY_BINS_DISABLED); + } else { + bufsz = sizeof(TX_LATENCY_DISABLED) + 1; + buf = kzalloc(bufsz, GFP_ATOMIC); + if (!buf) + goto err; + + pos += scnprintf(buf + pos, bufsz - pos, "%s\n", + TX_LATENCY_DISABLED); + } + + rcu_read_unlock(); + + ret = simple_read_from_buffer(userbuf, count, ppos, buf, pos); + kfree(buf); + + return ret; +err: + rcu_read_unlock(); + return -ENOMEM; +} + +/* + * Receive input from user regarding Tx latency statistics + * The input should indicate if Tx latency statistics and bins are + * enabled/disabled. + * If bins are enabled input should indicate the amount of different bins and + * their ranges. Each bin will count how many Tx frames transmitted within the + * appropriate latency. + * Legal input is: + * a) "enable(bins disabled)" - to enable only general statistics + * b) "a,b,c,d,...z" - to enable general statistics and bins, where all are + * numbers and a < b < c < d.. < z + * c) "disable" - disable all statistics + * NOTE: must configure Tx latency statistics bins before stations connected. + */ + +static ssize_t sta_tx_latency_stat_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + char buf[128] = {}; + char *bins = buf; + char *token; + int buf_size, i, alloc_size; + int prev_bin = 0; + int n_ranges = 0; + int ret = count; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + + if (sizeof(buf) <= count) + return -EINVAL; + buf_size = count; + if (copy_from_user(buf, userbuf, buf_size)) + return -EFAULT; + + mutex_lock(&local->sta_mtx); + + /* cannot change config once we have stations */ + if (local->num_sta) + goto unlock; + + tx_latency = + rcu_dereference_protected(local->tx_latency, + lockdep_is_held(&local->sta_mtx)); + + /* disable Tx statistics */ + if (!strcmp(buf, TX_LATENCY_DISABLED)) { + if (!tx_latency) + goto unlock; + rcu_assign_pointer(local->tx_latency, NULL); + synchronize_rcu(); + kfree(tx_latency); + goto unlock; + } + + /* Tx latency already enabled */ + if (tx_latency) + goto unlock; + + if (strcmp(TX_LATENCY_BINS_DISABLED, buf)) { + /* check how many bins and between what ranges user requested */ + token = buf; + while (*token != '\0') { + if (*token == TX_LATENCY_BIN_DELIMTER_C) + n_ranges++; + token++; + } + n_ranges++; + } + + alloc_size = sizeof(struct ieee80211_tx_latency_bin_ranges) + + n_ranges * sizeof(u32); + tx_latency = kzalloc(alloc_size, GFP_ATOMIC); + if (!tx_latency) { + ret = -ENOMEM; + goto unlock; + } + tx_latency->n_ranges = n_ranges; + for (i = 0; i < n_ranges; i++) { /* setting bin ranges */ + token = strsep(&bins, TX_LATENCY_BIN_DELIMTER_S); + sscanf(token, "%d", &tx_latency->ranges[i]); + /* bins values should be in ascending order */ + if (prev_bin >= tx_latency->ranges[i]) { + ret = -EINVAL; + kfree(tx_latency); + goto unlock; + } + prev_bin = tx_latency->ranges[i]; + } + rcu_assign_pointer(local->tx_latency, tx_latency); + +unlock: + mutex_unlock(&local->sta_mtx); + + return ret; +} + +static const struct file_operations stats_tx_latency_ops = { + .write = sta_tx_latency_stat_write, + .read = sta_tx_latency_stat_read, + .open = simple_open, + .llseek = generic_file_llseek, +}; + int mac80211_format_buffer(char __user *userbuf, size_t count, loff_t *ppos, char *fmt, ...) { @@ -315,4 +481,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); DEBUGFS_DEVSTATS_ADD(dot11FCSErrorCount); DEBUGFS_DEVSTATS_ADD(dot11RTSSuccessCount); + + DEBUGFS_DEVSTATS_ADD(tx_latency); } diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 19c54a4..80194b5 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -38,6 +38,13 @@ static const struct file_operations sta_ ##name## _ops = { \ .llseek = generic_file_llseek, \ } +#define STA_OPS_W(name) \ +static const struct file_operations sta_ ##name## _ops = { \ + .write = sta_##name##_write, \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +} + #define STA_OPS_RW(name) \ static const struct file_operations sta_ ##name## _ops = { \ .read = sta_##name##_read, \ @@ -388,6 +395,131 @@ static ssize_t sta_last_rx_rate_read(struct file *file, char __user *userbuf, } STA_OPS(last_rx_rate); +static int +sta_tx_latency_stat_header(struct ieee80211_tx_latency_bin_ranges *tx_latency, + char *buf, int pos, int bufsz) +{ + int i; + int range_count = tx_latency->n_ranges; + u32 *bin_ranges = tx_latency->ranges; + + pos += scnprintf(buf + pos, bufsz - pos, + "Station\t\t\tTID\tMax\tAvg"); + if (range_count) { + pos += scnprintf(buf + pos, bufsz - pos, + "\t<=%d", bin_ranges[0]); + for (i = 0; i < range_count - 1; i++) + pos += scnprintf(buf + pos, bufsz - pos, "\t%d-%d", + bin_ranges[i], bin_ranges[i+1]); + pos += scnprintf(buf + pos, bufsz - pos, + "\t%d<", bin_ranges[range_count - 1]); + } + + pos += scnprintf(buf + pos, bufsz - pos, "\n"); + + return pos; +} + +static int +sta_tx_latency_stat_table(struct ieee80211_tx_latency_bin_ranges *tx_lat_range, + struct ieee80211_tx_latency_stat *tx_lat, + char *buf, int pos, int bufsz, int tid) +{ + u32 avg = 0; + int j; + int bin_count = tx_lat->bin_count; + + pos += scnprintf(buf + pos, bufsz - pos, "\t\t\t%d", tid); + /* make sure you don't divide in 0 */ + if (tx_lat->counter) + avg = tx_lat->sum / tx_lat->counter; + + pos += scnprintf(buf + pos, bufsz - pos, "\t%d\t%d", + tx_lat->max, avg); + + if (tx_lat_range->n_ranges && tx_lat->bins) + for (j = 0; j < bin_count; j++) + pos += scnprintf(buf + pos, bufsz - pos, + "\t%d", tx_lat->bins[j]); + pos += scnprintf(buf + pos, bufsz - pos, "\n"); + + return pos; +} + +/* + * Output Tx latency statistics station && restart all statistics information + */ +static ssize_t sta_tx_latency_stat_read(struct file *file, + char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->local; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + char *buf; + int bufsz, ret, i; + int pos = 0; + + bufsz = 20 * IEEE80211_NUM_TIDS * + sizeof(struct ieee80211_tx_latency_stat); + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rcu_read_lock(); + + tx_latency = rcu_dereference(local->tx_latency); + + if (!sta->tx_lat) { + pos += scnprintf(buf + pos, bufsz - pos, + "Tx latency statistics are not enabled\n"); + goto unlock; + } + + pos = sta_tx_latency_stat_header(tx_latency, buf, pos, bufsz); + + pos += scnprintf(buf + pos, bufsz - pos, "%pM\n", sta->sta.addr); + for (i = 0; i < IEEE80211_NUM_TIDS; i++) + pos = sta_tx_latency_stat_table(tx_latency, &sta->tx_lat[i], + buf, pos, bufsz, i); +unlock: + rcu_read_unlock(); + + ret = simple_read_from_buffer(userbuf, count, ppos, buf, pos); + kfree(buf); + + return ret; +} +STA_OPS(tx_latency_stat); + +static ssize_t sta_tx_latency_stat_reset_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + u32 *bins; + int bin_count; + struct sta_info *sta = file->private_data; + int i; + + if (!sta->tx_lat) + return -EINVAL; + + for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + bins = sta->tx_lat[i].bins; + bin_count = sta->tx_lat[i].bin_count; + + sta->tx_lat[i].max = 0; + sta->tx_lat[i].sum = 0; + sta->tx_lat[i].counter = 0; + + if (bin_count) + memset(bins, 0, bin_count * sizeof(u32)); + } + + return count; +} +STA_OPS_W(tx_latency_stat_reset); + #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ sta->debugfs.dir, sta, &sta_ ##name## _ops); @@ -441,6 +573,8 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(last_ack_signal); DEBUGFS_ADD(current_tx_rate); DEBUGFS_ADD(last_rx_rate); + DEBUGFS_ADD(tx_latency_stat); + DEBUGFS_ADD(tx_latency_stat_reset); DEBUGFS_ADD_COUNTER(rx_packets, rx_packets); DEBUGFS_ADD_COUNTER(tx_packets, tx_packets); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5d03c47..ef8b385 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -242,22 +242,6 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, return ret; } -static inline void drv_set_multicast_list(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct netdev_hw_addr_list *mc_list) -{ - bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; - - trace_drv_set_multicast_list(local, sdata, mc_list->count); - - check_sdata_in_driver(sdata); - - if (local->ops->set_multicast_list) - local->ops->set_multicast_list(&local->hw, &sdata->vif, - allmulti, mc_list); - trace_drv_return_void(local); -} - static inline void drv_configure_filter(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, @@ -550,6 +534,22 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, } #endif +static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + might_sleep(); + + sdata = get_bss_sdata(sdata); + check_sdata_in_driver(sdata); + + trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta); + if (local->ops->sta_pre_rcu_remove) + local->ops->sta_pre_rcu_remove(&local->hw, &sdata->vif, + &sta->sta); + trace_drv_return_void(local); +} + static inline __must_check int drv_sta_state(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 531be04..d6ba8414 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -522,7 +522,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, if (csa_settings) ieee80211_send_action_csa(sdata, csa_settings); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + return BSS_CHANGED_BEACON; out: return ret; } @@ -534,7 +534,8 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) int err; u16 capability; - sdata_lock(sdata); + sdata_assert_lock(sdata); + /* update cfg80211 bss information with the new channel */ if (!is_zero_ether_addr(ifibss->bssid)) { capability = WLAN_CAPABILITY_IBSS; @@ -550,19 +551,21 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) capability); /* XXX: should not really modify cfg80211 data */ if (cbss) { - cbss->channel = sdata->local->csa_chandef.chan; + cbss->channel = sdata->csa_chandef.chan; cfg80211_put_bss(sdata->local->hw.wiphy, cbss); } } - ifibss->chandef = sdata->local->csa_chandef; + ifibss->chandef = sdata->csa_chandef; /* generate the beacon */ err = ieee80211_ibss_csa_beacon(sdata, NULL); - sdata_unlock(sdata); if (err < 0) return err; + if (err) + ieee80211_bss_info_change_notify(sdata, err); + return 0; } @@ -753,12 +756,16 @@ static void ieee80211_csa_connection_drop_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.ibss.csa_connection_drop_work); + sdata_lock(sdata); + ieee80211_ibss_disconnect(sdata); synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); /* trigger a scan to find another IBSS network to join */ ieee80211_queue_work(&sdata->local->hw, &sdata->work); + + sdata_unlock(sdata); } static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata) @@ -784,18 +791,10 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings params; struct ieee80211_csa_ie csa_ie; struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_chanctx *chanctx; enum nl80211_channel_type ch_type; - int err, num_chanctx; + int err; u32 sta_flags; - if (sdata->vif.csa_active) - return true; - - if (!sdata->vif.bss_conf.ibss_joined) - return false; - sta_flags = IEEE80211_STA_DISABLE_VHT; switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_5: @@ -823,12 +822,13 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (err) return false; + /* channel switch is not supported, disconnect */ + if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) + goto disconnect; + params.count = csa_ie.count; params.chandef = csa_ie.chandef; - if (ifibss->chandef.chan->band != params.chandef.chan->band) - goto disconnect; - switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: @@ -884,28 +884,12 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, params.radar_required = true; } - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) { - rcu_read_unlock(); - goto disconnect; - } - - /* don't handle for multi-VIF cases */ - chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); - if (chanctx->refcount > 1) { - rcu_read_unlock(); - goto disconnect; - } - num_chanctx = 0; - list_for_each_entry_rcu(chanctx, &sdata->local->chanctx_list, list) - num_chanctx++; - - if (num_chanctx > 1) { - rcu_read_unlock(); - goto disconnect; + if (cfg80211_chandef_identical(¶ms.chandef, + &sdata->vif.bss_conf.chandef)) { + ibss_dbg(sdata, + "received csa with an identical chandef, ignoring\n"); + return true; } - rcu_read_unlock(); /* all checks done, now perform the channel switch. */ ibss_dbg(sdata, @@ -914,19 +898,9 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, params.block_tx = !!csa_ie.mode; - ieee80211_ibss_csa_beacon(sdata, ¶ms); - sdata->csa_radar_required = params.radar_required; - - if (params.block_tx) - ieee80211_stop_queues_by_reason(&sdata->local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); - - sdata->local->csa_chandef = params.chandef; - sdata->vif.csa_active = true; - - ieee80211_bss_info_change_notify(sdata, err); - drv_channel_switch_beacon(sdata, ¶ms.chandef); + if (ieee80211_channel_switch(sdata->local->hw.wiphy, sdata->dev, + ¶ms)) + goto disconnect; ieee80211_ibss_csa_mark_radar(sdata); @@ -962,7 +936,8 @@ ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata, if (len < required_len) return; - ieee80211_ibss_process_chanswitch(sdata, elems, false); + if (!sdata->vif.csa_active) + ieee80211_ibss_process_chanswitch(sdata, elems, false); } static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, @@ -1143,7 +1118,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, goto put_bss; /* process channel switch */ - if (ieee80211_ibss_process_chanswitch(sdata, elems, true)) + if (sdata->vif.csa_active || + ieee80211_ibss_process_chanswitch(sdata, elems, true)) goto put_bss; /* same BSSID */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 29dc505..fb5dbcb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -232,6 +232,7 @@ struct ieee80211_rx_data { struct beacon_data { u8 *head, *tail; int head_len, tail_len; + struct ieee80211_meshconf_ie *meshconf; struct rcu_head rcu_head; }; @@ -540,7 +541,10 @@ struct ieee80211_mesh_sync_ops { struct ieee80211_mgmt *mgmt, struct ieee802_11_elems *elems, struct ieee80211_rx_status *rx_status); - void (*adjust_tbtt)(struct ieee80211_sub_if_data *sdata); + + /* should be called with beacon_data under RCU read lock */ + void (*adjust_tbtt)(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon); /* add other framework functions here */ }; @@ -614,6 +618,9 @@ struct ieee80211_if_mesh { bool chsw_init; u8 chsw_ttl; u16 pre_value; + + /* offset from skb->data while building IE */ + int meshconf_offset; }; #ifdef CONFIG_MAC80211_MESH @@ -728,6 +735,7 @@ struct ieee80211_sub_if_data { u16 sequence_number; __be16 control_port_protocol; bool control_port_no_encrypt; + int encrypt_headroom; struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; @@ -735,6 +743,7 @@ struct ieee80211_sub_if_data { int csa_counter_offset_beacon; int csa_counter_offset_presp; bool csa_radar_required; + struct cfg80211_chan_def csa_chandef; /* used to reconfigure hardware SM PS */ struct work_struct recalc_smps; @@ -774,10 +783,6 @@ struct ieee80211_sub_if_data { u32 mntr_flags; } u; - spinlock_t cleanup_stations_lock; - struct list_head cleanup_stations; - struct work_struct cleanup_stations_wk; - #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *subdir_stations; @@ -811,6 +816,9 @@ static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata) __release(&sdata->wdev.mtx); } +#define sdata_dereference(p, sdata) \ + rcu_dereference_protected(p, lockdep_is_held(&sdata->wdev.mtx)) + static inline void sdata_assert_lock(struct ieee80211_sub_if_data *sdata) { @@ -896,6 +904,24 @@ struct tpt_led_trigger { }; #endif +/* + * struct ieee80211_tx_latency_bin_ranges - Tx latency statistics bins ranges + * + * Measuring Tx latency statistics. Counts how many Tx frames transmitted in a + * certain latency range (in Milliseconds). Each station that uses these + * ranges will have bins to count the amount of frames received in that range. + * The user can configure the ranges via debugfs. + * If ranges is NULL then Tx latency statistics bins are disabled for all + * stations. + * + * @n_ranges: number of ranges that are taken in account + * @ranges: the ranges that the user requested or NULL if disabled. + */ +struct ieee80211_tx_latency_bin_ranges { + int n_ranges; + u32 ranges[]; +}; + /** * mac80211 scan flags - currently active scan mode * @@ -1048,6 +1074,12 @@ struct ieee80211_local { struct timer_list sta_cleanup; int sta_generation; + /* + * Tx latency statistics parameters for all stations. + * Can enable via debugfs (NULL when disabled). + */ + struct ieee80211_tx_latency_bin_ranges __rcu *tx_latency; + struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; @@ -1088,12 +1120,12 @@ struct ieee80211_local { struct work_struct sched_scan_stopped_work; struct ieee80211_sub_if_data __rcu *sched_scan_sdata; + struct cfg80211_sched_scan_request *sched_scan_req; unsigned long leave_oper_channel_time; enum mac80211_scan_state next_scan_state; struct delayed_work scan_work; struct ieee80211_sub_if_data __rcu *scan_sdata; - struct cfg80211_chan_def csa_chandef; /* For backward compatibility only -- do not use */ struct cfg80211_chan_def _oper_chandef; @@ -1228,6 +1260,7 @@ struct ieee80211_csa_ie { u8 mode; u8 count; u8 ttl; + u16 pre_value; }; /* Parsed Information Elements */ @@ -1396,6 +1429,9 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); /* scheduled scan handling */ +int +__ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, + struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); @@ -1414,6 +1450,8 @@ void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); /* channel switch handling */ void ieee80211_csa_finalize_work(struct work_struct *work); +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params); /* interface handling */ int ieee80211_iface_init(void); @@ -1436,8 +1474,6 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); -int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_beacon_data *params); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { @@ -1692,6 +1728,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode); void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata); +void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata); size_t ieee80211_ie_split(const u8 *ies, size_t ielen, const u8 *ids, int n_ids, size_t offset); @@ -1730,7 +1767,6 @@ ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, /* NOTE: only use ieee80211_vif_change_channel() for channel switch */ int __must_check ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, - const struct cfg80211_chan_def *chandef, u32 *changed); void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); @@ -1741,6 +1777,8 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); +void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx); void ieee80211_dfs_cac_timer(unsigned long data); void ieee80211_dfs_cac_timer_work(struct work_struct *work); @@ -1749,6 +1787,15 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work); int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings); +bool ieee80211_cs_valid(const struct ieee80211_cipher_scheme *cs); +bool ieee80211_cs_list_valid(const struct ieee80211_cipher_scheme *cs, int n); +const struct ieee80211_cipher_scheme * +ieee80211_cs_get(struct ieee80211_local *local, u32 cipher, + enum nl80211_iftype iftype); +int ieee80211_cs_headroom(struct ieee80211_local *local, + struct cfg80211_crypto_settings *crypto, + enum nl80211_iftype iftype); + #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline #else diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ff101ea..d624ed4 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -401,6 +401,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) snprintf(sdata->name, IFNAMSIZ, "%s-monitor", wiphy_name(local->hw.wiphy)); + sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; + ieee80211_set_default_queues(sdata); ret = drv_add_interface(local, sdata); @@ -749,6 +751,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, u32 hw_reconf_flags = 0; int i, flushed; struct ps_data *ps; + struct cfg80211_chan_def chandef; clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -783,10 +786,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * This is relevant only in WDS mode, in all other modes we've * already removed all stations when disconnecting or similar, * so warn otherwise. - * - * We call sta_info_flush_cleanup() later, to combine RCU waits. */ - flushed = sta_info_flush_defer(sdata); + flushed = sta_info_flush(sdata); WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); @@ -823,11 +824,13 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); if (sdata->wdev.cac_started) { + chandef = sdata->vif.bss_conf.chandef; WARN_ON(local->suspended); mutex_lock(&local->iflist_mtx); ieee80211_vif_release_channel(sdata); mutex_unlock(&local->iflist_mtx); - cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_ABORTED, + cfg80211_cac_event(sdata->dev, &chandef, + NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); } @@ -886,23 +889,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->work); /* * When we get here, the interface is marked down. + * Free the remaining keys, if there are any + * (shouldn't be, except maybe in WDS mode?) * - * sta_info_flush_cleanup() requires rcu_barrier() - * first to wait for the station call_rcu() calls - * to complete, and we also need synchronize_rcu() - * to wait for the RX path in case it is using the - * interface and enqueuing frames at this very time on + * Force the key freeing to always synchronize_net() + * to wait for the RX path in case it is using this + * interface enqueuing frames * at this very time on * another CPU. */ - synchronize_rcu(); - rcu_barrier(); - sta_info_flush_cleanup(sdata); - - /* - * Free all remaining keys, there shouldn't be any, - * except maybe in WDS mode? - */ - ieee80211_free_keys(sdata); + ieee80211_free_keys(sdata, true); /* fall through */ case NL80211_IFTYPE_AP: @@ -1013,17 +1008,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) atomic_dec(&local->iff_promiscs); sdata->flags ^= IEEE80211_SDATA_PROMISC; } - - /* - * TODO: If somebody needs this on AP interfaces, - * it can be enabled easily but multicast - * addresses from VLANs need to be synced. - */ - if (sdata->vif.type != NL80211_IFTYPE_MONITOR && - sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_AP) - drv_set_multicast_list(local, sdata, &dev->mc); - spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); @@ -1036,11 +1020,10 @@ static void ieee80211_set_multicast_list(struct net_device *dev) */ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - int flushed; int i; /* free extra data */ - ieee80211_free_keys(sdata); + ieee80211_free_keys(sdata, false); ieee80211_debugfs_remove_netdev(sdata); @@ -1050,9 +1033,6 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); - - flushed = sta_info_flush(sdata); - WARN_ON(flushed); } static void ieee80211_uninit(struct net_device *dev) @@ -1270,6 +1250,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE); sdata->control_port_no_encrypt = false; + sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; sdata->noack_map = 0; @@ -1325,7 +1306,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.bssid = NULL; break; case NL80211_IFTYPE_AP_VLAN: - break; case NL80211_IFTYPE_P2P_DEVICE: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; @@ -1496,8 +1476,8 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, bool used = false; list_for_each_entry(sdata, &local->interfaces, list) { - if (memcmp(local->hw.wiphy->addresses[i].addr, - sdata->vif.addr, ETH_ALEN) == 0) { + if (ether_addr_equal(local->hw.wiphy->addresses[i].addr, + sdata->vif.addr)) { used = true; break; } @@ -1557,8 +1537,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, val += inc; list_for_each_entry(sdata, &local->interfaces, list) { - if (memcmp(tmp_addr, sdata->vif.addr, - ETH_ALEN) == 0) { + if (ether_addr_equal(tmp_addr, sdata->vif.addr)) { used = true; break; } @@ -1578,15 +1557,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); } -static void ieee80211_cleanup_sdata_stas_wk(struct work_struct *wk) -{ - struct ieee80211_sub_if_data *sdata; - - sdata = container_of(wk, struct ieee80211_sub_if_data, cleanup_stations_wk); - - ieee80211_cleanup_sdata_stas(sdata); -} - int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) @@ -1659,9 +1629,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_LIST_HEAD(&sdata->key_list); - spin_lock_init(&sdata->cleanup_stations_lock); - INIT_LIST_HEAD(&sdata->cleanup_stations); - INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk); INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work, ieee80211_dfs_cac_timer_work); INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, @@ -1686,6 +1653,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; sdata->user_power_level = local->user_power_level; + sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; + /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 3e51dd7..6ff65a1 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -260,25 +260,29 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, int idx; bool defunikey, defmultikey, defmgmtkey; + /* caller must provide at least one old/new */ + if (WARN_ON(!new && !old)) + return; + if (new) list_add_tail(&new->list, &sdata->key_list); - if (sta && pairwise) { - rcu_assign_pointer(sta->ptk, new); - } else if (sta) { - if (old) - idx = old->conf.keyidx; - else - idx = new->conf.keyidx; - rcu_assign_pointer(sta->gtk[idx], new); - } else { - WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); + WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); - if (old) - idx = old->conf.keyidx; - else - idx = new->conf.keyidx; + if (old) + idx = old->conf.keyidx; + else + idx = new->conf.keyidx; + if (sta) { + if (pairwise) { + rcu_assign_pointer(sta->ptk[idx], new); + sta->ptk_idx = idx; + } else { + rcu_assign_pointer(sta->gtk[idx], new); + sta->gtk_idx = idx; + } + } else { defunikey = old && old == key_mtx_dereference(sdata->local, sdata->default_unicast_key); @@ -312,9 +316,11 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, list_del(&old->list); } -struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, - const u8 *key_data, - size_t seq_len, const u8 *seq) +struct ieee80211_key * +ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, + const u8 *key_data, + size_t seq_len, const u8 *seq, + const struct ieee80211_cipher_scheme *cs) { struct ieee80211_key *key; int i, j, err; @@ -393,6 +399,18 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return ERR_PTR(err); } break; + default: + if (cs) { + size_t len = (seq_len > MAX_PN_LEN) ? + MAX_PN_LEN : seq_len; + + key->conf.iv_len = cs->hdr_len; + key->conf.icv_len = cs->mic_len; + for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) + for (j = 0; j < len; j++) + key->u.gen.rx_pn[i][j] = + seq[len - j - 1]; + } } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); @@ -475,7 +493,7 @@ int ieee80211_key_link(struct ieee80211_key *key, mutex_lock(&sdata->local->key_mtx); if (sta && pairwise) - old_key = key_mtx_dereference(sdata->local, sta->ptk); + old_key = key_mtx_dereference(sdata->local, sta->ptk[idx]); else if (sta) old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]); else @@ -571,14 +589,10 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_iter_keys); -void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) +static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, + struct list_head *keys) { struct ieee80211_key *key, *tmp; - LIST_HEAD(keys); - - cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); - - mutex_lock(&sdata->local->key_mtx); sdata->crypto_tx_tailroom_needed_cnt -= sdata->crypto_tx_tailroom_pending_dec; @@ -590,28 +604,51 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - list_add_tail(&key->list, &keys); + list_add_tail(&key->list, keys); } ieee80211_debugfs_key_update_default(sdata); +} - if (!list_empty(&keys)) { - synchronize_net(); - list_for_each_entry_safe(key, tmp, &keys, list) - __ieee80211_key_destroy(key, false); +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, + bool force_synchronize) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *vlan; + struct ieee80211_key *key, *tmp; + LIST_HEAD(keys); + + cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); + + mutex_lock(&local->key_mtx); + + ieee80211_free_keys_iface(sdata, &keys); + + if (sdata->vif.type == NL80211_IFTYPE_AP) { + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + ieee80211_free_keys_iface(vlan, &keys); } + if (!list_empty(&keys) || force_synchronize) + synchronize_net(); + list_for_each_entry_safe(key, tmp, &keys, list) + __ieee80211_key_destroy(key, false); + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || sdata->crypto_tx_tailroom_pending_dec); + if (sdata->vif.type == NL80211_IFTYPE_AP) { + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || + vlan->crypto_tx_tailroom_pending_dec); + } - mutex_unlock(&sdata->local->key_mtx); + mutex_unlock(&local->key_mtx); } void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta) { - struct ieee80211_key *key, *tmp; - LIST_HEAD(keys); + struct ieee80211_key *key; int i; mutex_lock(&local->key_mtx); @@ -622,25 +659,18 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - list_add(&key->list, &keys); + __ieee80211_key_destroy(key, true); } - key = key_mtx_dereference(local, sta->ptk); - if (key) { + for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + key = key_mtx_dereference(local, sta->ptk[i]); + if (!key) + continue; ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - list_add(&key->list, &keys); - } - - /* - * NB: the station code relies on this being - * done even if there aren't any keys - */ - synchronize_net(); - - list_for_each_entry_safe(key, tmp, &keys, list) __ieee80211_key_destroy(key, true); + } mutex_unlock(&local->key_mtx); } @@ -877,7 +907,7 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, key = ieee80211_key_alloc(keyconf->cipher, keyconf->keyidx, keyconf->keylen, keyconf->key, - 0, NULL); + 0, NULL, NULL); if (IS_ERR(key)) return ERR_CAST(key); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index aaae0ed..19db686 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -18,6 +18,7 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 +#define MAX_PN_LEN 16 struct ieee80211_local; struct ieee80211_sub_if_data; @@ -93,6 +94,10 @@ struct ieee80211_key { u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; + struct { + /* generic cipher scheme */ + u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN]; + } gen; } u; /* number of times this key has been used */ @@ -113,9 +118,11 @@ struct ieee80211_key { struct ieee80211_key_conf conf; }; -struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, - const u8 *key_data, - size_t seq_len, const u8 *seq); +struct ieee80211_key * +ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, + const u8 *key_data, + size_t seq_len, const u8 *seq, + const struct ieee80211_cipher_scheme *cs); /* * Insert a key into data structures (sdata, sta if necessary) * to make it used, free old key. On failure, also free the new key. @@ -129,7 +136,8 @@ void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, bool uni, bool multi); void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); -void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, + bool force_synchronize); void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 21d5d44..2bd5b55 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -250,12 +250,8 @@ static void ieee80211_restart_work(struct work_struct *work) /* wait for scan work complete */ flush_workqueue(local->workqueue); - mutex_lock(&local->mtx); - WARN(test_bit(SCAN_HW_SCANNING, &local->scanning) || - rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)), - "%s called with hardware scan in progress\n", __func__); - mutex_unlock(&local->mtx); + WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), + "%s called with hardware scan in progress\n", __func__); rtnl_lock(); ieee80211_scan_cancel(local); @@ -651,15 +647,14 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, } EXPORT_SYMBOL(ieee80211_alloc_hw); -int ieee80211_register_hw(struct ieee80211_hw *hw) +static int ieee80211_init_cipher_suites(struct ieee80211_local *local) { - struct ieee80211_local *local = hw_to_local(hw); - int result, i; - enum ieee80211_band band; - int channels, max_bitrates; - bool supp_ht, supp_vht; - netdev_features_t feature_whitelist; - struct cfg80211_chan_def dflt_chandef = {}; + bool have_wep = !(IS_ERR(local->wep_tx_tfm) || + IS_ERR(local->wep_rx_tfm)); + bool have_mfp = local->hw.flags & IEEE80211_HW_MFP_CAPABLE; + const struct ieee80211_cipher_scheme *cs = local->hw.cipher_schemes; + int n_suites = 0, r = 0, w = 0; + u32 *suites; static const u32 cipher_suites[] = { /* keep WEP first, it may be removed below */ WLAN_CIPHER_SUITE_WEP40, @@ -671,6 +666,93 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) WLAN_CIPHER_SUITE_AES_CMAC }; + /* Driver specifies the ciphers, we have nothing to do... */ + if (local->hw.wiphy->cipher_suites && have_wep) + return 0; + + /* Set up cipher suites if driver relies on mac80211 cipher defs */ + if (!local->hw.wiphy->cipher_suites && !cs) { + local->hw.wiphy->cipher_suites = cipher_suites; + local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); + + if (!have_mfp) + local->hw.wiphy->n_cipher_suites--; + + if (!have_wep) { + local->hw.wiphy->cipher_suites += 2; + local->hw.wiphy->n_cipher_suites -= 2; + } + + return 0; + } + + if (!local->hw.wiphy->cipher_suites) { + /* + * Driver specifies cipher schemes only + * We start counting ciphers defined by schemes, TKIP and CCMP + */ + n_suites = local->hw.n_cipher_schemes + 2; + + /* check if we have WEP40 and WEP104 */ + if (have_wep) + n_suites += 2; + + /* check if we have AES_CMAC */ + if (have_mfp) + n_suites++; + + suites = kmalloc(sizeof(u32) * n_suites, GFP_KERNEL); + if (!suites) + return -ENOMEM; + + suites[w++] = WLAN_CIPHER_SUITE_CCMP; + suites[w++] = WLAN_CIPHER_SUITE_TKIP; + + if (have_wep) { + suites[w++] = WLAN_CIPHER_SUITE_WEP40; + suites[w++] = WLAN_CIPHER_SUITE_WEP104; + } + + if (have_mfp) + suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC; + + for (r = 0; r < local->hw.n_cipher_schemes; r++) + suites[w++] = cs[r].cipher; + } else { + /* Driver provides cipher suites, but we need to exclude WEP */ + suites = kmemdup(local->hw.wiphy->cipher_suites, + sizeof(u32) * local->hw.wiphy->n_cipher_suites, + GFP_KERNEL); + if (!suites) + return -ENOMEM; + + for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { + u32 suite = local->hw.wiphy->cipher_suites[r]; + + if (suite == WLAN_CIPHER_SUITE_WEP40 || + suite == WLAN_CIPHER_SUITE_WEP104) + continue; + suites[w++] = suite; + } + } + + local->hw.wiphy->cipher_suites = suites; + local->hw.wiphy->n_cipher_suites = w; + local->wiphy_ciphers_allocated = true; + + return 0; +} + +int ieee80211_register_hw(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + int result, i; + enum ieee80211_band band; + int channels, max_bitrates; + bool supp_ht, supp_vht; + netdev_features_t feature_whitelist; + struct cfg80211_chan_def dflt_chandef = {}; + if (hw->flags & IEEE80211_HW_QUEUE_CONTROL && (local->hw.offchannel_tx_hw_queue == IEEE80211_INVAL_HW_QUEUE || local->hw.offchannel_tx_hw_queue >= local->hw.queues)) @@ -851,43 +933,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->hw.wiphy->max_scan_ie_len) local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; - /* Set up cipher suites unless driver already did */ - if (!local->hw.wiphy->cipher_suites) { - local->hw.wiphy->cipher_suites = cipher_suites; - local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); - if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) - local->hw.wiphy->n_cipher_suites--; - } - if (IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)) { - if (local->hw.wiphy->cipher_suites == cipher_suites) { - local->hw.wiphy->cipher_suites += 2; - local->hw.wiphy->n_cipher_suites -= 2; - } else { - u32 *suites; - int r, w = 0; - - /* Filter out WEP */ - - suites = kmemdup( - local->hw.wiphy->cipher_suites, - sizeof(u32) * local->hw.wiphy->n_cipher_suites, - GFP_KERNEL); - if (!suites) { - result = -ENOMEM; - goto fail_wiphy_register; - } - for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { - u32 suite = local->hw.wiphy->cipher_suites[r]; - if (suite == WLAN_CIPHER_SUITE_WEP40 || - suite == WLAN_CIPHER_SUITE_WEP104) - continue; - suites[w++] = suite; - } - local->hw.wiphy->cipher_suites = suites; - local->hw.wiphy->n_cipher_suites = w; - local->wiphy_ciphers_allocated = true; - } - } + WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes, + local->hw.n_cipher_schemes)); + + result = ieee80211_init_cipher_suites(local); + if (result < 0) + goto fail_wiphy_register; if (!local->ops->remain_on_channel) local->hw.wiphy->max_remain_on_channel_duration = 5000; @@ -940,6 +991,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", result); + local->hw.conf.flags = IEEE80211_CONF_IDLE; + ieee80211_led_init(local); rtnl_lock(); @@ -1047,6 +1100,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); + flush_work(&local->sched_scan_stopped_work); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); @@ -1087,6 +1141,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) ieee80211_free_ack_frame, NULL); idr_destroy(&local->ack_status_frames); + kfree(rcu_access_pointer(local->tx_latency)); + wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 896fe3b..5a74b24 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -259,6 +259,9 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, *pos++ = WLAN_EID_MESH_CONFIG; *pos++ = meshconf_len; + /* save a pointer for quick updates in pre-tbtt */ + ifmsh->meshconf_offset = pos - skb->data; + /* Active path selection protocol ID */ *pos++ = ifmsh->mesh_pp_id; /* Active path selection metric ID */ @@ -674,8 +677,6 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) rcu_read_lock(); csa = rcu_dereference(ifmsh->csa); if (csa) { - __le16 pre_value; - pos = skb_put(skb, 13); memset(pos, 0, 13); *pos++ = WLAN_EID_CHANNEL_SWITCH; @@ -697,8 +698,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT : 0x00; put_unaligned_le16(WLAN_REASON_MESH_CHAN, pos); pos += 2; - pre_value = cpu_to_le16(ifmsh->pre_value); - memcpy(pos, &pre_value, 2); + put_unaligned_le16(ifmsh->pre_value, pos); pos += 2; } rcu_read_unlock(); @@ -726,6 +726,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) bcn->tail_len = skb->len; memcpy(bcn->tail, skb->data, bcn->tail_len); + bcn->meshconf = (struct ieee80211_meshconf_ie *) + (bcn->tail + ifmsh->meshconf_offset); dev_kfree_skb(skb); rcu_assign_pointer(ifmsh->beacon, bcn); @@ -943,14 +945,19 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, params.chandef.chan->center_freq); params.block_tx = csa_ie.mode & WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT; - if (beacon) + if (beacon) { ifmsh->chsw_ttl = csa_ie.ttl - 1; - else - ifmsh->chsw_ttl = 0; + if (ifmsh->pre_value >= csa_ie.pre_value) + return false; + ifmsh->pre_value = csa_ie.pre_value; + } - if (ifmsh->chsw_ttl > 0) + if (ifmsh->chsw_ttl < ifmsh->mshcfg.dot11MeshTTL) { if (ieee80211_mesh_csa_beacon(sdata, ¶ms, false) < 0) return false; + } else { + return false; + } sdata->csa_radar_required = params.radar_required; @@ -959,7 +966,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->local->csa_chandef = params.chandef; + sdata->csa_chandef = params.chandef; sdata->vif.csa_active = true; ieee80211_bss_info_change_notify(sdata, err); @@ -1163,7 +1170,6 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, offset_ttl = (len < 42) ? 7 : 10; *(pos + offset_ttl) -= 1; *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; - sdata->u.mesh.chsw_ttl = *(pos + offset_ttl); memcpy(mgmt_fwd, mgmt, len); eth_broadcast_addr(mgmt_fwd->da); @@ -1182,7 +1188,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, u16 pre_value; bool fwd_csa = true; size_t baselen; - u8 *pos, ttl; + u8 *pos; if (mgmt->u.action.u.measurement.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) @@ -1193,8 +1199,8 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, u.action.u.chan_switch.variable); ieee802_11_parse_elems(pos, len - baselen, false, &elems); - ttl = elems.mesh_chansw_params_ie->mesh_ttl; - if (!--ttl) + ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl; + if (!--ifmsh->chsw_ttl) fwd_csa = false; pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 2bc7fd2f..f39a19f 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -215,8 +215,6 @@ int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *ie); void mesh_ids_set_default(struct ieee80211_if_mesh *mesh); -void mesh_mgmt_ies_add(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb); int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata, @@ -303,8 +301,8 @@ void mesh_mpath_table_grow(void); void mesh_mpp_table_grow(void); /* Mesh paths */ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, - u8 ttl, const u8 *target, __le32 target_sn, - __le16 target_rcode, const u8 *ra); + u8 ttl, const u8 *target, u32 target_sn, + u16 target_rcode, const u8 *ra); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 486819c..f951468 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -102,12 +102,11 @@ enum mpath_frame_type { static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, - const u8 *orig_addr, __le32 orig_sn, + const u8 *orig_addr, u32 orig_sn, u8 target_flags, const u8 *target, - __le32 target_sn, const u8 *da, + u32 target_sn, const u8 *da, u8 hop_count, u8 ttl, - __le32 lifetime, __le32 metric, - __le32 preq_id, + u32 lifetime, u32 metric, u32 preq_id, struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -167,33 +166,33 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, if (action == MPATH_PREP) { memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &target_sn, 4); + put_unaligned_le32(target_sn, pos); pos += 4; } else { if (action == MPATH_PREQ) { - memcpy(pos, &preq_id, 4); + put_unaligned_le32(preq_id, pos); pos += 4; } memcpy(pos, orig_addr, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &orig_sn, 4); + put_unaligned_le32(orig_sn, pos); pos += 4; } - memcpy(pos, &lifetime, 4); /* interval for RANN */ + put_unaligned_le32(lifetime, pos); /* interval for RANN */ pos += 4; - memcpy(pos, &metric, 4); + put_unaligned_le32(metric, pos); pos += 4; if (action == MPATH_PREQ) { *pos++ = 1; /* destination count */ *pos++ = target_flags; memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &target_sn, 4); + put_unaligned_le32(target_sn, pos); pos += 4; } else if (action == MPATH_PREP) { memcpy(pos, orig_addr, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &orig_sn, 4); + put_unaligned_le32(orig_sn, pos); pos += 4; } @@ -239,8 +238,8 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, * frame directly but add it to the pending queue instead. */ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, - u8 ttl, const u8 *target, __le32 target_sn, - __le16 target_rcode, const u8 *ra) + u8 ttl, const u8 *target, u32 target_sn, + u16 target_rcode, const u8 *ra) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -254,13 +253,13 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, return -EAGAIN; skb = dev_alloc_skb(local->tx_headroom + - IEEE80211_ENCRYPT_HEADROOM + + sdata->encrypt_headroom + IEEE80211_ENCRYPT_TAILROOM + hdr_len + 2 + 15 /* PERR IE */); if (!skb) return -1; - skb_reserve(skb, local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM); + skb_reserve(skb, local->tx_headroom + sdata->encrypt_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | @@ -293,9 +292,9 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, pos++; memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &target_sn, 4); + put_unaligned_le32(target_sn, pos); pos += 4; - memcpy(pos, &target_rcode, 2); + put_unaligned_le16(target_rcode, pos); /* see note in function header */ prepare_frame_for_deferred_tx(sdata, skb); @@ -592,10 +591,9 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, if (ttl != 0) { mhwmp_dbg(sdata, "replying to the PREQ\n"); mesh_path_sel_frame_tx(MPATH_PREP, 0, orig_addr, - cpu_to_le32(orig_sn), 0, target_addr, - cpu_to_le32(target_sn), mgmt->sa, 0, ttl, - cpu_to_le32(lifetime), cpu_to_le32(metric), - 0, sdata); + orig_sn, 0, target_addr, + target_sn, mgmt->sa, 0, ttl, + lifetime, metric, 0, sdata); } else { ifmsh->mshstats.dropped_frames_ttl++; } @@ -625,11 +623,9 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, } mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr, - cpu_to_le32(orig_sn), target_flags, target_addr, - cpu_to_le32(target_sn), da, - hopcount, ttl, cpu_to_le32(lifetime), - cpu_to_le32(metric), cpu_to_le32(preq_id), - sdata); + orig_sn, target_flags, target_addr, + target_sn, da, hopcount, ttl, lifetime, + metric, preq_id, sdata); if (!is_multicast_ether_addr(da)) ifmsh->mshstats.fwded_unicast++; else @@ -695,11 +691,9 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, target_sn = PREP_IE_TARGET_SN(prep_elem); orig_sn = PREP_IE_ORIG_SN(prep_elem); - mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, - cpu_to_le32(orig_sn), 0, target_addr, - cpu_to_le32(target_sn), next_hop, hopcount, - ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), - 0, sdata); + mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, orig_sn, 0, + target_addr, target_sn, next_hop, hopcount, + ttl, lifetime, metric, 0, sdata); rcu_read_unlock(); sdata->u.mesh.mshstats.fwded_unicast++; @@ -750,8 +744,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, if (!ifmsh->mshcfg.dot11MeshForwarding) goto endperr; mesh_path_error_tx(sdata, ttl, target_addr, - cpu_to_le32(target_sn), - cpu_to_le16(target_rcode), + target_sn, target_rcode, broadcast_addr); } else spin_unlock_bh(&mpath->state_lock); @@ -847,11 +840,9 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, if (ifmsh->mshcfg.dot11MeshForwarding) { mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, - cpu_to_le32(orig_sn), - 0, NULL, 0, broadcast_addr, - hopcount, ttl, cpu_to_le32(interval), - cpu_to_le32(metric + metric_txsta), - 0, sdata); + orig_sn, 0, NULL, 0, broadcast_addr, + hopcount, ttl, interval, + metric + metric_txsta, 0, sdata); } rcu_read_unlock(); @@ -1049,11 +1040,9 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) spin_unlock_bh(&mpath->state_lock); da = (mpath->is_root) ? mpath->rann_snd_addr : broadcast_addr; - mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->vif.addr, - cpu_to_le32(ifmsh->sn), target_flags, mpath->dst, - cpu_to_le32(mpath->sn), da, 0, - ttl, cpu_to_le32(lifetime), 0, - cpu_to_le32(ifmsh->preq_id++), sdata); + mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->vif.addr, ifmsh->sn, + target_flags, mpath->dst, mpath->sn, da, 0, + ttl, lifetime, 0, ifmsh->preq_id++, sdata); mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); enddiscovery: @@ -1212,10 +1201,9 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) switch (ifmsh->mshcfg.dot11MeshHWMPRootMode) { case IEEE80211_PROACTIVE_RANN: mesh_path_sel_frame_tx(MPATH_RANN, flags, sdata->vif.addr, - cpu_to_le32(++ifmsh->sn), - 0, NULL, 0, broadcast_addr, - 0, ifmsh->mshcfg.element_ttl, - cpu_to_le32(interval), 0, 0, sdata); + ++ifmsh->sn, 0, NULL, 0, broadcast_addr, + 0, ifmsh->mshcfg.element_ttl, + interval, 0, 0, sdata); break; case IEEE80211_PROACTIVE_PREQ_WITH_PREP: flags |= IEEE80211_PREQ_PROACTIVE_PREP_FLAG; @@ -1224,11 +1212,10 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) target_flags |= IEEE80211_PREQ_TO_FLAG | IEEE80211_PREQ_USN_FLAG; mesh_path_sel_frame_tx(MPATH_PREQ, flags, sdata->vif.addr, - cpu_to_le32(++ifmsh->sn), target_flags, - (u8 *) broadcast_addr, 0, broadcast_addr, - 0, ifmsh->mshcfg.element_ttl, - cpu_to_le32(interval), - 0, cpu_to_le32(ifmsh->preq_id++), sdata); + ++ifmsh->sn, target_flags, + (u8 *) broadcast_addr, 0, broadcast_addr, + 0, ifmsh->mshcfg.element_ttl, interval, + 0, ifmsh->preq_id++, sdata); break; default: mhwmp_dbg(sdata, "Proactive mechanism not supported\n"); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 89aacfd..7d050ed 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -722,7 +722,6 @@ void mesh_plink_broken(struct sta_info *sta) struct mpath_node *node; struct ieee80211_sub_if_data *sdata = sta->sdata; int i; - __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_DEST_UNREACHABLE); rcu_read_lock(); tbl = rcu_dereference(mesh_paths); @@ -736,9 +735,9 @@ void mesh_plink_broken(struct sta_info *sta) ++mpath->sn; spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(sdata, - sdata->u.mesh.mshcfg.element_ttl, - mpath->dst, cpu_to_le32(mpath->sn), - reason, bcast); + sdata->u.mesh.mshcfg.element_ttl, + mpath->dst, mpath->sn, + WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast); } } rcu_read_unlock(); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 4301aa5..cf83217 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -19,12 +19,6 @@ #define mod_plink_timer(s, t) (mod_timer(&s->plink_timer, \ jiffies + HZ * t / 1000)) -/* We only need a valid sta if user configured a minimum rssi_threshold. */ -#define rssi_threshold_check(sta, sdata) \ - (sdata->u.mesh.mshcfg.rssi_threshold == 0 ||\ - (sta && (s8) -ewma_read(&sta->avg_signal) > \ - sdata->u.mesh.mshcfg.rssi_threshold)) - enum plink_event { PLINK_UNDEFINED, OPN_ACPT, @@ -61,7 +55,17 @@ static const char * const mplevents[] = { static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason); + u8 *da, u16 llid, u16 plid, u16 reason); + + +/* We only need a valid sta if user configured a minimum rssi_threshold. */ +static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold; + return rssi_threshold == 0 || + (sta && (s8) -ewma_read(&sta->avg_signal) > rssi_threshold); +} /** * mesh_plink_fsm_restart - restart a mesh peer link finite state machine @@ -242,7 +246,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta) spin_lock_bh(&sta->lock); changed = __mesh_plink_deactivate(sta); - sta->reason = cpu_to_le16(WLAN_REASON_MESH_PEER_CANCELED); + sta->reason = WLAN_REASON_MESH_PEER_CANCELED; mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, sta->llid, sta->plid, sta->reason); @@ -253,7 +257,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta) static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason) + u8 *da, u16 llid, u16 plid, u16 reason) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -279,7 +283,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, 2 + 8 + /* peering IE */ sdata->u.mesh.ie_len); if (!skb) - return -1; + return err; info = IEEE80211_SKB_CB(skb); skb_reserve(skb, local->tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); @@ -301,7 +305,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action == WLAN_SP_MESH_PEERING_CONFIRM) { /* AID */ pos = skb_put(skb, 2); - memcpy(pos + 2, &plid, 2); + put_unaligned_le16(plid, pos + 2); } if (ieee80211_add_srates_ie(sdata, skb, true, band) || ieee80211_add_ext_srates_ie(sdata, skb, true, band) || @@ -343,14 +347,14 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, *pos++ = ie_len; memcpy(pos, &peering_proto, 2); pos += 2; - memcpy(pos, &llid, 2); + put_unaligned_le16(llid, pos); pos += 2; if (include_plid) { - memcpy(pos, &plid, 2); + put_unaligned_le16(plid, pos); pos += 2; } if (action == WLAN_SP_MESH_PEERING_CLOSE) { - memcpy(pos, &reason, 2); + put_unaligned_le16(reason, pos); pos += 2; } @@ -518,7 +522,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, sta->plink_state == NL80211_PLINK_LISTEN && sdata->u.mesh.accepting_plinks && sdata->u.mesh.mshcfg.auto_open_plinks && - rssi_threshold_check(sta, sdata)) + rssi_threshold_check(sdata, sta)) changed = mesh_plink_open(sta); ieee80211_mps_frame_release(sta, elems); @@ -530,9 +534,10 @@ out: static void mesh_plink_timer(unsigned long data) { struct sta_info *sta; - __le16 llid, plid, reason; + u16 reason = 0; struct ieee80211_sub_if_data *sdata; struct mesh_config *mshcfg; + enum ieee80211_self_protected_actioncode action = 0; /* * This STA is valid because sta_info_destroy() will @@ -553,9 +558,6 @@ static void mesh_plink_timer(unsigned long data) mpl_dbg(sta->sdata, "Mesh plink timer for %pM fired on state %s\n", sta->sta.addr, mplstates[sta->plink_state]); - reason = 0; - llid = sta->llid; - plid = sta->plid; sdata = sta->sdata; mshcfg = &sdata->u.mesh.mshcfg; @@ -574,33 +576,31 @@ static void mesh_plink_timer(unsigned long data) rand % sta->plink_timeout; ++sta->plink_retries; mod_plink_timer(sta, sta->plink_timeout); - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, - sta->sta.addr, llid, 0, 0); + action = WLAN_SP_MESH_PEERING_OPEN; break; } - reason = cpu_to_le16(WLAN_REASON_MESH_MAX_RETRIES); + reason = WLAN_REASON_MESH_MAX_RETRIES; /* fall through on else */ case NL80211_PLINK_CNF_RCVD: /* confirm timer */ if (!reason) - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIRM_TIMEOUT); + reason = WLAN_REASON_MESH_CONFIRM_TIMEOUT; sta->plink_state = NL80211_PLINK_HOLDING; mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case NL80211_PLINK_HOLDING: /* holding timer */ del_timer(&sta->plink_timer); mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->lock); break; default: - spin_unlock_bh(&sta->lock); break; } + spin_unlock_bh(&sta->lock); + if (action) + mesh_plink_frame_tx(sdata, action, sta->sta.addr, + sta->llid, sta->plid, reason); } static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) @@ -612,9 +612,40 @@ static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) add_timer(&sta->plink_timer); } +static bool llid_in_use(struct ieee80211_sub_if_data *sdata, + u16 llid) +{ + struct ieee80211_local *local = sdata->local; + bool in_use = false; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (!memcmp(&sta->llid, &llid, sizeof(llid))) { + in_use = true; + break; + } + } + rcu_read_unlock(); + + return in_use; +} + +static u16 mesh_get_new_llid(struct ieee80211_sub_if_data *sdata) +{ + u16 llid; + + do { + get_random_bytes(&llid, sizeof(llid)); + /* for mesh PS we still only have the AID range for TIM bits */ + llid = (llid % IEEE80211_MAX_AID) + 1; + } while (llid_in_use(sdata, llid)); + + return llid; +} + u32 mesh_plink_open(struct sta_info *sta) { - __le16 llid; struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; @@ -622,8 +653,7 @@ u32 mesh_plink_open(struct sta_info *sta) return 0; spin_lock_bh(&sta->lock); - get_random_bytes(&llid, 2); - sta->llid = llid; + sta->llid = mesh_get_new_llid(sdata); if (sta->plink_state != NL80211_PLINK_LISTEN && sta->plink_state != NL80211_PLINK_BLOCKED) { spin_unlock_bh(&sta->lock); @@ -640,7 +670,7 @@ u32 mesh_plink_open(struct sta_info *sta) changed = ieee80211_mps_local_status_update(sdata); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, - sta->sta.addr, llid, 0, 0); + sta->sta.addr, sta->llid, 0, 0); return changed; } @@ -656,390 +686,147 @@ u32 mesh_plink_block(struct sta_info *sta) return changed; } - -void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - struct ieee80211_rx_status *rx_status) +static void mesh_plink_close(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + enum plink_event event) { struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; - struct ieee802_11_elems elems; - struct sta_info *sta; - enum plink_event event; - enum ieee80211_self_protected_actioncode ftype; - size_t baselen; - bool matches_local = true; - u8 ie_len; - u8 *baseaddr; - u32 changed = 0; - __le16 plid, llid, reason; - - /* need action_code, aux */ - if (len < IEEE80211_MIN_ACTION_SIZE + 3) - return; - - if (sdata->u.mesh.user_mpm) - /* userspace must register for these */ - return; - - if (is_multicast_ether_addr(mgmt->da)) { - mpl_dbg(sdata, - "Mesh plink: ignore frame from multicast address\n"); - return; - } - - baseaddr = mgmt->u.action.u.self_prot.variable; - baselen = (u8 *) mgmt->u.action.u.self_prot.variable - (u8 *) mgmt; - if (mgmt->u.action.u.self_prot.action_code == - WLAN_SP_MESH_PEERING_CONFIRM) { - baseaddr += 4; - baselen += 4; - } - ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems); - - if (!elems.peering) { - mpl_dbg(sdata, - "Mesh plink: missing necessary peer link ie\n"); - return; - } - if (elems.rsn_len && - sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { - mpl_dbg(sdata, - "Mesh plink: can't establish link with secure peer\n"); - return; - } + u16 reason = (event == CLS_ACPT) ? + WLAN_REASON_MESH_CLOSE : WLAN_REASON_MESH_CONFIG; - ftype = mgmt->u.action.u.self_prot.action_code; - ie_len = elems.peering_len; - if ((ftype == WLAN_SP_MESH_PEERING_OPEN && ie_len != 4) || - (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || - (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 - && ie_len != 8)) { - mpl_dbg(sdata, - "Mesh plink: incorrect plink ie length %d %d\n", - ftype, ie_len); - return; - } - - if (ftype != WLAN_SP_MESH_PEERING_CLOSE && - (!elems.mesh_id || !elems.mesh_config)) { - mpl_dbg(sdata, "Mesh plink: missing necessary ie\n"); - return; - } - /* Note the lines below are correct, the llid in the frame is the plid - * from the point of view of this host. - */ - memcpy(&plid, PLINK_GET_LLID(elems.peering), 2); - if (ftype == WLAN_SP_MESH_PEERING_CONFIRM || - (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) - memcpy(&llid, PLINK_GET_PLID(elems.peering), 2); - - /* WARNING: Only for sta pointer, is dropped & re-acquired */ - rcu_read_lock(); - - sta = sta_info_get(sdata, mgmt->sa); - if (!sta && ftype != WLAN_SP_MESH_PEERING_OPEN) { - mpl_dbg(sdata, "Mesh plink: cls or cnf from unknown peer\n"); - rcu_read_unlock(); - return; - } - - if (ftype == WLAN_SP_MESH_PEERING_OPEN && - !rssi_threshold_check(sta, sdata)) { - mpl_dbg(sdata, "Mesh plink: %pM does not meet rssi threshold\n", - mgmt->sa); - rcu_read_unlock(); - return; - } - - if (sta && !test_sta_flag(sta, WLAN_STA_AUTH)) { - mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n"); - rcu_read_unlock(); - return; - } + sta->reason = reason; + sta->plink_state = NL80211_PLINK_HOLDING; + mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); +} - if (sta && sta->plink_state == NL80211_PLINK_BLOCKED) { - rcu_read_unlock(); - return; - } +static u32 mesh_plink_establish(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; + u32 changed = 0; - /* Now we will figure out the appropriate event... */ - event = PLINK_UNDEFINED; - if (ftype != WLAN_SP_MESH_PEERING_CLOSE && - !mesh_matches_local(sdata, &elems)) { - matches_local = false; - switch (ftype) { - case WLAN_SP_MESH_PEERING_OPEN: - event = OPN_RJCT; - break; - case WLAN_SP_MESH_PEERING_CONFIRM: - event = CNF_RJCT; - break; - default: - break; - } - } + del_timer(&sta->plink_timer); + sta->plink_state = NL80211_PLINK_ESTAB; + changed |= mesh_plink_inc_estab_count(sdata); + changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); + mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, mshcfg->power_mode); + return changed; +} - if (!sta && !matches_local) { - rcu_read_unlock(); - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); - llid = 0; - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - mgmt->sa, llid, plid, reason); - return; - } else if (!sta) { - /* ftype == WLAN_SP_MESH_PEERING_OPEN */ - if (!mesh_plink_free_count(sdata)) { - mpl_dbg(sdata, "Mesh plink error: no more free plinks\n"); - rcu_read_unlock(); - return; - } - event = OPN_ACPT; - } else if (matches_local) { - switch (ftype) { - case WLAN_SP_MESH_PEERING_OPEN: - if (!mesh_plink_free_count(sdata) || - (sta->plid && sta->plid != plid)) - event = OPN_IGNR; - else - event = OPN_ACPT; - break; - case WLAN_SP_MESH_PEERING_CONFIRM: - if (!mesh_plink_free_count(sdata) || - (sta->llid != llid || sta->plid != plid)) - event = CNF_IGNR; - else - event = CNF_ACPT; - break; - case WLAN_SP_MESH_PEERING_CLOSE: - if (sta->plink_state == NL80211_PLINK_ESTAB) - /* Do not check for llid or plid. This does not - * follow the standard but since multiple plinks - * per sta are not supported, it is necessary in - * order to avoid a livelock when MP A sees an - * establish peer link to MP B but MP B does not - * see it. This can be caused by a timeout in - * B's peer link establishment or B beign - * restarted. - */ - event = CLS_ACPT; - else if (sta->plid != plid) - event = CLS_IGNR; - else if (ie_len == 7 && sta->llid != llid) - event = CLS_IGNR; - else - event = CLS_ACPT; - break; - default: - mpl_dbg(sdata, "Mesh plink: unknown frame subtype\n"); - rcu_read_unlock(); - return; - } - } +/** + * mesh_plink_fsm - step @sta MPM based on @event + * + * @sdata: interface + * @sta: mesh neighbor + * @event: peering event + * + * Return: changed MBSS flags + */ +static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, enum plink_event event) +{ + struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; + enum ieee80211_self_protected_actioncode action = 0; + u32 changed = 0; - if (event == OPN_ACPT) { - rcu_read_unlock(); - /* allocate sta entry if necessary and update info */ - sta = mesh_sta_info_get(sdata, mgmt->sa, &elems); - if (!sta) { - mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); - rcu_read_unlock(); - return; - } - } + mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr, + mplstates[sta->plink_state], mplevents[event]); - mpl_dbg(sdata, "peer %pM in state %s got event %s\n", mgmt->sa, - mplstates[sta->plink_state], mplevents[event]); - reason = 0; spin_lock_bh(&sta->lock); switch (sta->plink_state) { - /* spin_unlock as soon as state is updated at each case */ case NL80211_PLINK_LISTEN: switch (event) { case CLS_ACPT: mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->lock); break; case OPN_ACPT: sta->plink_state = NL80211_PLINK_OPN_RCVD; - sta->plid = plid; - get_random_bytes(&llid, 2); - sta->llid = llid; + sta->llid = mesh_get_new_llid(sdata); mesh_plink_timer_set(sta, mshcfg->dot11MeshRetryTimeout); /* set the non-peer mode to active during peering */ changed |= ieee80211_mps_local_status_update(sdata); - - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_OPEN, - sta->sta.addr, llid, 0, 0); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, llid, plid, 0); + action = WLAN_SP_MESH_PEERING_OPEN; break; default: - spin_unlock_bh(&sta->lock); break; } break; - case NL80211_PLINK_OPN_SNT: switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: - if (!reason) - reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); - sta->reason = reason; - sta->plink_state = NL80211_PLINK_HOLDING; - if (!mod_plink_timer(sta, - mshcfg->dot11MeshHoldingTimeout)) - sta->ignore_plink_timer = true; - - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + mesh_plink_close(sdata, sta, event); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case OPN_ACPT: /* retry timer is left untouched */ sta->plink_state = NL80211_PLINK_OPN_RCVD; - sta->plid = plid; - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, llid, plid, 0); + action = WLAN_SP_MESH_PEERING_CONFIRM; break; case CNF_ACPT: sta->plink_state = NL80211_PLINK_CNF_RCVD; if (!mod_plink_timer(sta, mshcfg->dot11MeshConfirmTimeout)) sta->ignore_plink_timer = true; - - spin_unlock_bh(&sta->lock); break; default: - spin_unlock_bh(&sta->lock); break; } break; - case NL80211_PLINK_OPN_RCVD: switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: - if (!reason) - reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); - sta->reason = reason; - sta->plink_state = NL80211_PLINK_HOLDING; - if (!mod_plink_timer(sta, - mshcfg->dot11MeshHoldingTimeout)) - sta->ignore_plink_timer = true; - - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + mesh_plink_close(sdata, sta, event); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case OPN_ACPT: - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, llid, plid, 0); + action = WLAN_SP_MESH_PEERING_CONFIRM; break; case CNF_ACPT: - del_timer(&sta->plink_timer); - sta->plink_state = NL80211_PLINK_ESTAB; - spin_unlock_bh(&sta->lock); - changed |= mesh_plink_inc_estab_count(sdata); - changed |= mesh_set_ht_prot_mode(sdata); - changed |= mesh_set_short_slot_time(sdata); - mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", - sta->sta.addr); - ieee80211_mps_sta_status_update(sta); - changed |= ieee80211_mps_set_sta_local_pm(sta, - mshcfg->power_mode); + changed |= mesh_plink_establish(sdata, sta); break; default: - spin_unlock_bh(&sta->lock); break; } break; - case NL80211_PLINK_CNF_RCVD: switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: - if (!reason) - reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); - sta->reason = reason; - sta->plink_state = NL80211_PLINK_HOLDING; - if (!mod_plink_timer(sta, - mshcfg->dot11MeshHoldingTimeout)) - sta->ignore_plink_timer = true; - - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + mesh_plink_close(sdata, sta, event); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case OPN_ACPT: - del_timer(&sta->plink_timer); - sta->plink_state = NL80211_PLINK_ESTAB; - spin_unlock_bh(&sta->lock); - changed |= mesh_plink_inc_estab_count(sdata); - changed |= mesh_set_ht_prot_mode(sdata); - changed |= mesh_set_short_slot_time(sdata); - mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", - sta->sta.addr); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, llid, plid, 0); - ieee80211_mps_sta_status_update(sta); - changed |= ieee80211_mps_set_sta_local_pm(sta, - mshcfg->power_mode); + changed |= mesh_plink_establish(sdata, sta); + action = WLAN_SP_MESH_PEERING_CONFIRM; break; default: - spin_unlock_bh(&sta->lock); break; } break; - case NL80211_PLINK_ESTAB: switch (event) { case CLS_ACPT: - reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); - sta->reason = reason; changed |= __mesh_plink_deactivate(sta); - sta->plink_state = NL80211_PLINK_HOLDING; - llid = sta->llid; - mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); - spin_unlock_bh(&sta->lock); changed |= mesh_set_ht_prot_mode(sdata); changed |= mesh_set_short_slot_time(sdata); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + mesh_plink_close(sdata, sta, event); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case OPN_ACPT: - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, llid, plid, 0); + action = WLAN_SP_MESH_PEERING_CONFIRM; break; default: - spin_unlock_bh(&sta->lock); break; } break; @@ -1049,32 +836,271 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, if (del_timer(&sta->plink_timer)) sta->ignore_plink_timer = 1; mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->lock); break; case OPN_ACPT: case CNF_ACPT: case OPN_RJCT: case CNF_RJCT: - llid = sta->llid; - reason = sta->reason; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + action = WLAN_SP_MESH_PEERING_CLOSE; break; default: - spin_unlock_bh(&sta->lock); + break; } break; default: /* should not get here, PLINK_BLOCKED is dealt with at the * beginning of the function */ - spin_unlock_bh(&sta->lock); break; } + spin_unlock_bh(&sta->lock); + if (action) { + mesh_plink_frame_tx(sdata, action, sta->sta.addr, + sta->llid, sta->plid, sta->reason); + + /* also send confirm in open case */ + if (action == WLAN_SP_MESH_PEERING_OPEN) { + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, sta->llid, + sta->plid, 0); + } + } + + return changed; +} + +/* + * mesh_plink_get_event - get correct MPM event + * + * @sdata: interface + * @sta: peer, leave NULL if processing a frame from a new suitable peer + * @elems: peering management IEs + * @ftype: frame type + * @llid: peer's peer link ID + * @plid: peer's local link ID + * + * Return: new peering event for @sta, but PLINK_UNDEFINED should be treated as + * an error. + */ +static enum plink_event +mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee802_11_elems *elems, + enum ieee80211_self_protected_actioncode ftype, + u16 llid, u16 plid) +{ + enum plink_event event = PLINK_UNDEFINED; + u8 ie_len = elems->peering_len; + bool matches_local; + + matches_local = (ftype == WLAN_SP_MESH_PEERING_CLOSE || + mesh_matches_local(sdata, elems)); + + /* deny open request from non-matching peer */ + if (!matches_local && !sta) { + event = OPN_RJCT; + goto out; + } + + if (!sta) { + if (ftype != WLAN_SP_MESH_PEERING_OPEN) { + mpl_dbg(sdata, "Mesh plink: cls or cnf from unknown peer\n"); + goto out; + } + /* ftype == WLAN_SP_MESH_PEERING_OPEN */ + if (!mesh_plink_free_count(sdata)) { + mpl_dbg(sdata, "Mesh plink error: no more free plinks\n"); + goto out; + } + } else { + if (!test_sta_flag(sta, WLAN_STA_AUTH)) { + mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n"); + goto out; + } + if (sta->plink_state == NL80211_PLINK_BLOCKED) + goto out; + } + + /* new matching peer */ + if (!sta) { + event = OPN_ACPT; + goto out; + } + + switch (ftype) { + case WLAN_SP_MESH_PEERING_OPEN: + if (!matches_local) + event = OPN_RJCT; + if (!mesh_plink_free_count(sdata) || + (sta->plid && sta->plid != plid)) + event = OPN_IGNR; + else + event = OPN_ACPT; + break; + case WLAN_SP_MESH_PEERING_CONFIRM: + if (!matches_local) + event = CNF_RJCT; + if (!mesh_plink_free_count(sdata) || + (sta->llid != llid || sta->plid != plid)) + event = CNF_IGNR; + else + event = CNF_ACPT; + break; + case WLAN_SP_MESH_PEERING_CLOSE: + if (sta->plink_state == NL80211_PLINK_ESTAB) + /* Do not check for llid or plid. This does not + * follow the standard but since multiple plinks + * per sta are not supported, it is necessary in + * order to avoid a livelock when MP A sees an + * establish peer link to MP B but MP B does not + * see it. This can be caused by a timeout in + * B's peer link establishment or B beign + * restarted. + */ + event = CLS_ACPT; + else if (sta->plid != plid) + event = CLS_IGNR; + else if (ie_len == 8 && sta->llid != llid) + event = CLS_IGNR; + else + event = CLS_ACPT; + break; + default: + mpl_dbg(sdata, "Mesh plink: unknown frame subtype\n"); + break; + } + +out: + return event; +} +static void +mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + struct ieee802_11_elems *elems) +{ + + struct sta_info *sta; + enum plink_event event; + enum ieee80211_self_protected_actioncode ftype; + u32 changed = 0; + u8 ie_len = elems->peering_len; + __le16 _plid, _llid; + u16 plid, llid = 0; + + if (!elems->peering) { + mpl_dbg(sdata, + "Mesh plink: missing necessary peer link ie\n"); + return; + } + + if (elems->rsn_len && + sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { + mpl_dbg(sdata, + "Mesh plink: can't establish link with secure peer\n"); + return; + } + + ftype = mgmt->u.action.u.self_prot.action_code; + if ((ftype == WLAN_SP_MESH_PEERING_OPEN && ie_len != 4) || + (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || + (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 + && ie_len != 8)) { + mpl_dbg(sdata, + "Mesh plink: incorrect plink ie length %d %d\n", + ftype, ie_len); + return; + } + + if (ftype != WLAN_SP_MESH_PEERING_CLOSE && + (!elems->mesh_id || !elems->mesh_config)) { + mpl_dbg(sdata, "Mesh plink: missing necessary ie\n"); + return; + } + /* Note the lines below are correct, the llid in the frame is the plid + * from the point of view of this host. + */ + memcpy(&_plid, PLINK_GET_LLID(elems->peering), sizeof(__le16)); + plid = le16_to_cpu(_plid); + if (ftype == WLAN_SP_MESH_PEERING_CONFIRM || + (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) { + memcpy(&_llid, PLINK_GET_PLID(elems->peering), sizeof(__le16)); + llid = le16_to_cpu(_llid); + } + + /* WARNING: Only for sta pointer, is dropped & re-acquired */ + rcu_read_lock(); + + sta = sta_info_get(sdata, mgmt->sa); + + if (ftype == WLAN_SP_MESH_PEERING_OPEN && + !rssi_threshold_check(sdata, sta)) { + mpl_dbg(sdata, "Mesh plink: %pM does not meet rssi threshold\n", + mgmt->sa); + goto unlock_rcu; + } + + /* Now we will figure out the appropriate event... */ + event = mesh_plink_get_event(sdata, sta, elems, ftype, llid, plid); + + if (event == OPN_ACPT) { + rcu_read_unlock(); + /* allocate sta entry if necessary and update info */ + sta = mesh_sta_info_get(sdata, mgmt->sa, elems); + if (!sta) { + mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); + goto unlock_rcu; + } + sta->plid = plid; + } else if (!sta && event == OPN_RJCT) { + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + mgmt->sa, 0, plid, + WLAN_REASON_MESH_CONFIG); + goto unlock_rcu; + } else if (!sta || event == PLINK_UNDEFINED) { + /* something went wrong */ + goto unlock_rcu; + } + + changed |= mesh_plink_fsm(sdata, sta, event); + +unlock_rcu: rcu_read_unlock(); if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } + +void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) +{ + struct ieee802_11_elems elems; + size_t baselen; + u8 *baseaddr; + + /* need action_code, aux */ + if (len < IEEE80211_MIN_ACTION_SIZE + 3) + return; + + if (sdata->u.mesh.user_mpm) + /* userspace must register for these */ + return; + + if (is_multicast_ether_addr(mgmt->da)) { + mpl_dbg(sdata, + "Mesh plink: ignore frame from multicast address\n"); + return; + } + + baseaddr = mgmt->u.action.u.self_prot.variable; + baselen = (u8 *) mgmt->u.action.u.self_prot.variable - (u8 *) mgmt; + if (mgmt->u.action.u.self_prot.action_code == + WLAN_SP_MESH_PEERING_CONFIRM) { + baseaddr += 4; + baselen += 4; + } + ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems); + mesh_process_plink_frame(sdata, mgmt, &elems); +} diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 0f79b78..2802f9d 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -576,10 +576,9 @@ void ieee80211_mps_frame_release(struct sta_info *sta, int ac, buffer_local = 0; bool has_buffered = false; - /* TIM map only for LLID <= IEEE80211_MAX_AID */ if (sta->plink_state == NL80211_PLINK_ESTAB) has_buffered = ieee80211_check_tim(elems->tim, elems->tim_len, - le16_to_cpu(sta->llid) % IEEE80211_MAX_AID); + sta->llid); if (has_buffered) mps_dbg(sta->sdata, "%pM indicates buffered frames\n", diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 05a256b..2bc5dc2 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -92,12 +92,20 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (stype != IEEE80211_STYPE_BEACON) return; - /* The current tsf is a first approximation for the timestamp - * for the received beacon. Further down we try to get a - * better value from the rx_status->mactime field if - * available. Also we have to call drv_get_tsf() before - * entering the rcu-read section.*/ - t_r = drv_get_tsf(local, sdata); + /* + * Get time when timestamp field was received. If we don't + * have rx timestamps, then use current tsf as an approximation. + * drv_get_tsf() must be called before entering the rcu-read + * section. + */ + if (ieee80211_have_rx_timestamp(rx_status)) + t_r = ieee80211_calculate_rx_timestamp(local, rx_status, + 24 + 12 + + elems->total_len + + FCS_LEN, + 24); + else + t_r = drv_get_tsf(local, sdata); rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -117,14 +125,6 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, goto no_sync; } - if (ieee80211_have_rx_timestamp(rx_status)) - /* time when timestamp field was received */ - t_r = ieee80211_calculate_rx_timestamp(local, rx_status, - 24 + 12 + - elems->total_len + - FCS_LEN, - 24); - /* Timing offset calculation (see 13.13.2.2.2) */ t_t = le64_to_cpu(mgmt->u.beacon.timestamp); sta->t_offset = t_t - t_r; @@ -164,12 +164,15 @@ no_sync: rcu_read_unlock(); } -static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) +static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 cap; WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); BUG_ON(!rcu_read_lock_held()); + cap = beacon->meshconf->meshconf_cap; spin_lock_bh(&ifmsh->sync_offset_lock); @@ -194,6 +197,10 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) ifmsh->adjusting_tbtt = false; } spin_unlock_bh(&ifmsh->sync_offset_lock); + + beacon->meshconf->meshconf_cap = ifmsh->adjusting_tbtt ? + IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING | cap : + ~IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING & cap; } static const struct sync_method sync_methods[] = { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d7504ab..9c2c7ee 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -330,6 +330,16 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, if (WARN_ON_ONCE(!sta)) return -EINVAL; + /* + * if bss configuration changed store the new one - + * this may be applicable even if channel is identical + */ + ht_opmode = le16_to_cpu(ht_oper->operation_mode); + if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { + *changed |= BSS_CHANGED_HT; + sdata->vif.bss_conf.ht_operation_mode = ht_opmode; + } + chan = sdata->vif.bss_conf.chandef.chan; sband = local->hw.wiphy->bands[chan->band]; @@ -416,14 +426,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, IEEE80211_RC_BW_CHANGED); } - ht_opmode = le16_to_cpu(ht_oper->operation_mode); - - /* if bss configuration changed store the new one */ - if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { - *changed |= BSS_CHANGED_HT; - sdata->vif.bss_conf.ht_operation_mode = ht_opmode; - } - return 0; } @@ -714,7 +716,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) } /* if present, add any custom IEs that go before HT */ - if (assoc_data->ie_len && assoc_data->ie) { + if (assoc_data->ie_len) { static const u8 before_ht[] = { WLAN_EID_SSID, WLAN_EID_SUPP_RATES, @@ -748,7 +750,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) &assoc_data->ap_vht_cap); /* if present, add any custom non-vendor IEs that go after HT */ - if (assoc_data->ie_len && assoc_data->ie) { + if (assoc_data->ie_len) { noffset = ieee80211_ie_split_vendor(assoc_data->ie, assoc_data->ie_len, offset); @@ -779,7 +781,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) } /* add any remaining custom (i.e. vendor specific here) IEs */ - if (assoc_data->ie_len && assoc_data->ie) { + if (assoc_data->ie_len) { noffset = assoc_data->ie_len; pos = skb_put(skb, noffset - offset); memcpy(pos, assoc_data->ie + offset, noffset - offset); @@ -886,8 +888,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; - ret = ieee80211_vif_change_channel(sdata, &local->csa_chandef, - &changed); + ret = ieee80211_vif_change_channel(sdata, &changed); if (ret) { sdata_info(sdata, "vif channel switch failed, disconnecting\n"); @@ -897,7 +898,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) } if (!local->use_chanctx) { - local->_oper_chandef = local->csa_chandef; + local->_oper_chandef = sdata->csa_chandef; /* Call "hw_config" only if doing sw channel switch. * Otherwise update the channel directly */ @@ -908,7 +909,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) } /* XXX: shouldn't really modify cfg80211-owned data! */ - ifmgd->associated->channel = local->csa_chandef.chan; + ifmgd->associated->channel = sdata->csa_chandef.chan; /* XXX: wait for a beacon first? */ ieee80211_wake_queues_by_reason(&local->hw, @@ -1035,7 +1036,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&local->chanctx_mtx); - local->csa_chandef = csa_ie.chandef; + sdata->csa_chandef = csa_ie.chandef; if (csa_ie.mode) ieee80211_stop_queues_by_reason(&local->hw, @@ -1398,10 +1399,12 @@ void ieee80211_dfs_cac_timer_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = container_of(delayed_work, struct ieee80211_sub_if_data, dfs_cac_timer_work); + struct cfg80211_chan_def chandef = sdata->vif.bss_conf.chandef; ieee80211_vif_release_channel(sdata); - - cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_FINISHED, GFP_KERNEL); + cfg80211_cac_event(sdata->dev, &chandef, + NL80211_RADAR_CAC_FINISHED, + GFP_KERNEL); } /* MLME */ @@ -1695,7 +1698,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(ifmgd->bssid, 0, ETH_ALEN); /* remove AP and TDLS peers */ - sta_info_flush_defer(sdata); + sta_info_flush(sdata); /* finally reset all BSS / config parameters */ changed |= ieee80211_reset_erp_info(sdata); @@ -1745,6 +1748,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags = 0; ieee80211_vif_release_channel(sdata); + + sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; } void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, @@ -1910,6 +1915,8 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) already = true; + ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL; + mutex_unlock(&sdata->local->mtx); if (already) @@ -4189,6 +4196,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, sdata->control_port_protocol = req->crypto.control_port_ethertype; sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt; + sdata->encrypt_headroom = ieee80211_cs_headroom(local, &req->crypto, + sdata->vif.type); /* kick off associate process */ diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 3401262..af64fb8 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -37,9 +37,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND); - /* flush out all packets and station cleanup call_rcu()s */ + /* flush out all packets */ synchronize_net(); - rcu_barrier(); ieee80211_flush_queues(local, NULL); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 505bc0d..b95e16c 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -54,6 +54,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *chanctx_conf; + ieee80211_sta_set_rx_nss(sta); + if (!ref) return; @@ -67,8 +69,6 @@ static inline void rate_control_rate_init(struct sta_info *sta) sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; - ieee80211_sta_set_rx_nss(sta); - ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista, priv_sta); rcu_read_unlock(); diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 7fa1b36..d2f19f7 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -422,10 +422,9 @@ init_sample_table(struct minstrel_sta_info *mi) memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates); for (col = 0; col < SAMPLE_COLUMNS; col++) { + prandom_bytes(rnd, sizeof(rnd)); for (i = 0; i < mi->n_rates; i++) { - get_random_bytes(rnd, sizeof(rnd)); new_idx = (i + rnd[i & 7]) % mi->n_rates; - while (SAMPLE_TBL(mi, new_idx, col) != 0xff) new_idx = (new_idx + 1) % mi->n_rates; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5d60779..d2ed18d 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -135,7 +135,7 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi); static int minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) { - return GROUP_IDX((rate->idx / MCS_GROUP_RATES) + 1, + return GROUP_IDX((rate->idx / 8) + 1, !!(rate->flags & IEEE80211_TX_RC_SHORT_GI), !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } @@ -148,7 +148,7 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (rate->flags & IEEE80211_TX_RC_MCS) { group = minstrel_ht_get_group_idx(rate); - idx = rate->idx % MCS_GROUP_RATES; + idx = rate->idx % 8; } else { group = MINSTREL_CCK_GROUP; @@ -226,7 +226,7 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); nsecs += minstrel_mcs_groups[group].duration[rate]; - tp = 1000000 * ((mr->probability * 1000) / nsecs); + tp = 1000000 * ((prob * 1000) / nsecs); mr->cur_tp = MINSTREL_TRUNC(tp); } @@ -277,13 +277,15 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!(mg->supported & BIT(i))) continue; + index = MCS_GROUP_RATES * group + i; + /* initialize rates selections starting indexes */ if (!mg_rates_valid) { mg->max_tp_rate = mg->max_tp_rate2 = mg->max_prob_rate = i; if (!mi_rates_valid) { mi->max_tp_rate = mi->max_tp_rate2 = - mi->max_prob_rate = i; + mi->max_prob_rate = index; mi_rates_valid = true; } mg_rates_valid = true; @@ -291,7 +293,6 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mr = &mg->rates[i]; mr->retry_updated = false; - index = MCS_GROUP_RATES * group + i; minstrel_calc_rate_ewma(mr); minstrel_ht_calc_tp(mi, group, i); @@ -636,8 +637,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; flags = 0; } else { - idx = index % MCS_GROUP_RATES + - (group->streams - 1) * MCS_GROUP_RATES; + idx = index % MCS_GROUP_RATES + (group->streams - 1) * 8; flags = IEEE80211_TX_RC_MCS | group->flags; } @@ -701,12 +701,16 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mi->sample_tries) return -1; - mg = &mi->groups[mi->sample_group]; + sample_group = mi->sample_group; + mg = &mi->groups[sample_group]; sample_idx = sample_table[mg->column][mg->index]; + minstrel_next_sample_idx(mi); + + if (!(mg->supported & BIT(sample_idx))) + return -1; + mr = &mg->rates[sample_idx]; - sample_group = mi->sample_group; sample_idx += sample_group * MCS_GROUP_RATES; - minstrel_next_sample_idx(mi); /* * Sampling might add some overhead (RTS, no aggregation) @@ -817,7 +821,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, } rate->idx = sample_idx % MCS_GROUP_RATES + - (sample_group->streams - 1) * MCS_GROUP_RATES; + (sample_group->streams - 1) * 8; rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; } @@ -1052,10 +1056,9 @@ init_sample_table(void) memset(sample_table, 0xff, sizeof(sample_table)); for (col = 0; col < SAMPLE_COLUMNS; col++) { + prandom_bytes(rnd, sizeof(rnd)); for (i = 0; i < MCS_GROUP_RATES; i++) { - get_random_bytes(rnd, sizeof(rnd)); new_idx = (i + rnd[i]) % MCS_GROUP_RATES; - while (sample_table[col][new_idx] != 0xff) new_idx = (new_idx + 1) % MCS_GROUP_RATES; diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index df44a5a..3e7d793 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -54,8 +54,7 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) int r = bitrates[j % 4]; p += sprintf(p, " %2u.%1uM", r / 10, r % 10); } else { - p += sprintf(p, " MCS%-2u", (mg->streams - 1) * - MCS_GROUP_RATES + j); + p += sprintf(p, " MCS%-2u", (mg->streams - 1) * 8 + j); } tp = mr->cur_tp / 10; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index caecef8..5a2afe9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -638,6 +638,27 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) return le16_to_cpu(mmie->key_id); } +static int iwl80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + __le16 fc; + int hdrlen; + u8 keyid; + + fc = hdr->frame_control; + hdrlen = ieee80211_hdrlen(fc); + + if (skb->len < hdrlen + cs->hdr_len) + return -EINVAL; + + skb_copy_bits(skb, hdrlen + cs->key_idx_off, &keyid, 1); + keyid &= cs->key_idx_mask; + keyid >>= cs->key_idx_shift; + + return keyid; +} + static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; @@ -729,9 +750,7 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata lockdep_assert_held(&tid_agg_rx->reorder_lock); while (ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) { - index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); } @@ -757,8 +776,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&tid_agg_rx->reorder_lock); /* release the buffer until next missing frame */ - index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % tid_agg_rx->buf_size; + index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; if (!tid_agg_rx->reorder_buf[index] && tid_agg_rx->stored_mpdu_num) { /* @@ -793,15 +811,11 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, } else while (tid_agg_rx->reorder_buf[index]) { ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); - index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; } if (tid_agg_rx->stored_mpdu_num) { - j = index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + j = index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; for (; j != (index - 1) % tid_agg_rx->buf_size; j = (j + 1) % tid_agg_rx->buf_size) { @@ -861,8 +875,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata /* Now the new frame is always in the range of the reordering buffer */ - index = ieee80211_sn_sub(mpdu_seq_num, - tid_agg_rx->ssn) % tid_agg_rx->buf_size; + index = mpdu_seq_num % tid_agg_rx->buf_size; /* check if we already stored this frame */ if (tid_agg_rx->reorder_buf[index]) { @@ -911,7 +924,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, u16 sc; u8 tid, ack_policy; - if (!ieee80211_is_data_qos(hdr->frame_control)) + if (!ieee80211_is_data_qos(hdr->frame_control) || + is_multicast_ether_addr(hdr->addr1)) goto dont_reorder; /* @@ -1368,6 +1382,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_key *sta_ptk = NULL; int mmie_keyidx = -1; __le16 fc; + const struct ieee80211_cipher_scheme *cs = NULL; /* * Key selection 101 @@ -1405,11 +1420,19 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* start without a key */ rx->key = NULL; + fc = hdr->frame_control; - if (rx->sta) - sta_ptk = rcu_dereference(rx->sta->ptk); + if (rx->sta) { + int keyid = rx->sta->ptk_idx; - fc = hdr->frame_control; + if (ieee80211_has_protected(fc) && rx->sta->cipher_scheme) { + cs = rx->sta->cipher_scheme; + keyid = iwl80211_get_cs_keyid(cs, rx->skb); + if (unlikely(keyid < 0)) + return RX_DROP_UNUSABLE; + } + sta_ptk = rcu_dereference(rx->sta->ptk[keyid]); + } if (!ieee80211_has_protected(fc)) mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); @@ -1471,6 +1494,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } else { u8 keyid; + /* * The device doesn't give us the IV so we won't be * able to look up the key. That's ok though, we @@ -1486,15 +1510,21 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(fc); - if (rx->skb->len < 8 + hdrlen) - return RX_DROP_UNUSABLE; /* TODO: count this? */ + if (cs) { + keyidx = iwl80211_get_cs_keyid(cs, rx->skb); - /* - * no need to call ieee80211_wep_get_keyidx, - * it verifies a bunch of things we've done already - */ - skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1); - keyidx = keyid >> 6; + if (unlikely(keyidx < 0)) + return RX_DROP_UNUSABLE; + } else { + if (rx->skb->len < 8 + hdrlen) + return RX_DROP_UNUSABLE; /* TODO: count this? */ + /* + * no need to call ieee80211_wep_get_keyidx, + * it verifies a bunch of things we've done already + */ + skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1); + keyidx = keyid >> 6; + } /* check per-station GTK first, if multicast packet */ if (is_multicast_ether_addr(hdr->addr1) && rx->sta) @@ -1542,11 +1572,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_aes_cmac_decrypt(rx); break; default: - /* - * We can reach here only with HW-only algorithms - * but why didn't it decrypt the frame?! - */ - return RX_DROP_UNUSABLE; + result = ieee80211_crypto_hw_decrypt(rx); } /* the hdr variable is invalid after the decrypt handlers */ @@ -1937,20 +1963,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } } - if (skb) { - int align __maybe_unused; - #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - /* - * 'align' will only take the values 0 or 2 here - * since all frames are required to be aligned - * to 2-byte boundaries when being passed to - * mac80211; the code here works just as well if - * that isn't true, but mac80211 assumes it can - * access fields as 2-byte aligned (e.g. for - * compare_ether_addr) + if (skb) { + /* 'align' will only take the values 0 or 2 here since all + * frames are required to be aligned to 2-byte boundaries + * when being passed to mac80211; the code here works just + * as well if that isn't true, but mac80211 assumes it can + * access fields as 2-byte aligned (e.g. for ether_addr_equal) */ - align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3; + int align; + + align = (unsigned long)(skb->data + sizeof(struct ethhdr)) & 3; if (align) { if (WARN_ON(skb_headroom(skb) < 3)) { dev_kfree_skb(skb); @@ -1963,14 +1986,14 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) skb_set_tail_pointer(skb, len); } } + } #endif - if (skb) { - /* deliver to local stack */ - skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_receive_skb(skb); - } + if (skb) { + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, dev); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_receive_skb(skb); } if (xmit_skb) { @@ -2056,7 +2079,6 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_NOFORWARD); u16 q, hdrlen; hdr = (struct ieee80211_hdr *) skb->data; @@ -2164,7 +2186,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) } else { /* unable to resolve next hop */ mesh_path_error_tx(sdata, ifmsh->mshcfg.element_ttl, - fwd_hdr->addr3, 0, reason, fwd_hdr->addr2); + fwd_hdr->addr3, 0, + WLAN_REASON_MESH_PATH_NOFORWARD, + fwd_hdr->addr2); IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_no_route); kfree_skb(fwd_skb); return RX_DROP_MONITOR; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5ad66a8..88c8161 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -271,10 +271,11 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, - bool was_hw_scan) +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); + bool hw_scan = local->ops->hw_scan; + bool was_scanning = local->scanning; lockdep_assert_held(&local->mtx); @@ -290,7 +291,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, if (WARN_ON(!local->scan_req)) return; - if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { + if (hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { int rc; rc = drv_hw_scan(local, @@ -316,7 +317,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, /* Set power back to normal operating levels. */ ieee80211_hw_config(local, 0); - if (!was_hw_scan) { + if (!hw_scan) { ieee80211_configure_filter(local); drv_sw_scan_complete(local); ieee80211_offchannel_return(local); @@ -327,7 +328,8 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); - ieee80211_start_next_roc(local); + if (was_scanning) + ieee80211_start_next_roc(local); } void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) @@ -526,7 +528,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, ieee80211_hw_config(local, 0); if ((req->channels[0]->flags & - IEEE80211_CHAN_PASSIVE_SCAN) || + IEEE80211_CHAN_NO_IR) || !local->scan_req->n_ssids) { next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; } else { @@ -572,7 +574,7 @@ ieee80211_scan_get_channel_time(struct ieee80211_channel *chan) * TODO: channel switching also consumes quite some time, * add that delay as well to get a better estimation */ - if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) + if (chan->flags & IEEE80211_CHAN_NO_IR) return IEEE80211_PASSIVE_CHANNEL_TIME; return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME; } @@ -696,7 +698,7 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, * * In any case, it is not necessary for a passive scan. */ - if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN || + if (chan->flags & IEEE80211_CHAN_NO_IR || !local->scan_req->n_ssids) { *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; @@ -747,7 +749,7 @@ void ieee80211_scan_work(struct work_struct *work) container_of(work, struct ieee80211_local, scan_work.work); struct ieee80211_sub_if_data *sdata; unsigned long next_delay = 0; - bool aborted, hw_scan; + bool aborted; mutex_lock(&local->mtx); @@ -786,14 +788,6 @@ void ieee80211_scan_work(struct work_struct *work) } /* - * Avoid re-scheduling when the sdata is going away. - */ - if (!ieee80211_sdata_running(sdata)) { - aborted = true; - goto out_complete; - } - - /* * as long as no delay is required advance immediately * without scheduling a new work */ @@ -834,8 +828,7 @@ void ieee80211_scan_work(struct work_struct *work) goto out; out_complete: - hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); - __ieee80211_scan_completed(&local->hw, aborted, hw_scan); + __ieee80211_scan_completed(&local->hw, aborted); out: mutex_unlock(&local->mtx); } @@ -881,7 +874,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *tmp_ch = &local->hw.wiphy->bands[band]->channels[i]; - if (tmp_ch->flags & (IEEE80211_CHAN_NO_IBSS | + if (tmp_ch->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_DISABLED)) continue; @@ -895,7 +888,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, local->int_scan_req->n_channels = n_ch; } else { - if (WARN_ON_ONCE(chan->flags & (IEEE80211_CHAN_NO_IBSS | + if (WARN_ON_ONCE(chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_DISABLED))) goto unlock; @@ -973,13 +966,13 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) */ cancel_delayed_work(&local->scan_work); /* and clean up */ - __ieee80211_scan_completed(&local->hw, true, false); + __ieee80211_scan_completed(&local->hw, true); out: mutex_unlock(&local->mtx); } -int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, - struct cfg80211_sched_scan_request *req) +int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, + struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; struct ieee80211_sched_scan_ies sched_scan_ies = {}; @@ -989,17 +982,10 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, iebufsz = 2 + IEEE80211_MAX_SSID_LEN + local->scan_ies_len + req->ie_len; - mutex_lock(&local->mtx); - - if (rcu_access_pointer(local->sched_scan_sdata)) { - ret = -EBUSY; - goto out; - } + lockdep_assert_held(&local->mtx); - if (!local->ops->sched_scan_start) { - ret = -ENOTSUPP; - goto out; - } + if (!local->ops->sched_scan_start) + return -ENOTSUPP; for (i = 0; i < IEEE80211_NUM_BANDS; i++) { if (!local->hw.wiphy->bands[i]) @@ -1020,13 +1006,39 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, } ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); - if (ret == 0) + if (ret == 0) { rcu_assign_pointer(local->sched_scan_sdata, sdata); + local->sched_scan_req = req; + } out_free: while (i > 0) kfree(sched_scan_ies.ie[--i]); -out: + + if (ret) { + /* Clean in case of failure after HW restart or upon resume. */ + rcu_assign_pointer(local->sched_scan_sdata, NULL); + local->sched_scan_req = NULL; + } + + return ret; +} + +int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, + struct cfg80211_sched_scan_request *req) +{ + struct ieee80211_local *local = sdata->local; + int ret; + + mutex_lock(&local->mtx); + + if (rcu_access_pointer(local->sched_scan_sdata)) { + mutex_unlock(&local->mtx); + return -EBUSY; + } + + ret = __ieee80211_request_sched_scan_start(sdata, req); + mutex_unlock(&local->mtx); return ret; } @@ -1043,6 +1055,9 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) goto out; } + /* We don't want to restart sched scan anymore. */ + local->sched_scan_req = NULL; + if (rcu_access_pointer(local->sched_scan_sdata)) drv_sched_scan_stop(local, sdata); @@ -1077,6 +1092,9 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) rcu_assign_pointer(local->sched_scan_sdata, NULL); + /* If sched scan was aborted by the driver. */ + local->sched_scan_req = NULL; + mutex_unlock(&local->mtx); cfg80211_sched_scan_stopped(local->hw.wiphy); @@ -1088,6 +1106,6 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) trace_api_sched_scan_stopped(local); - ieee80211_queue_work(&local->hw, &local->sched_scan_stopped_work); + schedule_work(&local->sched_scan_stopped_work); } EXPORT_SYMBOL(ieee80211_sched_scan_stopped); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index a40da20..6ab0090 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -78,6 +78,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, if (elems->mesh_chansw_params_ie) { csa_ie->ttl = elems->mesh_chansw_params_ie->mesh_ttl; csa_ie->mode = elems->mesh_chansw_params_ie->mesh_flags; + csa_ie->pre_value = le16_to_cpu( + elems->mesh_chansw_params_ie->mesh_pre_value); } new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1eb66e2..4576ba0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -99,23 +99,6 @@ static void cleanup_single_sta(struct sta_info *sta) struct ieee80211_local *local = sdata->local; struct ps_data *ps; - /* - * At this point, when being called as call_rcu callback, - * neither mac80211 nor the driver can reference this - * sta struct any more except by still existing timers - * associated with this station that we clean up below. - * - * Note though that this still uses the sdata and even - * calls the driver in AP and mesh mode, so interfaces - * of those types mush use call sta_info_flush_cleanup() - * (typically via sta_info_flush()) before deconfiguring - * the driver. - * - * In station mode, nothing happens here so it doesn't - * have to (and doesn't) do that, this is intentional to - * speed up roaming. - */ - if (test_sta_flag(sta, WLAN_STA_PS_STA)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -160,37 +143,6 @@ static void cleanup_single_sta(struct sta_info *sta) sta_info_free(local, sta); } -void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata) -{ - struct sta_info *sta; - - spin_lock_bh(&sdata->cleanup_stations_lock); - while (!list_empty(&sdata->cleanup_stations)) { - sta = list_first_entry(&sdata->cleanup_stations, - struct sta_info, list); - list_del(&sta->list); - spin_unlock_bh(&sdata->cleanup_stations_lock); - - cleanup_single_sta(sta); - - spin_lock_bh(&sdata->cleanup_stations_lock); - } - - spin_unlock_bh(&sdata->cleanup_stations_lock); -} - -static void free_sta_rcu(struct rcu_head *h) -{ - struct sta_info *sta = container_of(h, struct sta_info, rcu_head); - struct ieee80211_sub_if_data *sdata = sta->sdata; - - spin_lock(&sdata->cleanup_stations_lock); - list_add_tail(&sta->list, &sdata->cleanup_stations); - spin_unlock(&sdata->cleanup_stations_lock); - - ieee80211_queue_work(&sdata->local->hw, &sdata->cleanup_stations_wk); -} - /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) @@ -266,9 +218,17 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { + int i; + if (sta->rate_ctrl) rate_control_free_sta(sta); + if (sta->tx_lat) { + for (i = 0; i < IEEE80211_NUM_TIDS; i++) + kfree(sta->tx_lat[i].bins); + kfree(sta->tx_lat); + } + sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); kfree(sta); @@ -333,6 +293,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct timespec uptime; + struct ieee80211_tx_latency_bin_ranges *tx_latency; int i; sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); @@ -410,6 +371,31 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } + rcu_read_lock(); + + tx_latency = rcu_dereference(local->tx_latency); + /* init stations Tx latency statistics && TID bins */ + if (tx_latency) + sta->tx_lat = kzalloc(IEEE80211_NUM_TIDS * + sizeof(struct ieee80211_tx_latency_stat), + GFP_ATOMIC); + + /* + * if Tx latency and bins are enabled and the previous allocation + * succeeded + */ + if (tx_latency && tx_latency->n_ranges && sta->tx_lat) + for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + /* size of bins is size of the ranges +1 */ + sta->tx_lat[i].bin_count = + tx_latency->n_ranges + 1; + sta->tx_lat[i].bins = kcalloc(sta->tx_lat[i].bin_count, + sizeof(u32), + GFP_ATOMIC); + } + + rcu_read_unlock(); + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; @@ -507,6 +493,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_INSERTED); + ieee80211_recalc_min_chandef(sdata); ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); @@ -630,8 +617,8 @@ void sta_info_recalc_tim(struct sta_info *sta) #ifdef CONFIG_MAC80211_MESH } else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { ps = &sta->sdata->u.mesh.ps; - /* TIM map only for PLID <= IEEE80211_MAX_AID */ - id = le16_to_cpu(sta->plid) % IEEE80211_MAX_AID; + /* TIM map only for 1 <= PLID <= IEEE80211_MAX_AID */ + id = sta->plid % (IEEE80211_MAX_AID + 1); #endif } else { return; @@ -807,7 +794,7 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, return have_buffered; } -int __must_check __sta_info_destroy(struct sta_info *sta) +static int __must_check __sta_info_destroy_part1(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -833,12 +820,35 @@ int __must_check __sta_info_destroy(struct sta_info *sta) ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); ret = sta_info_hash_del(local, sta); - if (ret) + if (WARN_ON(ret)) return ret; list_del_rcu(&sta->list); - /* this always calls synchronize_net() */ + drv_sta_pre_rcu_remove(local, sta->sdata, sta); + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + rcu_access_pointer(sdata->u.vlan.sta) == sta) + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); + + return 0; +} + +static void __sta_info_destroy_part2(struct sta_info *sta) +{ + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + int ret; + + /* + * NOTE: This assumes at least synchronize_net() was done + * after _part1 and before _part2! + */ + + might_sleep(); + lockdep_assert_held(&local->sta_mtx); + + /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); sta->dead = true; @@ -846,9 +856,6 @@ int __must_check __sta_info_destroy(struct sta_info *sta) local->num_sta--; local->sta_generation++; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); - while (sta->sta_state > IEEE80211_STA_NONE) { ret = sta_info_move_state(sta, sta->sta_state - 1); if (ret) { @@ -869,8 +876,21 @@ int __must_check __sta_info_destroy(struct sta_info *sta) rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); + ieee80211_recalc_min_chandef(sdata); + + cleanup_single_sta(sta); +} + +int __must_check __sta_info_destroy(struct sta_info *sta) +{ + int err = __sta_info_destroy_part1(sta); + + if (err) + return err; - call_rcu(&sta->rcu_head, free_sta_rcu); + synchronize_net(); + + __sta_info_destroy_part2(sta); return 0; } @@ -940,32 +960,38 @@ void sta_info_stop(struct ieee80211_local *local) } -int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata) +int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; + LIST_HEAD(free_list); int ret = 0; might_sleep(); + WARN_ON(vlans && sdata->vif.type != NL80211_IFTYPE_AP); + WARN_ON(vlans && !sdata->bss); + mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (sdata == sta->sdata) { - WARN_ON(__sta_info_destroy(sta)); + if (sdata == sta->sdata || + (vlans && sdata->bss == sta->sdata->bss)) { + if (!WARN_ON(__sta_info_destroy_part1(sta))) + list_add(&sta->free_list, &free_list); ret++; } } + + if (!list_empty(&free_list)) { + synchronize_net(); + list_for_each_entry_safe(sta, tmp, &free_list, free_list) + __sta_info_destroy_part2(sta); + } mutex_unlock(&local->sta_mtx); return ret; } -void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata) -{ - ieee80211_cleanup_sdata_stas(sdata); - cancel_work_sync(&sdata->cleanup_stations_wk); -} - void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 3ef06a2..d77ff70 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -220,6 +220,25 @@ struct sta_ampdu_mlme { u8 dialog_token_allocator; }; +/* + * struct ieee80211_tx_latency_stat - Tx latency statistics + * + * Measures TX latency and jitter for a station per TID. + * + * @max: worst case latency + * @sum: sum of all latencies + * @counter: amount of Tx frames sent from interface + * @bins: each bin counts how many frames transmitted within a certain + * latency range. when disabled it is NULL. + * @bin_count: amount of bins. + */ +struct ieee80211_tx_latency_stat { + u32 max; + u32 sum; + u32 counter; + u32 *bins; + u32 bin_count; +}; /** * struct sta_info - STA information @@ -228,11 +247,14 @@ struct sta_ampdu_mlme { * mac80211 is communicating with. * * @list: global linked list entry + * @free_list: list entry for keeping track of stations to free * @hnext: hash table linked list pointer * @local: pointer to the global information * @sdata: virtual interface this station belongs to - * @ptk: peer key negotiated with this station, if any + * @ptk: peer keys negotiated with this station, if any + * @ptk_idx: last installed peer key index * @gtk: group keys negotiated with this station, if any + * @gtk_idx: last installed group key index * @rate_ctrl: rate control algorithm reference * @rate_ctrl_priv: rate control private per-STA pointer * @last_tx_rate: rate used for last transmit, to report to userspace as @@ -274,6 +296,7 @@ struct sta_ampdu_mlme { * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers + * @tx_lat: Tx latency statistics * @llid: Local link ID * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state @@ -303,16 +326,19 @@ struct sta_ampdu_mlme { * @chain_signal_avg: signal average (per chain) * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for * AP only. + * @cipher_scheme: optional cipher scheme for this station */ struct sta_info { /* General information, mostly static */ - struct list_head list; + struct list_head list, free_list; struct rcu_head rcu_head; struct sta_info __rcu *hnext; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; - struct ieee80211_key __rcu *ptk; + struct ieee80211_key __rcu *ptk[NUM_DEFAULT_KEYS]; + u8 gtk_idx; + u8 ptk_idx; struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; spinlock_t lock; @@ -380,14 +406,16 @@ struct sta_info { struct sta_ampdu_mlme ampdu_mlme; u8 timer_to_tid[IEEE80211_NUM_TIDS]; + struct ieee80211_tx_latency_stat *tx_lat; + #ifdef CONFIG_MAC80211_MESH /* * Mesh peer link attributes * TODO: move to a sub-structure that is referenced with pointer? */ - __le16 llid; - __le16 plid; - __le16 reason; + u16 llid; + u16 plid; + u16 reason; u8 plink_retries; bool ignore_plink_timer; enum nl80211_plink_state plink_state; @@ -414,6 +442,7 @@ struct sta_info { unsigned int beacon_loss_count; enum ieee80211_smps_mode known_smps_mode; + const struct ieee80211_cipher_scheme *cipher_scheme; /* keep last! */ struct ieee80211_sta sta; @@ -577,21 +606,6 @@ void sta_info_recalc_tim(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); -int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata); - -/** - * sta_info_flush_cleanup - flush the sta_info cleanup queue - * @sdata: the interface - * - * Flushes the sta_info cleanup queue for a given interface; - * this is necessary before the interface is removed or, for - * AP/mesh interfaces, before it is deconfigured. - * - * Note an rcu_barrier() must precede the function, after all - * stations have been flushed/removed to ensure the call_rcu() - * calls that add stations to the cleanup queue have completed. - */ -void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); /** * sta_info_flush - flush matching STA entries from the STA table @@ -599,15 +613,13 @@ void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); * Returns the number of removed STA entries. * * @sdata: sdata to remove all stations from + * @vlans: if the given interface is an AP interface, also flush VLANs */ +int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans); + static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) { - int ret = sta_info_flush_defer(sdata); - - rcu_barrier(); - sta_info_flush_cleanup(sdata); - - return ret; + return __sta_info_flush(sdata, false); } void sta_set_rate_info_tx(struct sta_info *sta, @@ -623,6 +635,4 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); -void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata); - #endif /* STA_INFO_H */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 52a152b..1ee85c4 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/etherdevice.h> +#include <linux/time.h> #include <net/mac80211.h> #include <asm/unaligned.h> #include "ieee80211_i.h" @@ -463,6 +464,77 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, } /* + * Measure Tx frame completion and removal time for Tx latency statistics + * calculation. A single Tx frame latency should be measured from when it + * is entering the Kernel until we receive Tx complete confirmation indication + * and remove the skb. + */ +static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local, + struct sk_buff *skb, + struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + ktime_t skb_dprt; + struct timespec dprt_time; + u32 msrmnt; + u16 tid; + u8 *qc; + int i, bin_range_count, bin_count; + u32 *bin_ranges; + __le16 fc; + struct ieee80211_tx_latency_stat *tx_lat; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + ktime_t skb_arv = skb->tstamp; + + tx_latency = rcu_dereference(local->tx_latency); + + /* assert Tx latency stats are enabled & frame arrived when enabled */ + if (!tx_latency || !ktime_to_ns(skb_arv)) + return; + + fc = hdr->frame_control; + + if (!ieee80211_is_data(fc)) /* make sure it is a data frame */ + return; + + /* get frame tid */ + if (ieee80211_is_data_qos(hdr->frame_control)) { + qc = ieee80211_get_qos_ctl(hdr); + tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK; + } else { + tid = 0; + } + + tx_lat = &sta->tx_lat[tid]; + + ktime_get_ts(&dprt_time); /* time stamp completion time */ + skb_dprt = ktime_set(dprt_time.tv_sec, dprt_time.tv_nsec); + msrmnt = ktime_to_ms(ktime_sub(skb_dprt, skb_arv)); + + if (tx_lat->max < msrmnt) /* update stats */ + tx_lat->max = msrmnt; + tx_lat->counter++; + tx_lat->sum += msrmnt; + + if (!tx_lat->bins) /* bins not activated */ + return; + + /* count how many Tx frames transmitted with the appropriate latency */ + bin_range_count = tx_latency->n_ranges; + bin_ranges = tx_latency->ranges; + bin_count = tx_lat->bin_count; + + for (i = 0; i < bin_range_count; i++) { + if (msrmnt <= bin_ranges[i]) { + tx_lat->bins[i]++; + break; + } + } + if (i == bin_range_count) /* msrmnt is bigger than the biggest range */ + tx_lat->bins[i]++; +} + +/* * Use a static threshold for now, best value to be determined * by testing ... * Should it depend on: @@ -620,6 +692,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (acked) sta->last_ack_signal = info->status.ack_signal; + + /* + * Measure frame removal for tx latency + * statistics calculation + */ + ieee80211_tx_latency_end_msrmnt(local, skb, sta, hdr); } rcu_read_unlock(); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index d4cee98..3a669d7 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -41,14 +41,31 @@ #define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ __entry->center_freq1, __entry->center_freq2 +#define MIN_CHANDEF_ENTRY \ + __field(u32, min_control_freq) \ + __field(u32, min_chan_width) \ + __field(u32, min_center_freq1) \ + __field(u32, min_center_freq2) + +#define MIN_CHANDEF_ASSIGN(c) \ + __entry->min_control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ + __entry->min_chan_width = (c)->width; \ + __entry->min_center_freq1 = (c)->center_freq1; \ + __entry->min_center_freq2 = (c)->center_freq2; +#define MIN_CHANDEF_PR_FMT " min_control:%d MHz min_width:%d min_center: %d/%d MHz" +#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_chan_width, \ + __entry->min_center_freq1, __entry->min_center_freq2 + #define CHANCTX_ENTRY CHANDEF_ENTRY \ + MIN_CHANDEF_ENTRY \ __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) #define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ + MIN_CHANDEF_ASSIGN(&ctx->conf.min_def) \ __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic -#define CHANCTX_PR_FMT CHANDEF_PR_FMT " chains:%d/%d" -#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ +#define CHANCTX_PR_FMT CHANDEF_PR_FMT MIN_CHANDEF_PR_FMT " chains:%d/%d" +#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic @@ -426,30 +443,6 @@ TRACE_EVENT(drv_prepare_multicast, ) ); -TRACE_EVENT(drv_set_multicast_list, - TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, int mc_count), - - TP_ARGS(local, sdata, mc_count), - - TP_STRUCT__entry( - LOCAL_ENTRY - __field(bool, allmulti) - __field(int, mc_count) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - __entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; - __entry->mc_count = mc_count; - ), - - TP_printk( - LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d", - LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti - ) -); - TRACE_EVENT(drv_configure_filter, TP_PROTO(struct ieee80211_local *local, unsigned int changed_flags, @@ -773,7 +766,7 @@ TRACE_EVENT(drv_sta_rc_update, ) ); -TRACE_EVENT(drv_sta_add, +DECLARE_EVENT_CLASS(sta_event, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), @@ -798,29 +791,25 @@ TRACE_EVENT(drv_sta_add, ) ); -TRACE_EVENT(drv_sta_remove, +DEFINE_EVENT(sta_event, drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); - TP_ARGS(local, sdata, sta), - - TP_STRUCT__entry( - LOCAL_ENTRY - VIF_ENTRY - STA_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - VIF_ASSIGN; - STA_ASSIGN; - ), +DEFINE_EVENT(sta_event, drv_sta_remove, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); - TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG - ) +DEFINE_EVENT(sta_event, drv_sta_pre_rcu_remove, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) ); TRACE_EVENT(drv_conf_tx, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c558b24..2f0e176 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -19,6 +19,7 @@ #include <linux/bitmap.h> #include <linux/rcupdate.h> #include <linux/export.h> +#include <linux/time.h> #include <net/net_namespace.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> @@ -557,7 +558,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) tx->key = NULL; - else if (tx->sta && (key = rcu_dereference(tx->sta->ptk))) + else if (tx->sta && + (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx]))) tx->key = key; else if (ieee80211_is_mgmt(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && @@ -840,15 +842,16 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx, rem -= fraglen; tmp = dev_alloc_skb(local->tx_headroom + frag_threshold + - IEEE80211_ENCRYPT_HEADROOM + + tx->sdata->encrypt_headroom + IEEE80211_ENCRYPT_TAILROOM); if (!tmp) return -ENOMEM; __skb_queue_tail(&tx->skbs, tmp); - skb_reserve(tmp, local->tx_headroom + - IEEE80211_ENCRYPT_HEADROOM); + skb_reserve(tmp, + local->tx_headroom + tx->sdata->encrypt_headroom); + /* copy control information */ memcpy(tmp->cb, skb->cb, sizeof(tmp->cb)); @@ -1485,7 +1488,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, headroom = local->tx_headroom; if (may_encrypt) - headroom += IEEE80211_ENCRYPT_HEADROOM; + headroom += sdata->encrypt_headroom; headroom -= skb_headroom(skb); headroom = max_t(int, 0, headroom); @@ -1724,8 +1727,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, * radar detection by itself. We can do that later by adding a * monitor flag interfaces used for AP support. */ - if ((chan->flags & (IEEE80211_CHAN_NO_IBSS | IEEE80211_CHAN_RADAR | - IEEE80211_CHAN_PASSIVE_SCAN))) + if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR))) goto fail_rcu; ieee80211_xmit(sdata, skb, chan->band); @@ -1740,6 +1742,26 @@ fail: return NETDEV_TX_OK; /* meaning, we dealt with the skb */ } +/* + * Measure Tx frame arrival time for Tx latency statistics calculation + * A single Tx frame latency should be measured from when it is entering the + * Kernel until we receive Tx complete confirmation indication and the skb is + * freed. + */ +static void ieee80211_tx_latency_start_msrmnt(struct ieee80211_local *local, + struct sk_buff *skb) +{ + struct timespec skb_arv; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + + tx_latency = rcu_dereference(local->tx_latency); + if (!tx_latency) + return; + + ktime_get_ts(&skb_arv); + skb->tstamp = ktime_set(skb_arv.tv_sec, skb_arv.tv_nsec); +} + /** * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type * subinterfaces (wlan#, WDS, and VLAN interfaces) @@ -1790,6 +1812,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, rcu_read_lock(); + /* Measure frame arrival for Tx latency statistics calculation */ + ieee80211_tx_latency_start_msrmnt(local, skb); + switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: sta = rcu_dereference(sdata->u.vlan.sta); @@ -2109,7 +2134,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, */ if (head_need > 0 || skb_cloned(skb)) { - head_need += IEEE80211_ENCRYPT_HEADROOM; + head_need += sdata->encrypt_headroom; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); if (ieee80211_skb_resize(sdata, skb, head_need, true)) { @@ -2524,7 +2549,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, */ skb = dev_alloc_skb(local->tx_headroom + beacon->head_len + - beacon->tail_len + 256); + beacon->tail_len + 256 + + local->hw.extra_beacon_tailroom); if (!skb) goto out; @@ -2556,7 +2582,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, ieee80211_update_csa(sdata, presp); - skb = dev_alloc_skb(local->tx_headroom + presp->head_len); + skb = dev_alloc_skb(local->tx_headroom + presp->head_len + + local->hw.extra_beacon_tailroom); if (!skb) goto out; skb_reserve(skb, local->tx_headroom); @@ -2577,13 +2604,13 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, ieee80211_update_csa(sdata, bcn); if (ifmsh->sync_ops) - ifmsh->sync_ops->adjust_tbtt( - sdata); + ifmsh->sync_ops->adjust_tbtt(sdata, bcn); skb = dev_alloc_skb(local->tx_headroom + bcn->head_len + 256 + /* TIM IE */ - bcn->tail_len); + bcn->tail_len + + local->hw.extra_beacon_tailroom); if (!skb) goto out; skb_reserve(skb, local->tx_headroom); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 592a181..591b46b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -642,6 +642,17 @@ void ieee80211_iterate_active_interfaces_rtnl( } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_rtnl); +struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + if (!ieee80211_sdata_running(sdata) || + !(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) + return NULL; + return &sdata->vif; +} +EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif); + /* * Nothing should have been stuffed into the workqueue during * the suspend->resume cycle. If this WARN is seen then there @@ -1451,6 +1462,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct sta_info *sta; int res, i; bool reconfig_due_to_wowlan = false; + struct ieee80211_sub_if_data *sched_scan_sdata; + bool sched_scan_stopped = false; #ifdef CONFIG_PM if (local->suspended) @@ -1754,6 +1767,27 @@ int ieee80211_reconfig(struct ieee80211_local *local) #else WARN_ON(1); #endif + + /* + * Reconfigure sched scan if it was interrupted by FW restart or + * suspend. + */ + mutex_lock(&local->mtx); + sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, + lockdep_is_held(&local->mtx)); + if (sched_scan_sdata && local->sched_scan_req) + /* + * Sched scan stopped, but we don't want to report it. Instead, + * we're trying to reschedule. + */ + if (__ieee80211_request_sched_scan_start(sched_scan_sdata, + local->sched_scan_req)) + sched_scan_stopped = true; + mutex_unlock(&local->mtx); + + if (sched_scan_stopped) + cfg80211_sched_scan_stopped(local->hw.wiphy); + return 0; } @@ -1804,6 +1838,26 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->chanctx_mtx); } +void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_chanctx *chanctx; + + mutex_lock(&local->chanctx_mtx); + + chanctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + + if (WARN_ON_ONCE(!chanctx_conf)) + goto unlock; + + chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); + ieee80211_recalc_chanctx_min_def(local, chanctx); + unlock: + mutex_unlock(&local->chanctx_mtx); +} + static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) { int i; @@ -2259,14 +2313,17 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; + struct cfg80211_chan_def chandef; mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); if (sdata->wdev.cac_started) { + chandef = sdata->vif.bss_conf.chandef; ieee80211_vif_release_channel(sdata); cfg80211_cac_event(sdata->dev, + &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); } @@ -2278,17 +2335,15 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = local->hw.conf.chandef; ieee80211_dfs_cac_cancel(local); if (local->use_chanctx) /* currently not handled */ WARN_ON(1); - else { - chandef = local->hw.conf.chandef; + else cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL); - } } void ieee80211_radar_detected(struct ieee80211_hw *hw) @@ -2447,7 +2502,6 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, if (ieee80211_vif_is_mesh(&sdata->vif)) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - __le16 pre_value; skb_put(skb, 8); *pos++ = WLAN_EID_CHAN_SWITCH_PARAM; /* EID */ @@ -2459,16 +2513,78 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT : 0x00; put_unaligned_le16(WLAN_REASON_MESH_CHAN, pos); /* Reason Cd */ pos += 2; - if (!ifmsh->pre_value) - ifmsh->pre_value = 1; - else - ifmsh->pre_value++; - pre_value = cpu_to_le16(ifmsh->pre_value); - memcpy(pos, &pre_value, 2); /* Precedence Value */ + put_unaligned_le16(ifmsh->pre_value, pos);/* Precedence Value */ pos += 2; - ifmsh->chsw_init = true; } ieee80211_tx_skb(sdata, skb); return 0; } + +bool ieee80211_cs_valid(const struct ieee80211_cipher_scheme *cs) +{ + return !(cs == NULL || cs->cipher == 0 || + cs->hdr_len < cs->pn_len + cs->pn_off || + cs->hdr_len <= cs->key_idx_off || + cs->key_idx_shift > 7 || + cs->key_idx_mask == 0); +} + +bool ieee80211_cs_list_valid(const struct ieee80211_cipher_scheme *cs, int n) +{ + int i; + + /* Ensure we have enough iftype bitmap space for all iftype values */ + WARN_ON((NUM_NL80211_IFTYPES / 8 + 1) > sizeof(cs[0].iftype)); + + for (i = 0; i < n; i++) + if (!ieee80211_cs_valid(&cs[i])) + return false; + + return true; +} + +const struct ieee80211_cipher_scheme * +ieee80211_cs_get(struct ieee80211_local *local, u32 cipher, + enum nl80211_iftype iftype) +{ + const struct ieee80211_cipher_scheme *l = local->hw.cipher_schemes; + int n = local->hw.n_cipher_schemes; + int i; + const struct ieee80211_cipher_scheme *cs = NULL; + + for (i = 0; i < n; i++) { + if (l[i].cipher == cipher) { + cs = &l[i]; + break; + } + } + + if (!cs || !(cs->iftype & BIT(iftype))) + return NULL; + + return cs; +} + +int ieee80211_cs_headroom(struct ieee80211_local *local, + struct cfg80211_crypto_settings *crypto, + enum nl80211_iftype iftype) +{ + const struct ieee80211_cipher_scheme *cs; + int headroom = IEEE80211_ENCRYPT_HEADROOM; + int i; + + for (i = 0; i < crypto->n_ciphers_pairwise; i++) { + cs = ieee80211_cs_get(local, crypto->ciphers_pairwise[i], + iftype); + + if (cs && headroom < cs->hdr_len) + headroom = cs->hdr_len; + } + + cs = ieee80211_cs_get(local, crypto->cipher_group, iftype); + if (cs && headroom < cs->hdr_len) + headroom = cs->hdr_len; + + return headroom; +} diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index de01127..d75f35c 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -182,16 +182,15 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_VHT_CAP_SHORT_GI_160); /* remaining ones */ - if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) vht_cap->cap |= cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | - IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); - } + IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK); if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | - IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX); + IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK); if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index d657282..7313d37 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -545,6 +545,106 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +static ieee80211_tx_result +ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_key *key = tx->key; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + const struct ieee80211_cipher_scheme *cs = key->sta->cipher_scheme; + int hdrlen; + u8 *pos; + + if (info->control.hw_key && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { + /* hwaccel has no need for preallocated head room */ + return TX_CONTINUE; + } + + if (unlikely(skb_headroom(skb) < cs->hdr_len && + pskb_expand_head(skb, cs->hdr_len, 0, GFP_ATOMIC))) + return TX_DROP; + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + pos = skb_push(skb, cs->hdr_len); + memmove(pos, pos + cs->hdr_len, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + cs->hdr_len); + + return TX_CONTINUE; +} + +static inline int ieee80211_crypto_cs_pn_compare(u8 *pn1, u8 *pn2, int len) +{ + int i; + + /* pn is little endian */ + for (i = len - 1; i >= 0; i--) { + if (pn1[i] < pn2[i]) + return -1; + else if (pn1[i] > pn2[i]) + return 1; + } + + return 0; +} + +static ieee80211_rx_result +ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_key *key = rx->key; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + const struct ieee80211_cipher_scheme *cs = NULL; + int hdrlen = ieee80211_hdrlen(hdr->frame_control); + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + int data_len; + u8 *rx_pn; + u8 *skb_pn; + u8 qos_tid; + + if (!rx->sta || !rx->sta->cipher_scheme || + !(status->flag & RX_FLAG_DECRYPTED)) + return RX_DROP_UNUSABLE; + + if (!ieee80211_is_data(hdr->frame_control)) + return RX_CONTINUE; + + cs = rx->sta->cipher_scheme; + + data_len = rx->skb->len - hdrlen - cs->hdr_len; + + if (data_len < 0) + return RX_DROP_UNUSABLE; + + if (ieee80211_is_data_qos(hdr->frame_control)) + qos_tid = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_TID_MASK; + else + qos_tid = 0; + + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + + hdr = (struct ieee80211_hdr *)rx->skb->data; + + rx_pn = key->u.gen.rx_pn[qos_tid]; + skb_pn = rx->skb->data + hdrlen + cs->pn_off; + + if (ieee80211_crypto_cs_pn_compare(skb_pn, rx_pn, cs->pn_len) <= 0) + return RX_DROP_UNUSABLE; + + memcpy(rx_pn, skb_pn, cs->pn_len); + + /* remove security header and MIC */ + if (pskb_trim(rx->skb, rx->skb->len - cs->mic_len)) + return RX_DROP_UNUSABLE; + + memmove(rx->skb->data + cs->hdr_len, rx->skb->data, hdrlen); + skb_pull(rx->skb, cs->hdr_len); + + return RX_CONTINUE; +} static void bip_aad(struct sk_buff *skb, u8 *aad) { @@ -685,6 +785,7 @@ ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb; struct ieee80211_tx_info *info = NULL; + ieee80211_tx_result res; skb_queue_walk(&tx->skbs, skb) { info = IEEE80211_SKB_CB(skb); @@ -692,9 +793,24 @@ ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) /* handle hw-only algorithm */ if (!info->control.hw_key) return TX_DROP; + + if (tx->key->sta->cipher_scheme) { + res = ieee80211_crypto_cs_encrypt(tx, skb); + if (res != TX_CONTINUE) + return res; + } } ieee80211_tx_set_protected(tx); return TX_CONTINUE; } + +ieee80211_rx_result +ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx) +{ + if (rx->sta->cipher_scheme) + return ieee80211_crypto_cs_decrypt(rx); + + return RX_DROP_UNUSABLE; +} diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 07e33f8..62e5a12 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -34,5 +34,7 @@ ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx); #endif /* WPA_H */ diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index e24bcf9..372d8a2 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -444,8 +444,8 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) case IEEE802154_FC_TYPE_DATA: return mac802154_process_data(sdata->dev, skb); default: - pr_warning("ieee802154: bad frame received (type = %d)\n", - mac_cb_type(skb)); + pr_warn("ieee802154: bad frame received (type = %d)\n", + mac_cb_type(skb)); kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c3398cd..c3b3b26 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -414,47 +414,104 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK config NF_TABLES - depends on NETFILTER_NETLINK + select NETFILTER_NETLINK tristate "Netfilter nf_tables support" + help + nftables is the new packet classification framework that intends to + replace the existing {ip,ip6,arp,eb}_tables infrastructure. It + provides a pseudo-state machine with an extensible instruction-set + (also known as expressions) that the userspace 'nft' utility + (http://www.netfilter.org/projects/nftables) uses to build the + rule-set. It also comes with the generic set infrastructure that + allows you to construct mappings between matchings and actions + for performance lookups. + + To compile it as a module, choose M here. config NFT_EXTHDR depends on NF_TABLES tristate "Netfilter nf_tables IPv6 exthdr module" + help + This option adds the "exthdr" expression that you can use to match + IPv6 extension headers. config NFT_META depends on NF_TABLES tristate "Netfilter nf_tables meta module" + help + This option adds the "meta" expression that you can use to match and + to set packet metainformation such as the packet mark. config NFT_CT depends on NF_TABLES depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" + help + This option adds the "meta" expression that you can use to match + connection tracking information such as the flow state. config NFT_RBTREE depends on NF_TABLES tristate "Netfilter nf_tables rbtree set module" + help + This option adds the "rbtree" set type (Red Black tree) that is used + to build interval-based sets. config NFT_HASH depends on NF_TABLES tristate "Netfilter nf_tables hash set module" + help + This option adds the "hash" set type that is used to build one-way + mappings between matchings and actions. config NFT_COUNTER depends on NF_TABLES tristate "Netfilter nf_tables counter module" + help + This option adds the "counter" expression that you can use to + include packet and byte counters in a rule. config NFT_LOG depends on NF_TABLES tristate "Netfilter nf_tables log module" + help + This option adds the "log" expression that you can use to log + packets matching some criteria. config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" + help + This option adds the "limit" expression that you can use to + ratelimit rule matchings. config NFT_NAT depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables nat module" + help + This option adds the "nat" expression that you can use to perform + typical Network Address Translation (NAT) packet transformations. + +config NFT_QUEUE + depends on NF_TABLES + depends on NETFILTER_XTABLES + depends on NETFILTER_NETLINK_QUEUE + tristate "Netfilter nf_tables queue module" + help + This is required if you intend to use the userspace queueing + infrastructure (also known as NFQUEUE) from nftables. + +config NFT_REJECT + depends on NF_TABLES + depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6 + default m if NETFILTER_ADVANCED=n + tristate "Netfilter nf_tables reject support" + help + This option adds the "reject" expression that you can use to + explicitly deny and notify via TCP reset/ICMP informational errors + unallowed traffic. config NFT_COMPAT depends on NF_TABLES @@ -858,6 +915,16 @@ config NETFILTER_XT_MATCH_BPF To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_CGROUP + tristate '"control group" match support' + depends on NETFILTER_ADVANCED + depends on CGROUPS + select CGROUP_NET_CLASSID + ---help--- + Socket/process control group matching allows you to match locally + generated packets based on which net_cls control group processes + belong to. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK @@ -1035,6 +1102,15 @@ config NETFILTER_XT_MATCH_HL in the IPv6 header, or the time-to-live field in the IPv4 header of the packet. +config NETFILTER_XT_MATCH_IPCOMP + tristate '"ipcomp" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of CPIs(16 bits) + inside IPComp header of IPSec packets. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 394483b..78b4e1c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -76,7 +76,8 @@ obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o -#nf_tables-objs += nft_meta_target.o +obj-$(CONFIG_NFT_QUEUE) += nft_queue.o +obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o @@ -133,6 +134,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o +obj-$(CONFIG_NETFILTER_XT_MATCH_IPCOMP) += xt_ipcomp.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o @@ -142,6 +144,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bac7e01..de770ec 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -625,34 +625,6 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); */ /* - * Find set by name, reference it once. The reference makes sure the - * thing pointed to, does not go away under our feet. - * - * The nfnl mutex is used in the function. - */ -ip_set_id_t -ip_set_nfnl_get(struct net *net, const char *name) -{ - ip_set_id_t i, index = IPSET_INVALID_ID; - struct ip_set *s; - struct ip_set_net *inst = ip_set_pernet(net); - - nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < inst->ip_set_max; i++) { - s = nfnl_set(inst, i); - if (s != NULL && STREQ(s->name, name)) { - __ip_set_get(s); - index = i; - break; - } - } - nfnl_unlock(NFNL_SUBSYS_IPSET); - - return index; -} -EXPORT_SYMBOL_GPL(ip_set_nfnl_get); - -/* * Find set by index, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 2bc2dec..6226803 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -59,7 +59,7 @@ hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, u32 *multi) { return ip1->ipcmp == ip2->ipcmp && - ip2->ccmp == ip2->ccmp; + ip1->ccmp == ip2->ccmp; } static inline int diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index c8beafd..5882bbf 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -19,8 +19,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * * Authors: @@ -63,6 +62,7 @@ #include <net/ip_vs.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -97,6 +97,11 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return; + /* Applications may adjust TCP seqs */ + if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP && + !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct)) + return; + /* * The connection is not yet in the hashtable, so we update it. * CIP->VIP will remain the same, so leave the tuple in diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f63c238..db80126 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1637,7 +1637,10 @@ static int sync_thread_master(void *data) continue; } while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { - int ret = __wait_event_interruptible(*sk_sleep(sk), + /* (Ab)use interruptible sleep to avoid increasing + * the load avg. + */ + __wait_event_interruptible(*sk_sleep(sk), sock_writeable(sk) || kthread_should_stop()); if (unlikely(kthread_should_stop())) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 43549eb..8824ed0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -60,12 +60,6 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); -int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff); -EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); - DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -361,15 +355,6 @@ begin: return NULL; } -struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple) -{ - return ____nf_conntrack_find(net, zone, tuple, - hash_conntrack_raw(tuple, zone)); -} -EXPORT_SYMBOL_GPL(__nf_conntrack_find); - /* Find a connection corresponding to a tuple. */ static struct nf_conntrack_tuple_hash * __nf_conntrack_find_get(struct net *net, u16 zone, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 08870b8..bb322d0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2118,8 +2118,16 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) return err; } #if defined(CONFIG_NF_CONNTRACK_MARK) - if (cda[CTA_MARK]) - ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); + if (cda[CTA_MARK]) { + u32 mask = 0, mark, newmark; + if (cda[CTA_MARK_MASK]) + mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + mark = ntohl(nla_get_be32(cda[CTA_MARK])); + newmark = (ct->mark & mask) ^ mark; + if (newmark != ct->mark) + ct->mark = newmark; + } #endif return 0; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index ce30041..b65d586 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -92,12 +92,6 @@ nf_ct_l3proto_find_get(u_int16_t l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get); -void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p) -{ - module_put(p->me); -} -EXPORT_SYMBOL_GPL(nf_ct_l3proto_put); - int nf_ct_l3proto_try_module_get(unsigned short l3proto) { diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index 17c1bcb..b2d38da 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -36,6 +36,11 @@ int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, if (off == 0) return 0; + if (unlikely(!seqadj)) { + WARN(1, "Wrong seqadj usage, missing nfct_seqadj_ext_add()\n"); + return 0; + } + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); spin_lock_bh(&ct->lock); diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 902fb0a..7a394df 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -97,7 +97,6 @@ int nf_conntrack_tstamp_pernet_init(struct net *net) void nf_conntrack_tstamp_pernet_fini(struct net *net) { nf_conntrack_tstamp_fini_sysctl(net); - nf_ct_extend_unregister(&tstamp_extend); } int nf_conntrack_tstamp_init(void) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 63a8154..d3f5cd6 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -315,7 +315,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, * manips not an issue. */ if (maniptype == NF_NAT_MANIP_SRC && - !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (in_range(l3proto, l4proto, orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -339,7 +339,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, */ /* Only bother mapping if it's not already in range and unique */ - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (l4proto->in_range(tuple, maniptype, &range->min_proto, diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index 9baaf73..83a72a2 100644 --- a/net/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -74,22 +74,24 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, range_size = ntohs(range->max_proto.all) - min + 1; } - if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) + if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) { off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC ? tuple->dst.u.all : tuple->src.u.all); - else + } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) { + off = prandom_u32(); + } else { off = *rover; + } for (i = 0; ; ++off) { *portptr = htons(min + off % range_size); if (++i != range_size && nf_nat_used_tuple(tuple, ct)) continue; - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) *rover = off; return; } - return; } EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dcddc49..1fcef1e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -180,7 +180,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->res_id = 0; if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags))) + nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || + nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -312,6 +313,9 @@ static int nf_tables_table_enable(struct nft_table *table) int err, i = 0; list_for_each_entry(chain, &table->chains, list) { + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + err = nf_register_hook(&nft_base_chain(chain)->ops); if (err < 0) goto err; @@ -321,6 +325,9 @@ static int nf_tables_table_enable(struct nft_table *table) return 0; err: list_for_each_entry(chain, &table->chains, list) { + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + if (i-- <= 0) break; @@ -333,8 +340,10 @@ static int nf_tables_table_disable(struct nft_table *table) { struct nft_chain *chain; - list_for_each_entry(chain, &table->chains, list) - nf_unregister_hook(&nft_base_chain(chain)->ops); + list_for_each_entry(chain, &table->chains, list) { + if (chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + } return 0; } @@ -1717,6 +1726,19 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) return -ENOENT; } +static int nf_table_delrule_by_chain(struct nft_ctx *ctx) +{ + struct nft_rule *rule; + int err; + + list_for_each_entry(rule, &ctx->chain->rules, list) { + err = nf_tables_delrule_one(ctx, rule); + if (err < 0) + return err; + } + return 0; +} + static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1725,8 +1747,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct net *net = sock_net(skb->sk); const struct nft_table *table; - struct nft_chain *chain; - struct nft_rule *rule, *tmp; + struct nft_chain *chain = NULL; + struct nft_rule *rule; int family = nfmsg->nfgen_family, err = 0; struct nft_ctx ctx; @@ -1738,22 +1760,29 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); - if (IS_ERR(chain)) - return PTR_ERR(chain); + if (nla[NFTA_RULE_CHAIN]) { + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); - if (nla[NFTA_RULE_HANDLE]) { - rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); - if (IS_ERR(rule)) - return PTR_ERR(rule); + if (chain) { + if (nla[NFTA_RULE_HANDLE]) { + rule = nf_tables_rule_lookup(chain, + nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); - err = nf_tables_delrule_one(&ctx, rule); - } else { - /* Remove all rules in this chain */ - list_for_each_entry_safe(rule, tmp, &chain->rules, list) { err = nf_tables_delrule_one(&ctx, rule); + } else { + err = nf_table_delrule_by_chain(&ctx); + } + } else { + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + err = nf_table_delrule_by_chain(&ctx); if (err < 0) break; } @@ -1903,12 +1932,14 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, { struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + const struct nft_af_info *afi = NULL; const struct nft_table *table = NULL; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); + if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + } if (nla[NFTA_SET_TABLE] != NULL) { table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); @@ -1953,11 +1984,14 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, return -ENOMEM; list_for_each_entry(i, &ctx->table->sets, list) { - if (!sscanf(i->name, name, &n)) + int tmp; + + if (!sscanf(i->name, name, &tmp)) continue; - if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE) + if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE) continue; - set_bit(n, inuse); + + set_bit(tmp, inuse); } n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); @@ -2074,21 +2108,25 @@ done: return skb->len; } -static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) +static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) { const struct nft_set *set; - unsigned int idx = 0, s_idx = cb->args[0]; + unsigned int idx, s_idx = cb->args[0]; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; if (cb->args[1]) return skb->len; list_for_each_entry(table, &ctx->afi->tables, list) { - if (cur_table && cur_table != table) - continue; + if (cur_table) { + if (cur_table != table) + continue; + cur_table = NULL; + } ctx->table = table; + idx = 0; list_for_each_entry(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; @@ -2107,6 +2145,61 @@ done: return skb->len; } +static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx, s_idx = cb->args[0]; + const struct nft_af_info *afi; + struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; + struct net *net = sock_net(skb->sk); + int cur_family = cb->args[3]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(afi, &net->nft.af_info, list) { + if (cur_family) { + if (afi->family != cur_family) + continue; + + cur_family = 0; + } + + list_for_each_entry(table, &afi->tables, list) { + if (cur_table) { + if (cur_table != table) + continue; + + cur_table = NULL; + } + + ctx->table = table; + ctx->afi = afi; + idx = 0; + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, + NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + cb->args[3] = afi->family; + goto done; + } +cont: + idx++; + } + if (s_idx) + s_idx = 0; + } + } + cb->args[1] = 1; +done: + return skb->len; +} + static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); @@ -2123,9 +2216,12 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - if (ctx.table == NULL) - ret = nf_tables_dump_sets_all(&ctx, skb, cb); - else + if (ctx.table == NULL) { + if (ctx.afi == NULL) + ret = nf_tables_dump_sets_all(&ctx, skb, cb); + else + ret = nf_tables_dump_sets_family(&ctx, skb, cb); + } else ret = nf_tables_dump_sets_table(&ctx, skb, cb); return ret; @@ -2138,6 +2234,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); int err; /* Verify existance before starting dump */ @@ -2152,6 +2249,10 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } + /* Only accept unspec with dump */ + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); @@ -2321,6 +2422,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_set *set; struct nft_ctx ctx; int err; @@ -2332,6 +2434,9 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); @@ -2350,7 +2455,9 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype); + return nft_validate_data_load(ctx, dreg, &elem->data, + set->dtype == NFT_DATA_VERDICT ? + NFT_DATA_VERDICT : NFT_DATA_VALUE); } int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, @@ -2501,9 +2608,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) u32 portid, seq; int event, err; - nfmsg = nlmsg_data(cb->nlh); - err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX, - nft_set_elem_list_policy); + err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla, + NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index cb9e685..e8fcc34 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -164,7 +164,7 @@ next_rule: break; } - switch (data[NFT_REG_VERDICT].verdict) { + switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: @@ -172,6 +172,9 @@ next_rule: nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); return data[NFT_REG_VERDICT].verdict; + } + + switch (data[NFT_REG_VERDICT].verdict) { case NFT_JUMP: if (unlikely(pkt->skb->nf_trace)) nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 3c4b69e..d292c8d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -28,8 +28,6 @@ #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/list.h> -#include <linux/jhash.h> -#include <linux/random.h> #include <linux/slab.h> #include <net/sock.h> #include <net/netfilter/nf_log.h> @@ -75,7 +73,6 @@ struct nfulnl_instance { }; #define INSTANCE_BUCKETS 16 -static unsigned int hash_init; static int nfnl_log_net_id __read_mostly; @@ -1053,6 +1050,7 @@ static void __net_exit nfnl_log_net_exit(struct net *net) #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); #endif + nf_log_unset(net, &nfulnl_logger); } static struct pernet_operations nfnl_log_net_ops = { @@ -1066,11 +1064,6 @@ static int __init nfnetlink_log_init(void) { int status = -ENOMEM; - /* it's not really all that important to have a random value, so - * we can do this from the init function, even if there hasn't - * been that much entropy yet */ - get_random_bytes(&hash_init, sizeof(hash_init)); - netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); if (status < 0) { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 615ee12..f072fe8 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -29,6 +29,7 @@ #include <linux/netfilter/nfnetlink_queue.h> #include <linux/list.h> #include <net/sock.h> +#include <net/tcp_states.h> #include <net/netfilter/nf_queue.h> #include <net/netns/generic.h> #include <net/netfilter/nfnetlink_queue.h> @@ -252,6 +253,31 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0; } +static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) +{ + const struct cred *cred; + + if (sk->sk_state == TCP_TIME_WAIT) + return 0; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + cred = sk->sk_socket->file->f_cred; + if (nla_put_be32(skb, NFQA_UID, + htonl(from_kuid_munged(&init_user_ns, cred->fsuid)))) + goto nla_put_failure; + if (nla_put_be32(skb, NFQA_GID, + htonl(from_kgid_munged(&init_user_ns, cred->fsgid)))) + goto nla_put_failure; + } + read_unlock_bh(&sk->sk_callback_lock); + return 0; + +nla_put_failure: + read_unlock_bh(&sk->sk_callback_lock); + return -1; +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -321,6 +347,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); + if (queue->flags & NFQA_CFG_F_UID_GID) { + size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + + nla_total_size(sizeof(u_int32_t))); /* gid */ + } + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) @@ -433,6 +464,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nla_put_failure; } + if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk && + nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) + goto nla_put_failure; + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 8e0bb75..55c939f 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -31,7 +31,7 @@ static void nft_exthdr_eval(const struct nft_expr *expr, { struct nft_exthdr *priv = nft_expr_priv(expr); struct nft_data *dest = &data[priv->dreg]; - unsigned int offset; + unsigned int offset = 0; int err; err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3d3f8fc..6aae699 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -164,7 +164,7 @@ static int nft_hash_init(const struct nft_set *set, unsigned int cnt, i; if (unlikely(!nft_hash_rnd_initted)) { - get_random_bytes(&nft_hash_rnd, 4); + nft_hash_rnd = prandom_u32(); nft_hash_rnd_initted = true; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 8c28220..1ceaaa6 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -21,12 +21,15 @@ struct nft_meta { enum nft_meta_keys key:8; - enum nft_registers dreg:8; + union { + enum nft_registers dreg:8; + enum nft_registers sreg:8; + }; }; -static void nft_meta_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; @@ -132,23 +135,50 @@ err: data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +static void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 value = data[meta->sreg].data[0]; + + switch (meta->key) { + case NFT_META_MARK: + skb->mark = value; + break; + case NFT_META_PRIORITY: + skb->priority = value; + break; + case NFT_META_NFTRACE: + skb->nf_trace = 1; + break; + default: + WARN_ON(1); + } +} + static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, [NFTA_META_KEY] = { .type = NLA_U32 }, + [NFTA_META_SREG] = { .type = NLA_U32 }, }; -static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_meta_init_validate_set(uint32_t key) { - struct nft_meta *priv = nft_expr_priv(expr); - int err; - - if (tb[NFTA_META_DREG] == NULL || - tb[NFTA_META_KEY] == NULL) - return -EINVAL; + switch (key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: + return 0; + default: + return -EOPNOTSUPP; + } +} - priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - switch (priv->key) { +static int nft_meta_init_validate_get(uint32_t key) +{ + switch (key) { case NFT_META_LEN: case NFT_META_PROTOCOL: case NFT_META_PRIORITY: @@ -167,26 +197,69 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: #endif - break; + return 0; default: return -EOPNOTSUPP; } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); +} + +static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + + if (tb[NFTA_META_DREG]) { + err = nft_meta_init_validate_get(priv->key); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + return nft_validate_data_load(ctx, priv->dreg, NULL, + NFT_DATA_VALUE); + } + + err = nft_meta_init_validate_set(priv->key); if (err < 0) return err; - return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG])); + + return 0; } -static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) + goto nla_put_failure; if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg))) + goto nla_put_failure; + return 0; nla_put_failure: @@ -194,17 +267,44 @@ nla_put_failure: } static struct nft_expr_type nft_meta_type; -static const struct nft_expr_ops nft_meta_ops = { +static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .eval = nft_meta_eval, + .eval = nft_meta_get_eval, .init = nft_meta_init, - .dump = nft_meta_dump, + .dump = nft_meta_get_dump, }; +static const struct nft_expr_ops nft_meta_set_ops = { + .type = &nft_meta_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_set_eval, + .init = nft_meta_init, + .dump = nft_meta_set_dump, +}; + +static const struct nft_expr_ops * +nft_meta_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_set_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_meta_type __read_mostly = { .name = "meta", - .ops = &nft_meta_ops, + .select_ops = &nft_meta_select_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c deleted file mode 100644 index 71177df..0000000 --- a/net/netfilter/nft_meta_target.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/rbtree.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> - -struct nft_meta { - enum nft_meta_keys key; -}; - -static void nft_meta_eval(const struct nft_expr *expr, - struct nft_data *nfres, - struct nft_data *data, - const struct nft_pktinfo *pkt) -{ - const struct nft_meta *meta = nft_expr_priv(expr); - struct sk_buff *skb = pkt->skb; - u32 val = data->data[0]; - - switch (meta->key) { - case NFT_META_MARK: - skb->mark = val; - break; - case NFT_META_PRIORITY: - skb->priority = val; - break; - case NFT_META_NFTRACE: - skb->nf_trace = val; - break; -#ifdef CONFIG_NETWORK_SECMARK - case NFT_META_SECMARK: - skb->secmark = val; - break; -#endif - default: - WARN_ON(1); - } -} - -static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { - [NFTA_META_KEY] = { .type = NLA_U32 }, -}; - -static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[]) -{ - struct nft_meta *meta = nft_expr_priv(expr); - - if (tb[NFTA_META_KEY] == NULL) - return -EINVAL; - - meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - switch (meta->key) { - case NFT_META_MARK: - case NFT_META_PRIORITY: - case NFT_META_NFTRACE: -#ifdef CONFIG_NETWORK_SECMARK - case NFT_META_SECMARK: -#endif - break; - default: - return -EINVAL; - } - - return 0; -} - -static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - struct nft_meta *meta = nft_expr_priv(expr); - - NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key)); - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_ops meta_target __read_mostly = { - .name = "meta", - .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .owner = THIS_MODULE, - .eval = nft_meta_eval, - .init = nft_meta_init, - .dump = nft_meta_dump, - .policy = nft_meta_policy, - .maxattr = NFTA_META_MAX, -}; - -static int __init nft_meta_target_init(void) -{ - return nft_register_expr(&meta_target); -} - -static void __exit nft_meta_target_exit(void) -{ - nft_unregister_expr(&meta_target); -} - -module_init(nft_meta_target_init); -module_exit(nft_meta_target_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c new file mode 100644 index 0000000..cbea473 --- /dev/null +++ b/net/netfilter/nft_queue.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code partly funded by OISF + * (http://www.openinfosecfoundation.org/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/jhash.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_queue.h> + +static u32 jhash_initval __read_mostly; + +struct nft_queue { + u16 queuenum; + u16 queues_total; + u16 flags; + u8 family; +}; + +static void nft_queue_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_queue *priv = nft_expr_priv(expr); + u32 queue = priv->queuenum; + u32 ret; + + if (priv->queues_total > 1) { + if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) { + int cpu = smp_processor_id(); + + queue = priv->queuenum + cpu % priv->queues_total; + } else { + queue = nfqueue_hash(pkt->skb, queue, + priv->queues_total, priv->family, + jhash_initval); + } + } + + ret = NF_QUEUE_NR(queue); + if (priv->flags & NFT_QUEUE_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + data[NFT_REG_VERDICT].verdict = ret; +} + +static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { + [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, + [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, + [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, +}; + +static int nft_queue_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_queue *priv = nft_expr_priv(expr); + + if (tb[NFTA_QUEUE_NUM] == NULL) + return -EINVAL; + + init_hashrandom(&jhash_initval); + priv->family = ctx->afi->family; + priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); + + if (tb[NFTA_QUEUE_TOTAL] != NULL) + priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); + if (tb[NFTA_QUEUE_FLAGS] != NULL) { + priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); + if (priv->flags & ~NFT_QUEUE_FLAG_MASK) + return -EINVAL; + } + return 0; +} + +static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_queue *priv = nft_expr_priv(expr); + + if (nla_put_be16(skb, NFTA_QUEUE_NUM, htons(priv->queuenum)) || + nla_put_be16(skb, NFTA_QUEUE_TOTAL, htons(priv->queues_total)) || + nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_queue_type; +static const struct nft_expr_ops nft_queue_ops = { + .type = &nft_queue_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)), + .eval = nft_queue_eval, + .init = nft_queue_init, + .dump = nft_queue_dump, +}; + +static struct nft_expr_type nft_queue_type __read_mostly = { + .name = "queue", + .ops = &nft_queue_ops, + .policy = nft_queue_policy, + .maxattr = NFTA_QUEUE_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_queue_module_init(void) +{ + return nft_register_expr(&nft_queue_type); +} + +static void __exit nft_queue_module_exit(void) +{ + nft_unregister_expr(&nft_queue_type); +} + +module_init(nft_queue_module_init); +module_exit(nft_queue_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Leblond <eric@regit.org>"); +MODULE_ALIAS_NFT_EXPR("queue"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/netfilter/nft_reject.c index fff5ba1..0d690d4 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/netfilter/nft_reject.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,10 +17,16 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/icmp.h> +#include <net/netfilter/ipv4/nf_reject.h> + +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) +#include <net/netfilter/ipv6/nf_reject.h> +#endif struct nft_reject { enum nft_reject_types type:8; u8 icmp_code; + u8 family; }; static void nft_reject_eval(const struct nft_expr *expr, @@ -27,12 +34,25 @@ static void nft_reject_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0); + if (priv->family == NFPROTO_IPV4) + nf_send_unreach(pkt->skb, priv->icmp_code); +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + else if (priv->family == NFPROTO_IPV6) + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->hooknum); +#endif break; case NFT_REJECT_TCP_RST: + if (priv->family == NFPROTO_IPV4) + nf_send_reset(pkt->skb, pkt->hooknum); +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + else if (priv->family == NFPROTO_IPV6) + nf_send_reset6(net, pkt->skb, pkt->hooknum); +#endif break; } @@ -53,6 +73,7 @@ static int nft_reject_init(const struct nft_ctx *ctx, if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; + priv->family = ctx->afi->family; priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: @@ -72,7 +93,7 @@ static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_reject *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type)) + if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) goto nla_put_failure; switch (priv->type) { diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index da35ac0..5929be6 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -211,8 +211,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, ret = 0; if ((info->ct_events || info->exp_events) && !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, - GFP_KERNEL)) + GFP_KERNEL)) { + ret = -EINVAL; goto err3; + } if (info->helper[0]) { ret = xt_ct_set_helper(ct, info->helper, par); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index ed00fef..8f1779f 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -11,15 +11,13 @@ #include <linux/module.h> #include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/jhash.h> - #include <linux/netfilter.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_NFQUEUE.h> +#include <net/netfilter/nf_queue.h> + MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet forwarding to netlink"); MODULE_LICENSE("GPL"); @@ -28,7 +26,6 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static u32 jhash_initval __read_mostly; -static bool rnd_inited __read_mostly; static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) @@ -38,69 +35,16 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) return NF_QUEUE_NR(tinfo->queuenum); } -static u32 hash_v4(const struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - /* packets in either direction go into same queue */ - if ((__force u32)iph->saddr < (__force u32)iph->daddr) - return jhash_3words((__force u32)iph->saddr, - (__force u32)iph->daddr, iph->protocol, jhash_initval); - - return jhash_3words((__force u32)iph->daddr, - (__force u32)iph->saddr, iph->protocol, jhash_initval); -} - -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -static u32 hash_v6(const struct sk_buff *skb) -{ - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - u32 a, b, c; - - if ((__force u32)ip6h->saddr.s6_addr32[3] < - (__force u32)ip6h->daddr.s6_addr32[3]) { - a = (__force u32) ip6h->saddr.s6_addr32[3]; - b = (__force u32) ip6h->daddr.s6_addr32[3]; - } else { - b = (__force u32) ip6h->saddr.s6_addr32[3]; - a = (__force u32) ip6h->daddr.s6_addr32[3]; - } - - if ((__force u32)ip6h->saddr.s6_addr32[1] < - (__force u32)ip6h->daddr.s6_addr32[1]) - c = (__force u32) ip6h->saddr.s6_addr32[1]; - else - c = (__force u32) ip6h->daddr.s6_addr32[1]; - - return jhash_3words(a, b, c, jhash_initval); -} -#endif - -static u32 -nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct xt_NFQ_info_v1 *info = par->targinfo; - u32 queue = info->queuenum; - - if (par->family == NFPROTO_IPV4) - queue += ((u64) hash_v4(skb) * info->queues_total) >> 32; -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (par->family == NFPROTO_IPV6) - queue += ((u64) hash_v6(skb) * info->queues_total) >> 32; -#endif - - return queue; -} - static unsigned int nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; - if (info->queues_total > 1) - queue = nfqueue_hash(skb, par); - + if (info->queues_total > 1) { + queue = nfqueue_hash(skb, queue, info->queues_total, + par->family, jhash_initval); + } return NF_QUEUE_NR(queue); } @@ -120,10 +64,8 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; - if (unlikely(!rnd_inited)) { - get_random_bytes(&jhash_initval, sizeof(jhash_initval)); - rnd_inited = true; - } + init_hashrandom(&jhash_initval); + if (info->queues_total == 0) { pr_err("NFQUEUE: number of total queues is 0\n"); return -EINVAL; @@ -154,8 +96,10 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) int cpu = smp_processor_id(); queue = info->queuenum + cpu % info->queues_total; - } else - queue = nfqueue_hash(skb, par); + } else { + queue = nfqueue_hash(skb, queue, info->queues_total, + par->family, jhash_initval); + } } ret = NF_QUEUE_NR(queue); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 370adf6..190854b 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -100,7 +100,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) int ret; if (unlikely(!rnd_inited)) { - get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); + jhash_rnd = prandom_u32(); rnd_inited = true; } diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c new file mode 100644 index 0000000..9a8e77e7 --- /dev/null +++ b/net/netfilter/xt_cgroup.c @@ -0,0 +1,71 @@ +/* + * Xtables module to match the process control group. + * + * Might be used to implement individual "per-application" firewall + * policies in contrast to global policies based on control groups. + * Matching is based upon processes tagged to net_cls' classid marker. + * + * (C) 2013 Daniel Borkmann <dborkman@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/skbuff.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_cgroup.h> +#include <net/sock.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); +MODULE_DESCRIPTION("Xtables: process control group matching"); +MODULE_ALIAS("ipt_cgroup"); +MODULE_ALIAS("ip6t_cgroup"); + +static int cgroup_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info *info = par->matchinfo; + + if (info->invert & ~1) + return -EINVAL; + + return info->id ? 0 : -EINVAL; +} + +static bool +cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info *info = par->matchinfo; + + if (skb->sk == NULL) + return false; + + return (info->id == skb->sk->sk_classid) ^ info->invert; +} + +static struct xt_match cgroup_mt_reg __read_mostly = { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check, + .match = cgroup_mt, + .matchsize = sizeof(struct xt_cgroup_info), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), +}; + +static int __init cgroup_mt_init(void) +{ + return xt_register_match(&cgroup_mt_reg); +} + +static void __exit cgroup_mt_exit(void) +{ + xt_unregister_match(&cgroup_mt_reg); +} + +module_init(cgroup_mt_init); +module_exit(cgroup_mt_exit); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index c40b269..7671e82 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -229,7 +229,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) u_int32_t rand; do { - get_random_bytes(&rand, sizeof(rand)); + rand = prandom_u32(); } while (!rand); cmpxchg(&connlimit_rnd, 0, rand); } diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 7278145..69f78e9 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -17,8 +17,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9ff035c..d819f62 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -177,7 +177,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, /* initialize hash with random val at the time we allocate * the first hashtable entry */ if (unlikely(!ht->rnd_initialized)) { - get_random_bytes(&ht->rnd, sizeof(ht->rnd)); + ht->rnd = prandom_u32(); ht->rnd_initialized = true; } @@ -325,21 +325,24 @@ static void htable_gc(unsigned long htlong) add_timer(&ht->timer); } -static void htable_destroy(struct xt_hashlimit_htable *hinfo) +static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net); struct proc_dir_entry *parent; - del_timer_sync(&hinfo->timer); - if (hinfo->family == NFPROTO_IPV4) parent = hashlimit_net->ipt_hashlimit; else parent = hashlimit_net->ip6t_hashlimit; - if(parent != NULL) + if (parent != NULL) remove_proc_entry(hinfo->name, parent); +} +static void htable_destroy(struct xt_hashlimit_htable *hinfo) +{ + del_timer_sync(&hinfo->timer); + htable_remove_proc_entry(hinfo); htable_selective_cleanup(hinfo, select_all); kfree(hinfo->name); vfree(hinfo); @@ -883,21 +886,15 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { struct xt_hashlimit_htable *hinfo; - struct proc_dir_entry *pde; struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); - /* recent_net_exit() is called before recent_mt_destroy(). Make sure - * that the parent xt_recent proc entry is is empty before trying to - * remove it. + /* hashlimit_net_exit() is called before hashlimit_mt_destroy(). + * Make sure that the parent ipt_hashlimit and ip6t_hashlimit proc + * entries is empty before trying to remove it. */ mutex_lock(&hashlimit_mutex); - pde = hashlimit_net->ipt_hashlimit; - if (pde == NULL) - pde = hashlimit_net->ip6t_hashlimit; - hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) - remove_proc_entry(hinfo->name, pde); - + htable_remove_proc_entry(hinfo); hashlimit_net->ipt_hashlimit = NULL; hashlimit_net->ip6t_hashlimit = NULL; mutex_unlock(&hashlimit_mutex); diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c new file mode 100644 index 0000000..a4c7561 --- /dev/null +++ b/net/netfilter/xt_ipcomp.c @@ -0,0 +1,111 @@ +/* Kernel module to match IPComp parameters for IPv4 and IPv6 + * + * Copyright (C) 2013 WindRiver + * + * Author: + * Fan Du <fan.du@windriver.com> + * + * Based on: + * net/netfilter/xt_esp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> + +#include <linux/netfilter/xt_ipcomp.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Fan Du <fan.du@windriver.com>"); +MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match"); + +/* Returns 1 if the spi is matched by the range, 0 otherwise */ +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) +{ + bool r; + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); + r = (spi >= min && spi <= max) ^ invert; + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); + return r; +} + +static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct ip_comp_hdr _comphdr; + const struct ip_comp_hdr *chdr; + const struct xt_ipcomp *compinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + chdr = skb_header_pointer(skb, par->thoff, sizeof(_comphdr), &_comphdr); + if (chdr == NULL) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + pr_debug("Dropping evil IPComp tinygram.\n"); + par->hotdrop = true; + return 0; + } + + return spi_match(compinfo->spis[0], compinfo->spis[1], + ntohl(chdr->cpi << 16), + !!(compinfo->invflags & XT_IPCOMP_INV_SPI)); +} + +static int comp_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_ipcomp *compinfo = par->matchinfo; + + /* Must specify no unknown invflags */ + if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) { + pr_err("unknown flags %X\n", compinfo->invflags); + return -EINVAL; + } + return 0; +} + +static struct xt_match comp_mt_reg[] __read_mostly = { + { + .name = "ipcomp", + .family = NFPROTO_IPV4, + .match = comp_mt, + .matchsize = sizeof(struct xt_ipcomp), + .proto = IPPROTO_COMP, + .checkentry = comp_mt_check, + .me = THIS_MODULE, + }, + { + .name = "ipcomp", + .family = NFPROTO_IPV6, + .match = comp_mt, + .matchsize = sizeof(struct xt_ipcomp), + .proto = IPPROTO_COMP, + .checkentry = comp_mt_check, + .me = THIS_MODULE, + }, +}; + +static int __init comp_mt_init(void) +{ + return xt_register_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg)); +} + +static void __exit comp_mt_exit(void) +{ + xt_unregister_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg)); +} + +module_init(comp_mt_init); +module_exit(comp_mt_exit); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 647d989..7174611 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -13,8 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 1e657cf..bfdc29f 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -334,7 +334,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, size_t sz; if (unlikely(!hash_rnd_inited)) { - get_random_bytes(&hash_rnd, sizeof(hash_rnd)); + hash_rnd = prandom_u32(); hash_rnd_inited = true; } if (info->check_set & ~XT_RECENT_VALID_FLAGS) { diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 6f17013..d0a3acf 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -24,8 +24,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index a1287ce..d0f38bc 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -24,8 +24,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 69345ce..c2f2a53 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index d24d774..8758268 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 85d842e..f0cb92f 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -24,8 +24,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index b9be0ee..680caf4 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -24,8 +24,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index dce1beb..3045a96 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 8ef83ee..e66e977 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 5a9f31c..8b6e1ab 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 43817d7..78a63c1 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index 700af49..3a9e5dc 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 9650c4a..1e779bb 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 8196978..4a397cd 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6433489..34a656d9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -131,7 +131,7 @@ int netlink_add_tap(struct netlink_tap *nt) } EXPORT_SYMBOL_GPL(netlink_add_tap); -int __netlink_remove_tap(struct netlink_tap *nt) +static int __netlink_remove_tap(struct netlink_tap *nt) { bool found = false; struct netlink_tap *tmp; @@ -155,7 +155,6 @@ out: return found ? 0 : -ENODEV; } -EXPORT_SYMBOL_GPL(__netlink_remove_tap); int netlink_remove_tap(struct netlink_tap *nt) { @@ -204,6 +203,8 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); + nskb->pkt_type = netlink_is_kernel(sk) ? + PACKET_KERNEL : PACKET_USER; ret = dev_queue_xmit(nskb); if (unlikely(ret > 0)) @@ -239,6 +240,13 @@ static void netlink_deliver_tap(struct sk_buff *skb) rcu_read_unlock(); } +static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, + struct sk_buff *skb) +{ + if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) + netlink_deliver_tap(skb); +} + static void netlink_overrun(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -1697,14 +1705,10 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { - /* We could do a netlink_deliver_tap(skb) here as well - * but since this is intended for the kernel only, we - * should rather let it stay under the hood. - */ - ret = skb->len; netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; + netlink_deliver_tap_kernel(sk, ssk, skb); nlk->netlink_rcv(skb); consume_skb(skb); } else { @@ -2539,21 +2543,6 @@ void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) netlink_update_socket_mc(nlk_sk(sk), group, 0); } -/** - * netlink_clear_multicast_users - kick off multicast listeners - * - * This function removes all listeners from the given group. - * @ksk: The kernel netlink socket, as returned by - * netlink_kernel_create(). - * @group: The multicast group to clear. - */ -void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) -{ - netlink_table_grab(); - __netlink_clear_multicast_users(ksk, group); - netlink_table_ungrab(); -} - struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 85bf42e..b1dcdb9 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -74,9 +74,12 @@ static struct list_head family_ht[GENL_FAM_TAB_SIZE]; * Bit 17 is marked as already used since the VFS quota code * also abused this API and relied on family == group ID, we * cater to that by giving it a static family and group ID. + * Bit 18 is marked as already used since the PMCRAID driver + * did the same thing as the VFS quota code (maybe copied?) */ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | - BIT(GENL_ID_VFS_DQUOT); + BIT(GENL_ID_VFS_DQUOT) | + BIT(GENL_ID_PMCRAID); static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; @@ -139,6 +142,7 @@ static u16 genl_generate_id(void) for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { if (id_gen_idx != GENL_ID_VFS_DQUOT && + id_gen_idx != GENL_ID_PMCRAID && !genl_family_find_byid(id_gen_idx)) return id_gen_idx; if (++id_gen_idx > GENL_MAX_ID) @@ -214,7 +218,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) { int first_id; int n_groups = family->n_mcgrps; - int err, i; + int err = 0, i; bool groups_allocated = false; if (!n_groups) @@ -236,9 +240,12 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) } else if (strcmp(family->name, "NET_DM") == 0) { first_id = 1; BUG_ON(n_groups != 1); - } else if (strcmp(family->name, "VFS_DQUOT") == 0) { + } else if (family->id == GENL_ID_VFS_DQUOT) { first_id = GENL_ID_VFS_DQUOT; BUG_ON(n_groups != 1); + } else if (family->id == GENL_ID_PMCRAID) { + first_id = GENL_ID_PMCRAID; + BUG_ON(n_groups != 1); } else { groups_allocated = true; err = genl_allocate_reserve_groups(n_groups, &first_id); diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 9d68441..2277276 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/nfc.h> diff --git a/net/nfc/core.c b/net/nfc/core.c index 8725291..02ab341 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 64f922b..a9f4d2e 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "hci: %s: " fmt, __func__ diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index d07ca4c..3b96100 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "hci: %s: " fmt, __func__ diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index b274d12..c3d2e2c 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef __LOCAL_HCI_H diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index b6b4109..e9de151 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "hci: %s: " fmt, __func__ diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index fe5e966..a07d2b8 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -13,9 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <net/nfc/llc.h> diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index 7be0b7f..5dad4c5 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -13,9 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef __LOCAL_LLC_H_ diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c index 87b1029..d0435d5 100644 --- a/net/nfc/hci/llc_nop.c +++ b/net/nfc/hci/llc_nop.c @@ -13,9 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/types.h> diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 27b313b..719ad0a 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -13,9 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "shdlc: %s: " fmt, __func__ diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index f4d48b5..de1789e 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ enum llcp_state { diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 1017894..693cd1a 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "llcp: %s: " fmt, __func__ diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 81cd341..1349074 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "llcp: %s: " fmt, __func__ diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 824c605..69fbc8d 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "llcp: %s: " fmt, __func__ diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index b943d46..f0e955e 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -20,8 +20,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 2a9399d..6c3aef8 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -16,8 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/nfc/nci/lib.c b/net/nfc/nci/lib.c index 6b7fd26..ed774a2 100644 --- a/net/nfc/nci/lib.c +++ b/net/nfc/nci/lib.c @@ -20,8 +20,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index b2aa98e..1e90509 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -20,8 +20,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index dd072f3..041de51 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -20,8 +20,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index a9b2342..ebbf6fb 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index aaf606f..9d6e74f 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef __LOCAL_NFC_H diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 66bcd2e..c27a6e8 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 65cfaa8..716b7ee 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -165,7 +165,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, } csum_replace4(&nh->check, *addr, new_addr); - skb->rxhash = 0; + skb_clear_hash(skb); *addr = new_addr; } @@ -199,7 +199,7 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, if (recalculate_csum) update_ipv6_checksum(skb, l4_proto, addr, new_addr); - skb->rxhash = 0; + skb_clear_hash(skb); memcpy(addr, new_addr, sizeof(__be32[4])); } @@ -296,7 +296,7 @@ static void set_tp_port(struct sk_buff *skb, __be16 *port, { inet_proto_csum_replace2(check, skb, *port, new_port, 0); *port = new_port; - skb->rxhash = 0; + skb_clear_hash(skb); } static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) @@ -310,7 +310,7 @@ static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) uh->check = CSUM_MANGLED_0; } else { *port = new_port; - skb->rxhash = 0; + skb_clear_hash(skb); } } @@ -381,7 +381,7 @@ static int set_sctp(struct sk_buff *skb, /* Carry any checksum errors through. */ sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - skb->rxhash = 0; + skb_clear_hash(skb); } return 0; diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 51f6d7f..b430d42 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -25,7 +25,7 @@ #include <linux/if_vlan.h> #include <net/llc_pdu.h> #include <linux/kernel.h> -#include <linux/jhash.h> +#include <linux/hash.h> #include <linux/jiffies.h> #include <linux/llc.h> #include <linux/module.h> @@ -380,7 +380,7 @@ static u32 flow_hash(const struct sw_flow_key *key, int key_start, /* Make sure number of hash bytes are multiple of u32. */ BUILD_BUG_ON(sizeof(long) % sizeof(u32)); - return jhash2(hash_key, hash_u32s, 0); + return arch_fast_hash2(hash_key, hash_u32s, 0); } static int flow_key_start(const struct sw_flow_key *key) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index f28ff7e..208dd9a 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -139,14 +139,14 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); - vport->percpu_stats = alloc_percpu(struct pcpu_tstats); + vport->percpu_stats = alloc_percpu(struct pcpu_sw_netstats); if (!vport->percpu_stats) { kfree(vport); return ERR_PTR(-ENOMEM); } for_each_possible_cpu(i) { - struct pcpu_tstats *vport_stats; + struct pcpu_sw_netstats *vport_stats; vport_stats = per_cpu_ptr(vport->percpu_stats, i); u64_stats_init(&vport_stats->syncp); } @@ -278,8 +278,8 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) spin_unlock_bh(&vport->stats_lock); for_each_possible_cpu(i) { - const struct pcpu_tstats *percpu_stats; - struct pcpu_tstats local_stats; + const struct pcpu_sw_netstats *percpu_stats; + struct pcpu_sw_netstats local_stats; unsigned int start; percpu_stats = per_cpu_ptr(vport->percpu_stats, i); @@ -347,7 +347,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, struct ovs_key_ipv4_tunnel *tun_key) { - struct pcpu_tstats *stats; + struct pcpu_sw_netstats *stats; stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); @@ -373,7 +373,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) int sent = vport->ops->send(vport, skb); if (likely(sent > 0)) { - struct pcpu_tstats *stats; + struct pcpu_sw_netstats *stats; stats = this_cpu_ptr(vport->percpu_stats); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 92137dd..d7e50a1 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -87,7 +87,7 @@ struct vport { struct hlist_node dp_hash_node; const struct vport_ops *ops; - struct pcpu_tstats __percpu *percpu_stats; + struct pcpu_sw_netstats __percpu *percpu_stats; spinlock_t stats_lock; struct vport_err_stats err_stats; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ac27c86..279467b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -237,6 +237,82 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); +static int packet_direct_xmit(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + const struct net_device_ops *ops = dev->netdev_ops; + netdev_features_t features; + struct netdev_queue *txq; + u16 queue_map; + int ret; + + if (unlikely(!netif_running(dev) || + !netif_carrier_ok(dev))) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + features = netif_skb_features(skb); + if (skb_needs_linearize(skb, features) && + __skb_linearize(skb)) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + queue_map = skb_get_queue_mapping(skb); + txq = netdev_get_tx_queue(dev, queue_map); + + __netif_tx_lock_bh(txq); + if (unlikely(netif_xmit_frozen_or_stopped(txq))) { + ret = NETDEV_TX_BUSY; + kfree_skb(skb); + goto out; + } + + ret = ops->ndo_start_xmit(skb, dev); + if (likely(dev_xmit_complete(ret))) + txq_trans_update(txq); + else + kfree_skb(skb); +out: + __netif_tx_unlock_bh(txq); + return ret; +} + +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (likely(dev)) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + +static void packet_cached_dev_assign(struct packet_sock *po, + struct net_device *dev) +{ + rcu_assign_pointer(po->cached_dev, dev); +} + +static void packet_cached_dev_reset(struct packet_sock *po) +{ + RCU_INIT_POINTER(po->cached_dev, NULL); +} + +static bool packet_use_direct_xmit(const struct packet_sock *po) +{ + return po->xmit == packet_direct_xmit; +} + +static u16 packet_pick_tx_queue(struct net_device *dev) +{ + return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -246,12 +322,10 @@ static void register_prot_hook(struct sock *sk) struct packet_sock *po = pkt_sk(sk); if (!po->running) { - if (po->fanout) { + if (po->fanout) __fanout_link(sk, po); - } else { + else dev_add_pack(&po->prot_hook); - rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); - } sock_hold(sk); po->running = 1; @@ -270,12 +344,11 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) { + + if (po->fanout) __fanout_unlink(sk, po); - } else { + else __dev_remove_pack(&po->prot_hook); - RCU_INIT_POINTER(po->cached_dev, NULL); - } __sock_put(sk); @@ -437,11 +510,12 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, { struct tpacket_kbdq_core *pkc; - pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + pkc = tx_ring ? GET_PBDQC_FROM_RB(&po->tx_ring) : + GET_PBDQC_FROM_RB(&po->rx_ring); - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } @@ -463,7 +537,8 @@ static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring) if (tx_ring) BUG(); - pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + pkc = tx_ring ? GET_PBDQC_FROM_RB(&po->tx_ring) : + GET_PBDQC_FROM_RB(&po->rx_ring); prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); } @@ -521,7 +596,7 @@ static void init_prb_bdqc(struct packet_sock *po, struct pgv *pg_vec, union tpacket_req_u *req_u, int tx_ring) { - struct tpacket_kbdq_core *p1 = &rb->prb_bdqc; + struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd; memset(p1, 0x0, sizeof(*p1)); @@ -585,7 +660,7 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) static void prb_retire_rx_blk_timer_expired(unsigned long data) { struct packet_sock *po = (struct packet_sock *)data; - struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc; + struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; @@ -888,7 +963,7 @@ static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { - ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb); + ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb); } static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, @@ -902,9 +977,11 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, { if (vlan_tx_tag_present(pkc->skb)) { ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); - ppd->tp_status = TP_STATUS_VLAN_VALID; + ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); + ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { ppd->hv1.tp_vlan_tci = 0; + ppd->hv1.tp_vlan_tpid = 0; ppd->tp_status = TP_STATUS_AVAILABLE; } } @@ -912,6 +989,7 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { + ppd->hv1.tp_padding = 0; prb_fill_vlan_info(pkc, ppd); if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) @@ -1220,7 +1298,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; } - skb_get_rxhash(skb); + skb_get_hash(skb); idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: @@ -1278,9 +1356,9 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } -static bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) { - if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) + if (ptype->af_packet_priv == (void *)((struct packet_sock *)sk)->fanout) return true; return false; @@ -1737,6 +1815,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct timespec ts; __u32 ts_status; + /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. + * We may add members to them until current aligned size without forcing + * userspace to call getsockopt(..., PACKET_HDRLEN, ...). + */ + BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); + BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); + if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -1843,11 +1928,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_nsec = ts.tv_nsec; if (vlan_tx_tag_present(skb)) { h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); - status |= TP_STATUS_VLAN_VALID; + h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); + status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { h.h2->tp_vlan_tci = 0; + h.h2->tp_vlan_tpid = 0; } - h.h2->tp_padding = 0; + memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding)); hdrlen = sizeof(*h.h2); break; case TPACKET_V3: @@ -1861,6 +1948,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h3->tp_net = netoff; h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; + memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding)); hdrlen = sizeof(*h.h3); break; default: @@ -1971,9 +2059,10 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - skb_probe_transport_header(skb, 0); - if (po->tp_tx_has_off) { + if (!packet_use_direct_xmit(po)) + skb_probe_transport_header(skb, 0); + if (unlikely(po->tp_tx_has_off)) { int off_min, off_max, off; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); off_max = po->tx_ring.frame_size - tp_len; @@ -2059,19 +2148,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } -static struct net_device *packet_cached_dev_get(struct packet_sock *po) -{ - struct net_device *dev; - - rcu_read_lock(); - dev = rcu_dereference(po->cached_dev); - if (dev) - dev_hold(dev); - rcu_read_unlock(); - - return dev; -} - static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; @@ -2088,7 +2164,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2156,12 +2232,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } + skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); atomic_inc(&po->tx_ring.pending); status = TP_STATUS_SEND_REQUEST; - err = dev_queue_xmit(skb); + err = po->xmit(skb); if (unlikely(err > 0)) { err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == @@ -2220,8 +2297,7 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, return skb; } -static int packet_snd(struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -2242,7 +2318,7 @@ static int packet_snd(struct socket *sock, * Get and verify the address. */ - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2366,6 +2442,7 @@ static int packet_snd(struct socket *sock, skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; + skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); if (po->has_vnet_hdr) { if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { @@ -2386,16 +2463,12 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } - skb_probe_transport_header(skb, reserve); - + if (!packet_use_direct_xmit(po)) + skb_probe_transport_header(skb, reserve); if (unlikely(extra_len == 4)) skb->no_fcs = 1; - /* - * Now send it - */ - - err = dev_queue_xmit(skb); + err = po->xmit(skb); if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; @@ -2417,6 +2490,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); + if (po->tx_ring.pg_vec) return tpacket_snd(po, msg); else @@ -2451,6 +2525,8 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + packet_cached_dev_reset(po); + if (po->prot_hook.dev) { dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -2506,14 +2582,17 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc spin_lock(&po->bind_lock); unregister_prot_hook(sk, true); + po->num = protocol; po->prot_hook.type = protocol; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; + po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + if (protocol == 0) goto out_unlock; @@ -2626,7 +2705,9 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; - RCU_INIT_POINTER(po->cached_dev, NULL); + po->xmit = dev_queue_xmit; + + packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -2799,11 +2880,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_net = skb_network_offset(skb); if (vlan_tx_tag_present(skb)) { aux.tp_vlan_tci = vlan_tx_tag_get(skb); - aux.tp_status |= TP_STATUS_VLAN_VALID; + aux.tp_vlan_tpid = ntohs(skb->vlan_proto); + aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { aux.tp_vlan_tci = 0; + aux.tp_vlan_tpid = 0; } - aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -3204,6 +3286,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->tp_tx_has_off = !!val; return 0; } + case PACKET_QDISC_BYPASS: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->xmit = val ? packet_direct_xmit : dev_queue_xmit; + return 0; + } default: return -ENOPROTOOPT; } @@ -3296,6 +3390,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; break; + case PACKET_QDISC_BYPASS: + val = packet_use_direct_xmit(po); + break; default: return -ENOPROTOOPT; } @@ -3337,6 +3434,7 @@ static int packet_notifier(struct notifier_block *this, sk->sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { + packet_cached_dev_reset(po); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); diff --git a/net/packet/internal.h b/net/packet/internal.h index 1035fa2..0a87d7b 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -114,6 +114,7 @@ struct packet_sock { unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct net_device __rcu *cached_dev; + int (*xmit)(struct sk_buff *skb); struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/rds/ib.c b/net/rds/ib.c index b4c8b00..ba2dffe 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -338,7 +338,8 @@ static int rds_ib_laddr_check(__be32 addr) ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); /* due to this, we will claim to support iWARP devices unless we check node_type. */ - if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA) + if (ret || !cm_id->device || + cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI4 ret %d node type %d\n", diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index e590949..37be6e2 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -552,9 +552,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); scat = &rm->data.op_sg[sg]; - ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; - ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); - return ret; + ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length); + return sizeof(struct rds_header) + ret; } /* FIXME we may overallocate here */ diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 1bacc10..ed7e0b4 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -14,9 +14,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/kernel.h> diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 33af772..d080eb4 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1012,7 +1012,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros make_rose->source_call = facilities.source_call; make_rose->source_ndigis = facilities.source_ndigis; for (n = 0 ; n < facilities.source_ndigis ; n++) - make_rose->source_digis[n]= facilities.source_digis[n]; + make_rose->source_digis[n] = facilities.source_digis[n]; make_rose->neighbour = neigh; make_rose->device = dev; make_rose->facilities = facilities; @@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_name) { struct sockaddr_rose *srose; + struct full_sockaddr_rose *full_srose = msg->msg_name; memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); srose = msg->msg_name; @@ -1260,18 +1261,9 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; srose->srose_ndigis = rose->dest_ndigis; - if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) { - struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name; - for (n = 0 ; n < rose->dest_ndigis ; n++) - full_srose->srose_digis[n] = rose->dest_digis[n]; - msg->msg_namelen = sizeof(struct full_sockaddr_rose); - } else { - if (rose->dest_ndigis >= 1) { - srose->srose_ndigis = 1; - srose->srose_digi = rose->dest_digis[0]; - } - msg->msg_namelen = sizeof(struct sockaddr_rose); - } + for (n = 0 ; n < rose->dest_ndigis ; n++) + full_srose->srose_digis[n] = rose->dest_digis[n]; + msg->msg_namelen = sizeof(struct full_sockaddr_rose); } skb_free_datagram(sk, skb); diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 28dbdb9..5000588 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -146,7 +146,7 @@ static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev) static const struct header_ops rose_header_ops = { .create = rose_header, - .rebuild= rose_rebuild_header, + .rebuild = rose_rebuild_header, }; static const struct net_device_ops rose_netdev_ops = { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index ad1f1d8..a1a8e29 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -286,6 +286,28 @@ config NET_SCH_FQ If unsure, say N. +config NET_SCH_HHF + tristate "Heavy-Hitter Filter (HHF)" + help + Say Y here if you want to use the Heavy-Hitter Filter (HHF) + packet scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_hhf. + +config NET_SCH_PIE + tristate "Proportional Integral controller Enhanced (PIE) scheduler" + help + Say Y here if you want to use the Proportional Integral controller + Enhanced scheduler packet scheduling algorithm. + For more information, please see + http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + + To compile this driver as a module, choose M here: the module + will be called sch_pie. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT @@ -435,6 +457,7 @@ config NET_CLS_FLOW config NET_CLS_CGROUP tristate "Control Group Classifier" select NET_CLS + select CGROUP_NET_CLASSID depends on CGROUPS ---help--- Say Y here if you want to classify packets based on the control diff --git a/net/sched/Makefile b/net/sched/Makefile index 35fa47a..0a869a1 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -40,6 +40,8 @@ obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o +obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o +obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fd70728..f63e146 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -29,25 +29,16 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) { - unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); - struct tcf_common **p1p; - - for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) { - if (*p1p == p) { - write_lock_bh(hinfo->lock); - *p1p = p->tcfc_next; - write_unlock_bh(hinfo->lock); - gen_kill_estimator(&p->tcfc_bstats, - &p->tcfc_rate_est); - /* - * gen_estimator est_timer() might access p->tcfc_lock - * or bstats, wait a RCU grace period before freeing p - */ - kfree_rcu(p, tcfc_rcu); - return; - } - } - WARN_ON(1); + spin_lock_bh(&hinfo->lock); + hlist_del(&p->tcfc_head); + spin_unlock_bh(&hinfo->lock); + gen_kill_estimator(&p->tcfc_bstats, + &p->tcfc_rate_est); + /* + * gen_estimator est_timer() might access p->tcfc_lock + * or bstats, wait a RCU grace period before freeing p + */ + kfree_rcu(p, tcfc_rcu); } EXPORT_SYMBOL(tcf_hash_destroy); @@ -73,18 +64,19 @@ EXPORT_SYMBOL(tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a, struct tcf_hashinfo *hinfo) { + struct hlist_head *head; struct tcf_common *p; int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct nlattr *nest; - read_lock_bh(hinfo->lock); + spin_lock_bh(&hinfo->lock); s_i = cb->args[0]; for (i = 0; i < (hinfo->hmask + 1); i++) { - p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; + head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - for (; p; p = p->tcfc_next) { + hlist_for_each_entry_rcu(p, head, tcfc_head) { index++; if (index < s_i) continue; @@ -107,7 +99,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, } } done: - read_unlock_bh(hinfo->lock); + spin_unlock_bh(&hinfo->lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -120,7 +112,9 @@ nla_put_failure: static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, struct tcf_hashinfo *hinfo) { - struct tcf_common *p, *s_p; + struct hlist_head *head; + struct hlist_node *n; + struct tcf_common *p; struct nlattr *nest; int i = 0, n_i = 0; @@ -130,14 +124,12 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, if (nla_put_string(skb, TCA_KIND, a->ops->kind)) goto nla_put_failure; for (i = 0; i < (hinfo->hmask + 1); i++) { - p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; - - while (p != NULL) { - s_p = p->tcfc_next; - if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) + head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; + hlist_for_each_entry_safe(p, n, head, tcfc_head) { + if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) { module_put(a->ops->owner); - n_i++; - p = s_p; + n_i++; + } } } if (nla_put_u32(skb, TCA_FCNT, n_i)) @@ -150,8 +142,8 @@ nla_put_failure: return -EINVAL; } -int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, - int type, struct tc_action *a) +static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, + int type, struct tc_action *a) { struct tcf_hashinfo *hinfo = a->ops->hinfo; @@ -164,19 +156,18 @@ int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, return -EINVAL; } } -EXPORT_SYMBOL(tcf_generic_walker); struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) { - struct tcf_common *p; + struct tcf_common *p = NULL; + struct hlist_head *head; - read_lock_bh(hinfo->lock); - for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; - p = p->tcfc_next) { + spin_lock_bh(&hinfo->lock); + head = &hinfo->htab[tcf_hash(index, hinfo->hmask)]; + hlist_for_each_entry_rcu(p, head, tcfc_head) if (p->tcfc_index == index) break; - } - read_unlock_bh(hinfo->lock); + spin_unlock_bh(&hinfo->lock); return p; } @@ -191,11 +182,12 @@ u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo) val = 1; } while (tcf_hash_lookup(val, hinfo)); - return (*idx_gen = val); + *idx_gen = val; + return val; } EXPORT_SYMBOL(tcf_hash_new_index); -int tcf_hash_search(struct tc_action *a, u32 index) +static int tcf_hash_search(struct tc_action *a, u32 index) { struct tcf_hashinfo *hinfo = a->ops->hinfo; struct tcf_common *p = tcf_hash_lookup(index, hinfo); @@ -206,7 +198,6 @@ int tcf_hash_search(struct tc_action *a, u32 index) } return 0; } -EXPORT_SYMBOL(tcf_hash_search); struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, struct tcf_hashinfo *hinfo) @@ -235,6 +226,7 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, p->tcfc_bindcnt = 1; spin_lock_init(&p->tcfc_lock); + INIT_HLIST_NODE(&p->tcfc_head); p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; @@ -256,29 +248,37 @@ void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo) { unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); - write_lock_bh(hinfo->lock); - p->tcfc_next = hinfo->htab[h]; - hinfo->htab[h] = p; - write_unlock_bh(hinfo->lock); + spin_lock_bh(&hinfo->lock); + hlist_add_head(&p->tcfc_head, &hinfo->htab[h]); + spin_unlock_bh(&hinfo->lock); } EXPORT_SYMBOL(tcf_hash_insert); -static struct tc_action_ops *act_base = NULL; +static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); int tcf_register_action(struct tc_action_ops *act) { - struct tc_action_ops *a, **ap; + struct tc_action_ops *a; + + /* Must supply act, dump, cleanup and init */ + if (!act->act || !act->dump || !act->cleanup || !act->init) + return -EINVAL; + + /* Supply defaults */ + if (!act->lookup) + act->lookup = tcf_hash_search; + if (!act->walk) + act->walk = tcf_generic_walker; write_lock(&act_mod_lock); - for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { + list_for_each_entry(a, &act_base, head) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { write_unlock(&act_mod_lock); return -EEXIST; } } - act->next = NULL; - *ap = act; + list_add_tail(&act->head, &act_base); write_unlock(&act_mod_lock); return 0; } @@ -286,17 +286,16 @@ EXPORT_SYMBOL(tcf_register_action); int tcf_unregister_action(struct tc_action_ops *act) { - struct tc_action_ops *a, **ap; + struct tc_action_ops *a; int err = -ENOENT; write_lock(&act_mod_lock); - for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) - if (a == act) + list_for_each_entry(a, &act_base, head) { + if (a == act) { + list_del(&act->head); + err = 0; break; - if (a) { - *ap = a->next; - a->next = NULL; - err = 0; + } } write_unlock(&act_mod_lock); return err; @@ -306,69 +305,42 @@ EXPORT_SYMBOL(tcf_unregister_action); /* lookup by name */ static struct tc_action_ops *tc_lookup_action_n(char *kind) { - struct tc_action_ops *a = NULL; + struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); - for (a = act_base; a; a = a->next) { + list_for_each_entry(a, &act_base, head) { if (strcmp(kind, a->kind) == 0) { - if (!try_module_get(a->owner)) { - read_unlock(&act_mod_lock); - return NULL; - } + if (try_module_get(a->owner)) + res = a; break; } } read_unlock(&act_mod_lock); } - return a; + return res; } /* lookup by nlattr */ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) { - struct tc_action_ops *a = NULL; + struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); - for (a = act_base; a; a = a->next) { + list_for_each_entry(a, &act_base, head) { if (nla_strcmp(kind, a->kind) == 0) { - if (!try_module_get(a->owner)) { - read_unlock(&act_mod_lock); - return NULL; - } + if (try_module_get(a->owner)) + res = a; break; } } read_unlock(&act_mod_lock); } - return a; -} - -#if 0 -/* lookup by id */ -static struct tc_action_ops *tc_lookup_action_id(u32 type) -{ - struct tc_action_ops *a = NULL; - - if (type) { - read_lock(&act_mod_lock); - for (a = act_base; a; a = a->next) { - if (a->type == type) { - if (!try_module_get(a->owner)) { - read_unlock(&act_mod_lock); - return NULL; - } - break; - } - } - read_unlock(&act_mod_lock); - } - return a; + return res; } -#endif -int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act, +int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, struct tcf_result *res) { const struct tc_action *a; @@ -379,53 +351,39 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act, ret = TC_ACT_OK; goto exec_done; } - while ((a = act) != NULL) { + list_for_each_entry(a, actions, list) { repeat: - if (a->ops && a->ops->act) { - ret = a->ops->act(skb, a, res); - if (TC_MUNGED & skb->tc_verd) { - /* copied already, allow trampling */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); - } - if (ret == TC_ACT_REPEAT) - goto repeat; /* we need a ttl - JHS */ - if (ret != TC_ACT_PIPE) - goto exec_done; + ret = a->ops->act(skb, a, res); + if (TC_MUNGED & skb->tc_verd) { + /* copied already, allow trampling */ + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); + skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); } - act = a->next; + if (ret == TC_ACT_REPEAT) + goto repeat; /* we need a ttl - JHS */ + if (ret != TC_ACT_PIPE) + goto exec_done; } exec_done: return ret; } EXPORT_SYMBOL(tcf_action_exec); -void tcf_action_destroy(struct tc_action *act, int bind) +void tcf_action_destroy(struct list_head *actions, int bind) { - struct tc_action *a; + struct tc_action *a, *tmp; - for (a = act; a; a = act) { - if (a->ops && a->ops->cleanup) { - if (a->ops->cleanup(a, bind) == ACT_P_DELETED) - module_put(a->ops->owner); - act = act->next; - kfree(a); - } else { - /*FIXME: Remove later - catch insertion bugs*/ - WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n"); - act = act->next; - kfree(a); - } + list_for_each_entry_safe(a, tmp, actions, list) { + if (a->ops->cleanup(a, bind) == ACT_P_DELETED) + module_put(a->ops->owner); + list_del(&a->list); + kfree(a); } } int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - int err = -EINVAL; - - if (a->ops == NULL || a->ops->dump == NULL) - return err; return a->ops->dump(skb, a, bind, ref); } @@ -436,9 +394,6 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - if (a->ops == NULL || a->ops->dump == NULL) - return err; - if (nla_put_string(skb, TCA_KIND, a->ops->kind)) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) @@ -459,14 +414,13 @@ nla_put_failure: EXPORT_SYMBOL(tcf_action_dump_1); int -tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) +tcf_action_dump(struct sk_buff *skb, struct list_head *actions, int bind, int ref) { struct tc_action *a; int err = -EINVAL; struct nlattr *nest; - while ((a = act) != NULL) { - act = a->next; + list_for_each_entry(a, actions, list) { nest = nla_nest_start(skb, a->order); if (nest == NULL) goto nla_put_failure; @@ -541,6 +495,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, if (a == NULL) goto err_mod; + INIT_LIST_HEAD(&a->list); /* backward compatibility for policer */ if (name == NULL) err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, a, ovr, bind); @@ -567,37 +522,33 @@ err_out: return ERR_PTR(err); } -struct tc_action *tcf_action_init(struct net *net, struct nlattr *nla, +int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, char *name, int ovr, - int bind) + int bind, struct list_head *actions) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; - struct tc_action *head = NULL, *act, *act_prev = NULL; + struct tc_action *act; int err; int i; err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); if (err < 0) - return ERR_PTR(err); + return err; for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(net, tb[i], est, name, ovr, bind); - if (IS_ERR(act)) + if (IS_ERR(act)) { + err = PTR_ERR(act); goto err; + } act->order = i; - - if (head == NULL) - head = act; - else - act_prev->next = act; - act_prev = act; + list_add_tail(&act->list, actions); } - return head; + return 0; err: - if (head != NULL) - tcf_action_destroy(head, bind); - return act; + tcf_action_destroy(actions, bind); + return err; } int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, @@ -626,10 +577,6 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (err < 0) goto errout; - if (a->ops != NULL && a->ops->get_stats != NULL) - if (a->ops->get_stats(skb, a) < 0) - goto errout; - if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || gnet_stats_copy_rate_est(&d, &h->tcf_bstats, &h->tcf_rate_est) < 0 || @@ -646,7 +593,7 @@ errout: } static int -tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq, +tca_get_fill(struct sk_buff *skb, struct list_head *actions, u32 portid, u32 seq, u16 flags, int event, int bind, int ref) { struct tcamsg *t; @@ -666,7 +613,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq, if (nest == NULL) goto out_nlmsg_trim; - if (tcf_action_dump(skb, a, bind, ref) < 0) + if (tcf_action_dump(skb, actions, bind, ref) < 0) goto out_nlmsg_trim; nla_nest_end(skb, nest); @@ -681,14 +628,14 @@ out_nlmsg_trim: static int act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, - struct tc_action *a, int event) + struct list_head *actions, int event) { struct sk_buff *skb; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -719,12 +666,11 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) if (a == NULL) goto err_out; + INIT_LIST_HEAD(&a->list); err = -EINVAL; a->ops = tc_lookup_action(tb[TCA_ACT_KIND]); - if (a->ops == NULL) + if (a->ops == NULL) /* could happen in batch of actions */ goto err_free; - if (a->ops->lookup == NULL) - goto err_mod; err = -ENOENT; if (a->ops->lookup(a, index) == 0) goto err_mod; @@ -740,12 +686,12 @@ err_out: return ERR_PTR(err); } -static void cleanup_a(struct tc_action *act) +static void cleanup_a(struct list_head *actions) { - struct tc_action *a; + struct tc_action *a, *tmp; - for (a = act; a; a = act) { - act = a->next; + list_for_each_entry_safe(a, tmp, actions, list) { + list_del(&a->list); kfree(a); } } @@ -760,6 +706,7 @@ static struct tc_action *create_a(int i) return NULL; } act->order = i; + INIT_LIST_HEAD(&act->list); return act; } @@ -798,7 +745,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, err = -EINVAL; kind = tb[TCA_ACT_KIND]; a->ops = tc_lookup_action(kind); - if (a->ops == NULL) + if (a->ops == NULL) /*some idjot trying to flush unknown action */ goto err_out; nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); @@ -847,7 +794,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; - struct tc_action *head = NULL, *act, *act_prev = NULL; + struct tc_action *act; + LIST_HEAD(actions); ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); if (ret < 0) @@ -867,16 +815,11 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; - - if (head == NULL) - head = act; - else - act_prev->next = act; - act_prev = act; + list_add_tail(&act->list, &actions); } if (event == RTM_GETACTION) - ret = act_get_notify(net, portid, n, head, event); + ret = act_get_notify(net, portid, n, &actions, event); else { /* delete */ struct sk_buff *skb; @@ -886,7 +829,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } - if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event, + if (tca_get_fill(skb, &actions, portid, n->nlmsg_seq, 0, event, 0, 1) <= 0) { kfree_skb(skb); ret = -EINVAL; @@ -894,7 +837,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, } /* now do the delete */ - tcf_action_destroy(head, 0); + tcf_action_destroy(&actions, 0); ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (ret > 0) @@ -902,11 +845,11 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, return ret; } err: - cleanup_a(head); + cleanup_a(&actions); return ret; } -static int tcf_add_notify(struct net *net, struct tc_action *a, +static int tcf_add_notify(struct net *net, struct list_head *actions, u32 portid, u32 seq, int event, u16 flags) { struct tcamsg *t; @@ -934,7 +877,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, if (nest == NULL) goto out_kfree_skb; - if (tcf_action_dump(skb, a, 0, 0) < 0) + if (tcf_action_dump(skb, actions, 0, 0) < 0) goto out_kfree_skb; nla_nest_end(skb, nest); @@ -958,26 +901,18 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int ovr) { int ret = 0; - struct tc_action *act; - struct tc_action *a; + LIST_HEAD(actions); u32 seq = n->nlmsg_seq; - act = tcf_action_init(net, nla, NULL, NULL, ovr, 0); - if (act == NULL) - goto done; - if (IS_ERR(act)) { - ret = PTR_ERR(act); + ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions); + if (ret) goto done; - } /* dump then free all the actions after update; inserted policy * stays intact */ - ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags); - for (a = act; a; a = act) { - act = a->next; - kfree(a); - } + ret = tcf_add_notify(net, &actions, portid, seq, RTM_NEWACTION, n->nlmsg_flags); + cleanup_a(&actions); done: return ret; } @@ -1084,12 +1019,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) memset(&a, 0, sizeof(struct tc_action)); a.ops = a_o; - if (a_o->walk == NULL) { - WARN(1, "tc_dump_action: %s !capable of dumping table\n", - a_o->kind); - goto out_module_put; - } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3a4c0ca..8b1d657 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,15 +37,8 @@ #include <net/tc_act/tc_csum.h> #define CSUM_TAB_MASK 15 -static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1]; static u32 csum_idx_gen; -static DEFINE_RWLOCK(csum_lock); - -static struct tcf_hashinfo csum_hash_info = { - .htab = tcf_csum_ht, - .hmask = CSUM_TAB_MASK, - .lock = &csum_lock, -}; +static struct tcf_hashinfo csum_hash_info; static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, @@ -77,16 +70,16 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, &csum_idx_gen, &csum_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); - p = to_tcf_csum(pc); ret = ACT_P_CREATED; } else { - p = to_tcf_csum(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &csum_hash_info); + if (bind)/* dont override defaults */ + return 0; + tcf_hash_release(pc, bind, &csum_hash_info); + if (!ovr) return -EEXIST; - } } + p = to_tcf_csum(pc); spin_lock_bh(&p->tcf_lock); p->tcf_action = parm->action; p->update_flags = parm->update_flags; @@ -585,9 +578,7 @@ static struct tc_action_ops act_csum_ops = { .act = tcf_csum, .dump = tcf_csum_dump, .cleanup = tcf_csum_cleanup, - .lookup = tcf_hash_search, .init = tcf_csum_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Checksum updating actions"); @@ -595,6 +586,10 @@ MODULE_LICENSE("GPL"); static int __init csum_init_module(void) { + int err = tcf_hashinfo_init(&csum_hash_info, CSUM_TAB_MASK); + if (err) + return err; + return tcf_register_action(&act_csum_ops); } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index fd2b3cf..af5641c 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -24,15 +24,8 @@ #include <net/tc_act/tc_gact.h> #define GACT_TAB_MASK 15 -static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1]; static u32 gact_idx_gen; -static DEFINE_RWLOCK(gact_lock); - -static struct tcf_hashinfo gact_hash_info = { - .htab = tcf_gact_ht, - .hmask = GACT_TAB_MASK, - .lock = &gact_lock, -}; +static struct tcf_hashinfo gact_hash_info; #ifdef CONFIG_GACT_PROB static int gact_net_rand(struct tcf_gact *gact) @@ -102,10 +95,11 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, return PTR_ERR(pc); ret = ACT_P_CREATED; } else { - if (!ovr) { - tcf_hash_release(pc, bind, &gact_hash_info); + if (bind)/* dont override defaults */ + return 0; + tcf_hash_release(pc, bind, &gact_hash_info); + if (!ovr) return -EEXIST; - } } gact = to_gact(pc); @@ -206,9 +200,7 @@ static struct tc_action_ops act_gact_ops = { .act = tcf_gact, .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, - .lookup = tcf_hash_search, .init = tcf_gact_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); @@ -217,6 +209,9 @@ MODULE_LICENSE("GPL"); static int __init gact_init_module(void) { + int err = tcf_hashinfo_init(&gact_hash_info, GACT_TAB_MASK); + if (err) + return err; #ifdef CONFIG_GACT_PROB pr_info("GACT probability on\n"); #else @@ -228,6 +223,7 @@ static int __init gact_init_module(void) static void __exit gact_cleanup_module(void) { tcf_unregister_action(&act_gact_ops); + tcf_hashinfo_destroy(&gact_hash_info); } module_init(gact_init_module); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 60d88b6..2426369 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -29,15 +29,8 @@ #define IPT_TAB_MASK 15 -static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1]; static u32 ipt_idx_gen; -static DEFINE_RWLOCK(ipt_lock); - -static struct tcf_hashinfo ipt_hash_info = { - .htab = tcf_ipt_ht, - .hmask = IPT_TAB_MASK, - .lock = &ipt_lock, -}; +static struct tcf_hashinfo ipt_hash_info; static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook) { @@ -141,10 +134,12 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, return PTR_ERR(pc); ret = ACT_P_CREATED; } else { - if (!ovr) { - tcf_ipt_release(to_ipt(pc), bind); + if (bind)/* dont override defaults */ + return 0; + tcf_ipt_release(to_ipt(pc), bind); + + if (!ovr) return -EEXIST; - } } ipt = to_ipt(pc); @@ -298,9 +293,7 @@ static struct tc_action_ops act_ipt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; static struct tc_action_ops act_xt_ops = { @@ -312,9 +305,7 @@ static struct tc_action_ops act_xt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); @@ -324,7 +315,11 @@ MODULE_ALIAS("act_xt"); static int __init ipt_init_module(void) { - int ret1, ret2; + int ret1, ret2, err; + err = tcf_hashinfo_init(&ipt_hash_info, IPT_TAB_MASK); + if (err) + return err; + ret1 = tcf_register_action(&act_xt_ops); if (ret1 < 0) printk("Failed to load xt action\n"); @@ -332,9 +327,10 @@ static int __init ipt_init_module(void) if (ret2 < 0) printk("Failed to load ipt action\n"); - if (ret1 < 0 && ret2 < 0) + if (ret1 < 0 && ret2 < 0) { + tcf_hashinfo_destroy(&ipt_hash_info); return ret1; - else + } else return 0; } @@ -342,6 +338,7 @@ static void __exit ipt_cleanup_module(void) { tcf_unregister_action(&act_xt_ops); tcf_unregister_action(&act_ipt_ops); + tcf_hashinfo_destroy(&ipt_hash_info); } module_init(ipt_init_module); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 977c10e..9dbb8cd 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -30,16 +30,9 @@ #include <linux/if_arp.h> #define MIRRED_TAB_MASK 7 -static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; static u32 mirred_idx_gen; -static DEFINE_RWLOCK(mirred_lock); static LIST_HEAD(mirred_list); - -static struct tcf_hashinfo mirred_hash_info = { - .htab = tcf_mirred_ht, - .hmask = MIRRED_TAB_MASK, - .lock = &mirred_lock, -}; +static struct tcf_hashinfo mirred_hash_info; static int tcf_mirred_release(struct tcf_mirred *m, int bind) { @@ -261,7 +254,6 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; - static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .hinfo = &mirred_hash_info, @@ -271,9 +263,7 @@ static struct tc_action_ops act_mirred_ops = { .act = tcf_mirred, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_cleanup, - .lookup = tcf_hash_search, .init = tcf_mirred_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); @@ -286,14 +276,20 @@ static int __init mirred_init_module(void) if (err) return err; + err = tcf_hashinfo_init(&mirred_hash_info, MIRRED_TAB_MASK); + if (err) { + unregister_netdevice_notifier(&mirred_device_notifier); + return err; + } pr_info("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } static void __exit mirred_cleanup_module(void) { - unregister_netdevice_notifier(&mirred_device_notifier); tcf_unregister_action(&act_mirred_ops); + tcf_hashinfo_destroy(&mirred_hash_info); + unregister_netdevice_notifier(&mirred_device_notifier); } module_init(mirred_init_module); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 876f0ef..584e655 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -30,15 +30,9 @@ #define NAT_TAB_MASK 15 -static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1]; static u32 nat_idx_gen; -static DEFINE_RWLOCK(nat_lock); -static struct tcf_hashinfo nat_hash_info = { - .htab = tcf_nat_ht, - .hmask = NAT_TAB_MASK, - .lock = &nat_lock, -}; +static struct tcf_hashinfo nat_hash_info; static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, @@ -70,15 +64,15 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, &nat_idx_gen, &nat_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); - p = to_tcf_nat(pc); ret = ACT_P_CREATED; } else { - p = to_tcf_nat(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &nat_hash_info); + if (bind) + return 0; + tcf_hash_release(pc, bind, &nat_hash_info); + if (!ovr) return -EEXIST; - } } + p = to_tcf_nat(pc); spin_lock_bh(&p->tcf_lock); p->old_addr = parm->old_addr; @@ -308,9 +302,7 @@ static struct tc_action_ops act_nat_ops = { .act = tcf_nat, .dump = tcf_nat_dump, .cleanup = tcf_nat_cleanup, - .lookup = tcf_hash_search, .init = tcf_nat_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Stateless NAT actions"); @@ -318,12 +310,16 @@ MODULE_LICENSE("GPL"); static int __init nat_init_module(void) { + int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK); + if (err) + return err; return tcf_register_action(&act_nat_ops); } static void __exit nat_cleanup_module(void) { tcf_unregister_action(&act_nat_ops); + tcf_hashinfo_destroy(&nat_hash_info); } module_init(nat_init_module); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7ed78c9..7291893 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -24,15 +24,9 @@ #include <net/tc_act/tc_pedit.h> #define PEDIT_TAB_MASK 15 -static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1]; static u32 pedit_idx_gen; -static DEFINE_RWLOCK(pedit_lock); -static struct tcf_hashinfo pedit_hash_info = { - .htab = tcf_pedit_ht, - .hmask = PEDIT_TAB_MASK, - .lock = &pedit_lock, -}; +static struct tcf_hashinfo pedit_hash_info; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, @@ -84,10 +78,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { p = to_pedit(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &pedit_hash_info); + tcf_hash_release(pc, bind, &pedit_hash_info); + if (bind) + return 0; + if (!ovr) return -EEXIST; - } + if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) @@ -243,9 +239,7 @@ static struct tc_action_ops act_pedit_ops = { .act = tcf_pedit, .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, - .lookup = tcf_hash_search, .init = tcf_pedit_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); @@ -254,11 +248,15 @@ MODULE_LICENSE("GPL"); static int __init pedit_init_module(void) { + int err = tcf_hashinfo_init(&pedit_hash_info, PEDIT_TAB_MASK); + if (err) + return err; return tcf_register_action(&act_pedit_ops); } static void __exit pedit_cleanup_module(void) { + tcf_hashinfo_destroy(&pedit_hash_info); tcf_unregister_action(&act_pedit_ops); } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 272d8e9..9295b86 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -41,15 +41,8 @@ struct tcf_police { container_of(pc, struct tcf_police, common) #define POL_TAB_MASK 15 -static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; static u32 police_idx_gen; -static DEFINE_RWLOCK(police_lock); - -static struct tcf_hashinfo police_hash_info = { - .htab = tcf_police_ht, - .hmask = POL_TAB_MASK, - .lock = &police_lock, -}; +static struct tcf_hashinfo police_hash_info; /* old policer structure from before tc actions */ struct tc_police_compat { @@ -67,18 +60,19 @@ struct tc_police_compat { static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a) { + struct hlist_head *head; struct tcf_common *p; int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct nlattr *nest; - read_lock_bh(&police_lock); + spin_lock_bh(&police_hash_info.lock); s_i = cb->args[0]; for (i = 0; i < (POL_TAB_MASK + 1); i++) { - p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; + head = &police_hash_info.htab[tcf_hash(i, POL_TAB_MASK)]; - for (; p; p = p->tcfc_next) { + hlist_for_each_entry_rcu(p, head, tcfc_head) { index++; if (index < s_i) continue; @@ -101,7 +95,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c } } done: - read_unlock_bh(&police_lock); + spin_unlock_bh(&police_hash_info.lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -113,25 +107,16 @@ nla_put_failure: static void tcf_police_destroy(struct tcf_police *p) { - unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); - struct tcf_common **p1p; - - for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { - if (*p1p == &p->common) { - write_lock_bh(&police_lock); - *p1p = p->tcf_next; - write_unlock_bh(&police_lock); - gen_kill_estimator(&p->tcf_bstats, - &p->tcf_rate_est); - /* - * gen_estimator est_timer() might access p->tcf_lock - * or bstats, wait a RCU grace period before freeing p - */ - kfree_rcu(p, tcf_rcu); - return; - } - } - WARN_ON(1); + spin_lock_bh(&police_hash_info.lock); + hlist_del(&p->tcf_head); + spin_unlock_bh(&police_hash_info.lock); + gen_kill_estimator(&p->tcf_bstats, + &p->tcf_rate_est); + /* + * gen_estimator est_timer() might access p->tcf_lock + * or bstats, wait a RCU grace period before freeing p + */ + kfree_rcu(p, tcf_rcu); } static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { @@ -177,10 +162,12 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; + return 0; } if (ovr) goto override; - return ret; + /* not replacing */ + return -EEXIST; } } @@ -266,10 +253,9 @@ override: police->tcf_index = parm->index ? parm->index : tcf_hash_new_index(&police_idx_gen, &police_hash_info); h = tcf_hash(police->tcf_index, POL_TAB_MASK); - write_lock_bh(&police_lock); - police->tcf_next = tcf_police_ht[h]; - tcf_police_ht[h] = &police->common; - write_unlock_bh(&police_lock); + spin_lock_bh(&police_hash_info.lock); + hlist_add_head(&police->tcf_head, &police_hash_info.htab[h]); + spin_unlock_bh(&police_hash_info.lock); a->priv = police; return ret; @@ -277,10 +263,8 @@ override: failure_unlock: spin_unlock_bh(&police->tcf_lock); failure: - if (P_tab) - qdisc_put_rtab(P_tab); - if (R_tab) - qdisc_put_rtab(R_tab); + qdisc_put_rtab(P_tab); + qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) kfree(police); return err; @@ -407,7 +391,6 @@ static struct tc_action_ops act_police_ops = { .act = tcf_act_police, .dump = tcf_act_police_dump, .cleanup = tcf_act_police_cleanup, - .lookup = tcf_hash_search, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; @@ -415,12 +398,19 @@ static struct tc_action_ops act_police_ops = { static int __init police_init_module(void) { - return tcf_register_action(&act_police_ops); + int err = tcf_hashinfo_init(&police_hash_info, POL_TAB_MASK); + if (err) + return err; + err = tcf_register_action(&act_police_ops); + if (err) + tcf_hashinfo_destroy(&police_hash_info); + return err; } static void __exit police_cleanup_module(void) { + tcf_hashinfo_destroy(&police_hash_info); tcf_unregister_action(&act_police_ops); } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 7725eb4..b44491e 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -25,15 +25,8 @@ #include <net/tc_act/tc_defact.h> #define SIMP_TAB_MASK 7 -static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1]; static u32 simp_idx_gen; -static DEFINE_RWLOCK(simp_lock); - -static struct tcf_hashinfo simp_hash_info = { - .htab = tcf_simp_ht, - .hmask = SIMP_TAB_MASK, - .lock = &simp_lock, -}; +static struct tcf_hashinfo simp_hash_info; #define SIMP_MAX_DATA 32 static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, @@ -142,10 +135,13 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_defact(pc); - if (!ovr) { - tcf_simp_release(d, bind); + + if (bind) + return 0; + tcf_simp_release(d, bind); + if (!ovr) return -EEXIST; - } + reset_policy(d, defdata, parm); } @@ -201,7 +197,6 @@ static struct tc_action_ops act_simp_ops = { .dump = tcf_simp_dump, .cleanup = tcf_simp_cleanup, .init = tcf_simp_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); @@ -210,14 +205,23 @@ MODULE_LICENSE("GPL"); static int __init simp_init_module(void) { - int ret = tcf_register_action(&act_simp_ops); + int err, ret; + err = tcf_hashinfo_init(&simp_hash_info, SIMP_TAB_MASK); + if (err) + return err; + + ret = tcf_register_action(&act_simp_ops); if (!ret) pr_info("Simple TC action Loaded\n"); + else + tcf_hashinfo_destroy(&simp_hash_info); + return ret; } static void __exit simp_cleanup_module(void) { + tcf_hashinfo_destroy(&simp_hash_info); tcf_unregister_action(&act_simp_ops); } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index cb42211..0fa1aad 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see <http://www.gnu.org/licenses/>. * * Author: Alexander Duyck <alexander.h.duyck@intel.com> */ @@ -29,15 +28,8 @@ #include <net/tc_act/tc_skbedit.h> #define SKBEDIT_TAB_MASK 15 -static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1]; static u32 skbedit_idx_gen; -static DEFINE_RWLOCK(skbedit_lock); - -static struct tcf_hashinfo skbedit_hash_info = { - .htab = tcf_skbedit_ht, - .hmask = SKBEDIT_TAB_MASK, - .lock = &skbedit_lock, -}; +static struct tcf_hashinfo skbedit_hash_info; static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) @@ -120,10 +112,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &skbedit_hash_info); + if (bind) + return 0; + tcf_hash_release(pc, bind, &skbedit_hash_info); + if (!ovr) return -EEXIST; - } } spin_lock_bh(&d->tcf_lock); @@ -203,7 +196,6 @@ static struct tc_action_ops act_skbedit_ops = { .dump = tcf_skbedit_dump, .cleanup = tcf_skbedit_cleanup, .init = tcf_skbedit_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>"); @@ -212,11 +204,15 @@ MODULE_LICENSE("GPL"); static int __init skbedit_init_module(void) { + int err = tcf_hashinfo_init(&skbedit_hash_info, SKBEDIT_TAB_MASK); + if (err) + return err; return tcf_register_action(&act_skbedit_ops); } static void __exit skbedit_cleanup_module(void) { + tcf_hashinfo_destroy(&skbedit_hash_info); tcf_unregister_action(&act_skbedit_ops); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8e118af..d8c42b1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,8 +31,7 @@ #include <net/pkt_cls.h> /* The list of all installed classifier types */ - -static struct tcf_proto_ops *tcf_proto_base __read_mostly; +static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(cls_mod_lock); @@ -41,36 +40,35 @@ static DEFINE_RWLOCK(cls_mod_lock); static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind) { - const struct tcf_proto_ops *t = NULL; + const struct tcf_proto_ops *t, *res = NULL; if (kind) { read_lock(&cls_mod_lock); - for (t = tcf_proto_base; t; t = t->next) { + list_for_each_entry(t, &tcf_proto_base, head) { if (nla_strcmp(kind, t->kind) == 0) { - if (!try_module_get(t->owner)) - t = NULL; + if (try_module_get(t->owner)) + res = t; break; } } read_unlock(&cls_mod_lock); } - return t; + return res; } /* Register(unregister) new classifier type */ int register_tcf_proto_ops(struct tcf_proto_ops *ops) { - struct tcf_proto_ops *t, **tp; + struct tcf_proto_ops *t; int rc = -EEXIST; write_lock(&cls_mod_lock); - for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next) + list_for_each_entry(t, &tcf_proto_base, head) if (!strcmp(ops->kind, t->kind)) goto out; - ops->next = NULL; - *tp = ops; + list_add_tail(&ops->head, &tcf_proto_base); rc = 0; out: write_unlock(&cls_mod_lock); @@ -80,19 +78,17 @@ EXPORT_SYMBOL(register_tcf_proto_ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { - struct tcf_proto_ops *t, **tp; + struct tcf_proto_ops *t; int rc = -ENOENT; write_lock(&cls_mod_lock); - for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next) - if (t == ops) + list_for_each_entry(t, &tcf_proto_base, head) { + if (t == ops) { + list_del(&t->head); + rc = 0; break; - - if (!t) - goto out; - *tp = t->next; - rc = 0; -out: + } + } write_unlock(&cls_mod_lock); return rc; } @@ -500,46 +496,41 @@ out: void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - if (exts->action) { - tcf_action_destroy(exts->action, TCA_ACT_UNBIND); - exts->action = NULL; - } + tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND); + INIT_LIST_HEAD(&exts->actions); #endif } EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, - const struct tcf_ext_map *map) + struct nlattr *rate_tlv, struct tcf_exts *exts) { - memset(exts, 0, sizeof(*exts)); - #ifdef CONFIG_NET_CLS_ACT { struct tc_action *act; - if (map->police && tb[map->police]) { - act = tcf_action_init_1(net, tb[map->police], rate_tlv, + INIT_LIST_HEAD(&exts->actions); + if (exts->police && tb[exts->police]) { + act = tcf_action_init_1(net, tb[exts->police], rate_tlv, "police", TCA_ACT_NOREPLACE, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); - act->type = TCA_OLD_COMPAT; - exts->action = act; - } else if (map->action && tb[map->action]) { - act = tcf_action_init(net, tb[map->action], rate_tlv, + act->type = exts->type = TCA_OLD_COMPAT; + list_add(&act->list, &exts->actions); + } else if (exts->action && tb[exts->action]) { + int err; + err = tcf_action_init(net, tb[exts->action], rate_tlv, NULL, TCA_ACT_NOREPLACE, - TCA_ACT_BIND); - if (IS_ERR(act)) - return PTR_ERR(act); - - exts->action = act; + TCA_ACT_BIND, &exts->actions); + if (err) + return err; } } #else - if ((map->action && tb[map->action]) || - (map->police && tb[map->police])) + if ((exts->action && tb[exts->action]) || + (exts->police && tb[exts->police])) return -EOPNOTSUPP; #endif @@ -551,43 +542,44 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT - if (src->action) { - struct tc_action *act; + if (!list_empty(&src->actions)) { + LIST_HEAD(tmp); tcf_tree_lock(tp); - act = dst->action; - dst->action = src->action; + list_splice_init(&dst->actions, &tmp); + list_splice(&src->actions, &dst->actions); tcf_tree_unlock(tp); - if (act) - tcf_action_destroy(act, TCA_ACT_UNBIND); + tcf_action_destroy(&tmp, TCA_ACT_UNBIND); } #endif } EXPORT_SYMBOL(tcf_exts_change); -int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, - const struct tcf_ext_map *map) +#define tcf_exts_first_act(ext) \ + list_first_entry(&(exts)->actions, struct tc_action, list) + +int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - if (map->action && exts->action) { + if (exts->action && !list_empty(&exts->actions)) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering * tc data even if iproute2 was newer - jhs */ struct nlattr *nest; - - if (exts->action->type != TCA_OLD_COMPAT) { - nest = nla_nest_start(skb, map->action); + if (exts->type != TCA_OLD_COMPAT) { + nest = nla_nest_start(skb, exts->action); if (nest == NULL) goto nla_put_failure; - if (tcf_action_dump(skb, exts->action, 0, 0) < 0) + if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - } else if (map->police) { - nest = nla_nest_start(skb, map->police); - if (nest == NULL) + } else if (exts->police) { + struct tc_action *act = tcf_exts_first_act(exts); + nest = nla_nest_start(skb, exts->police); + if (nest == NULL || !act) goto nla_put_failure; - if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0) + if (tcf_action_dump_old(skb, act, 0, 0) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } @@ -600,17 +592,14 @@ nla_put_failure: __attribute__ ((unused)) EXPORT_SYMBOL(tcf_exts_dump); -int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, - const struct tcf_ext_map *map) +int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - if (exts->action) - if (tcf_action_copy_stats(skb, exts->action, 1) < 0) - goto nla_put_failure; + struct tc_action *a = tcf_exts_first_act(exts); + if (tcf_action_copy_stats(skb, a, 1) < 0) + return -1; #endif return 0; -nla_put_failure: __attribute__ ((unused)) - return -1; } EXPORT_SYMBOL(tcf_exts_dump_stats); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 636d913..b655203 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -34,11 +34,6 @@ struct basic_filter { struct list_head link; }; -static const struct tcf_ext_map basic_ext_map = { - .action = TCA_BASIC_ACT, - .police = TCA_BASIC_POLICE -}; - static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { @@ -141,7 +136,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; struct tcf_ematch_tree t; - err = tcf_exts_validate(net, tp, tb, est, &e, &basic_ext_map); + tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &e); if (err < 0) return err; @@ -191,6 +187,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout; + tcf_exts_init(&f->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); err = -EINVAL; if (handle) f->handle = handle; @@ -263,13 +260,13 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || + if (tcf_exts_dump(skb, &f->exts) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1002a82..00a5a58 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -46,11 +46,6 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; -static const struct tcf_ext_map bpf_ext_map = { - .action = TCA_BPF_ACT, - .police = TCA_BPF_POLICE, -}; - static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { @@ -174,7 +169,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID]) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map); + tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_validate(net, tp, tb, est, &exts); if (ret < 0) return ret; @@ -271,6 +267,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (prog == NULL) return -ENOBUFS; + tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); if (handle == 0) prog->handle = cls_bpf_grab_new_handle(tp, head); else @@ -323,14 +320,14 @@ static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh, if (nla == NULL) goto nla_put_failure; - memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); - if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0) + if (tcf_exts_dump(skb, &prog->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &prog->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 16006c9..8349fcd 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -11,109 +11,13 @@ #include <linux/module.h> #include <linux/slab.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/errno.h> #include <linux/skbuff.h> -#include <linux/cgroup.h> #include <linux/rcupdate.h> -#include <linux/fdtable.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> #include <net/cls_cgroup.h> -static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) -{ - return css ? container_of(css, struct cgroup_cls_state, css) : NULL; -} - -static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) -{ - return css_cls_state(task_css(p, net_cls_subsys_id)); -} - -static struct cgroup_subsys_state * -cgrp_css_alloc(struct cgroup_subsys_state *parent_css) -{ - struct cgroup_cls_state *cs; - - cs = kzalloc(sizeof(*cs), GFP_KERNEL); - if (!cs) - return ERR_PTR(-ENOMEM); - return &cs->css; -} - -static int cgrp_css_online(struct cgroup_subsys_state *css) -{ - struct cgroup_cls_state *cs = css_cls_state(css); - struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); - - if (parent) - cs->classid = parent->classid; - return 0; -} - -static void cgrp_css_free(struct cgroup_subsys_state *css) -{ - kfree(css_cls_state(css)); -} - -static int update_classid(const void *v, struct file *file, unsigned n) -{ - int err; - struct socket *sock = sock_from_file(file, &err); - if (sock) - sock->sk->sk_classid = (u32)(unsigned long)v; - return 0; -} - -static void cgrp_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) -{ - struct task_struct *p; - struct cgroup_cls_state *cs = css_cls_state(css); - void *v = (void *)(unsigned long)cs->classid; - - cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid, v); - task_unlock(p); - } -} - -static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) -{ - return css_cls_state(css)->classid; -} - -static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, - u64 value) -{ - css_cls_state(css)->classid = (u32) value; - return 0; -} - -static struct cftype ss_files[] = { - { - .name = "classid", - .read_u64 = read_classid, - .write_u64 = write_classid, - }, - { } /* terminate */ -}; - -struct cgroup_subsys net_cls_subsys = { - .name = "net_cls", - .css_alloc = cgrp_css_alloc, - .css_online = cgrp_css_online, - .css_free = cgrp_css_free, - .attach = cgrp_attach, - .subsys_id = net_cls_subsys_id, - .base_cftypes = ss_files, - .module = THIS_MODULE, -}; - struct cls_cgroup_head { u32 handle; struct tcf_exts exts; @@ -172,11 +76,6 @@ static int cls_cgroup_init(struct tcf_proto *tp) return 0; } -static const struct tcf_ext_map cgroup_ext_map = { - .action = TCA_CGROUP_ACT, - .police = TCA_CGROUP_POLICE, -}; - static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; @@ -203,6 +102,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (head == NULL) return -ENOBUFS; + tcf_exts_init(&head->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); head->handle = handle; tcf_tree_lock(tp); @@ -218,8 +118,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, - &cgroup_ext_map); + tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); if (err < 0) return err; @@ -277,13 +177,13 @@ static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh, if (nest == NULL) goto nla_put_failure; - if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 || + if (tcf_exts_dump(skb, &head->exts) < 0 || tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &head->exts) < 0) goto nla_put_failure; return skb->len; @@ -309,25 +209,12 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { static int __init init_cgroup_cls(void) { - int ret; - - ret = cgroup_load_subsys(&net_cls_subsys); - if (ret) - goto out; - - ret = register_tcf_proto_ops(&cls_cgroup_ops); - if (ret) - cgroup_unload_subsys(&net_cls_subsys); - -out: - return ret; + return register_tcf_proto_ops(&cls_cgroup_ops); } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); - - cgroup_unload_subsys(&net_cls_subsys); } module_init(init_cgroup_cls); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 7881e2f..dfd18a5 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -56,11 +56,6 @@ struct flow_filter { u32 hashrnd; }; -static const struct tcf_ext_map flow_ext_map = { - .action = TCA_FLOW_ACT, - .police = TCA_FLOW_POLICE, -}; - static inline u32 addr_fold(void *addr) { unsigned long a = (unsigned long)addr; @@ -220,7 +215,7 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb) static u32 flow_get_rxhash(struct sk_buff *skb) { - return skb_get_rxhash(skb); + return skb_get_hash(skb); } static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) @@ -397,7 +392,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; } - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &flow_ext_map); + tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); if (err < 0) return err; @@ -455,6 +451,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, f->handle = handle; f->mask = ~0U; + tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); get_random_bytes(&f->hashrnd, 4); f->perturb_timer.function = flow_perturbation; @@ -608,7 +605,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0) + if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; #ifdef CONFIG_NET_EMATCH if (f->ematches.hdr.nmatches && @@ -617,7 +614,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh, #endif nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 9b97172..3f9cece 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -46,11 +46,6 @@ struct fw_filter { struct tcf_exts exts; }; -static const struct tcf_ext_map fw_ext_map = { - .action = TCA_FW_ACT, - .police = TCA_FW_POLICE -}; - static inline int fw_hash(u32 handle) { if (HTSIZE == 4096) @@ -200,7 +195,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, u32 mask; int err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &fw_ext_map); + tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); if (err < 0) return err; @@ -280,6 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) return -ENOBUFS; + tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); f->id = handle; err = fw_change_attrs(net, tp, f, tb, tca, base); @@ -359,12 +356,12 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_FW_MASK, head->mask)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) + if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 37da567..2473953 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -59,11 +59,6 @@ struct route4_filter { #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) -static const struct tcf_ext_map route_ext_map = { - .police = TCA_ROUTE4_POLICE, - .action = TCA_ROUTE4_ACT -}; - static inline int route4_fastmap_hash(u32 id, int iif) { return id & 0xF; @@ -347,7 +342,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; struct tcf_exts e; - err = tcf_exts_validate(net, tp, tb, est, &e, &route_ext_map); + tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &e); if (err < 0) return err; @@ -481,6 +477,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout; + tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); err = route4_set_parms(net, tp, base, f, handle, head, tb, tca[TCA_RATE], 1); if (err < 0) @@ -589,12 +586,12 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_ROUTE4_CLASSID, f->res.classid)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0) + if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 252d8b0..4f25c2a 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -116,11 +116,6 @@ static inline unsigned int hash_src(__be32 *src) return h & 0xF; } -static struct tcf_ext_map rsvp_ext_map = { - .police = TCA_RSVP_POLICE, - .action = TCA_RSVP_ACT -}; - #define RSVP_APPLY_RESULT() \ { \ int r = tcf_exts_exec(skb, &f->exts, res); \ @@ -440,7 +435,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); + tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); if (err < 0) return err; @@ -471,6 +467,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout2; + tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); h2 = 16; if (tb[TCA_RSVP_SRC]) { memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); @@ -633,12 +630,12 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) + if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index b86535a..ffad187 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -50,11 +50,6 @@ struct tcindex_data { int fall_through; /* 0: only classify if explicit match */ }; -static const struct tcf_ext_map tcindex_ext_map = { - .police = TCA_TCINDEX_POLICE, - .action = TCA_TCINDEX_ACT -}; - static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { @@ -209,17 +204,21 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_exts e; - err = tcf_exts_validate(net, tp, tb, est, &e, &tcindex_ext_map); + tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &e); if (err < 0) return err; memcpy(&cp, p, sizeof(cp)); memset(&new_filter_result, 0, sizeof(new_filter_result)); + tcf_exts_init(&new_filter_result.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (old_r) memcpy(&cr, r, sizeof(cr)); - else + else { memset(&cr, 0, sizeof(cr)); + tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + } if (tb[TCA_TCINDEX_HASH]) cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -468,11 +467,11 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid)) goto nla_put_failure; - if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0) + if (tcf_exts_dump(skb, &r->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &r->exts) < 0) goto nla_put_failure; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index eb07a1e..20f2fb7 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -79,11 +79,6 @@ struct tc_u_common { u32 hgenerator; }; -static const struct tcf_ext_map u32_ext_map = { - .action = TCA_U32_ACT, - .police = TCA_U32_POLICE -}; - static inline unsigned int u32_hash_fold(__be32 key, const struct tc_u32_sel *sel, u8 fshift) @@ -352,7 +347,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n) return 0; } -static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) +static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) { struct tc_u_knode **kp; struct tc_u_hnode *ht = key->ht_up; @@ -496,7 +491,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, int err; struct tcf_exts e; - err = tcf_exts_validate(net, tp, tb, est, &e, &u32_ext_map); + tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &e); if (err < 0) return err; @@ -646,6 +642,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, n->ht_up = ht; n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; + tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); #ifdef CONFIG_CLS_U32_MARK if (tb[TCA_U32_MARK]) { @@ -759,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; #endif - if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0) + if (tcf_exts_dump(skb, &n->exts) < 0) goto nla_put_failure; #ifdef CONFIG_NET_CLS_IND @@ -778,7 +775,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, nla_nest_end(skb, nest); if (TC_U32_KEY(n->handle)) - if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &n->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index e5cef956..9b8c0b0 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -222,7 +222,7 @@ META_COLLECTOR(int_maclen) META_COLLECTOR(int_rxhash) { - dst->value = skb_get_rxhash(skb); + dst->value = skb_get_hash(skb); } /************************************************************************** @@ -271,40 +271,52 @@ META_COLLECTOR(int_rtiif) * Socket Attributes **************************************************************************/ -#define SKIP_NONLOCAL(skb) \ - if (unlikely(skb->sk == NULL)) { \ - *err = -1; \ - return; \ - } +#define skip_nonlocal(skb) \ + (unlikely(skb->sk == NULL)) META_COLLECTOR(int_sk_family) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_family; } META_COLLECTOR(int_sk_state) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_state; } META_COLLECTOR(int_sk_reuse) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_reuse; } META_COLLECTOR(int_sk_bound_if) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } /* No error if bound_dev_if is 0, legal userspace check */ dst->value = skb->sk->sk_bound_dev_if; } META_COLLECTOR(var_sk_bound_if) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } if (skb->sk->sk_bound_dev_if == 0) { dst->value = (unsigned long) "any"; @@ -322,151 +334,226 @@ META_COLLECTOR(var_sk_bound_if) META_COLLECTOR(int_sk_refcnt) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = atomic_read(&skb->sk->sk_refcnt); } META_COLLECTOR(int_sk_rcvbuf) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_rcvbuf; } META_COLLECTOR(int_sk_shutdown) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_shutdown; } META_COLLECTOR(int_sk_proto) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_protocol; } META_COLLECTOR(int_sk_type) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_type; } META_COLLECTOR(int_sk_rmem_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = sk_rmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_wmem_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = sk_wmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_omem_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = atomic_read(&skb->sk->sk_omem_alloc); } META_COLLECTOR(int_sk_rcv_qlen) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_receive_queue.qlen; } META_COLLECTOR(int_sk_snd_qlen) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_write_queue.qlen; } META_COLLECTOR(int_sk_wmem_queued) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_wmem_queued; } META_COLLECTOR(int_sk_fwd_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_forward_alloc; } META_COLLECTOR(int_sk_sndbuf) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_sndbuf; } META_COLLECTOR(int_sk_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = (__force int) skb->sk->sk_allocation; } META_COLLECTOR(int_sk_hash) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_hash; } META_COLLECTOR(int_sk_lingertime) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_lingertime / HZ; } META_COLLECTOR(int_sk_err_qlen) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_error_queue.qlen; } META_COLLECTOR(int_sk_ack_bl) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_ack_backlog; } META_COLLECTOR(int_sk_max_ack_bl) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_max_ack_backlog; } META_COLLECTOR(int_sk_prio) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_priority; } META_COLLECTOR(int_sk_rcvlowat) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_rcvlowat; } META_COLLECTOR(int_sk_rcvtimeo) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_rcvtimeo / HZ; } META_COLLECTOR(int_sk_sndtimeo) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_sndtimeo / HZ; } META_COLLECTOR(int_sk_sendmsg_off) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_frag.offset; } META_COLLECTOR(int_sk_write_pend) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_write_pending; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cd81505..1313145 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -135,7 +135,7 @@ static DEFINE_RWLOCK(qdisc_mod_lock); static struct Qdisc_ops *qdisc_base; -/* Register/uregister queueing discipline */ +/* Register/unregister queueing discipline */ int register_qdisc(struct Qdisc_ops *qops) { @@ -271,11 +271,15 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } -static void qdisc_list_add(struct Qdisc *q) +void qdisc_list_add(struct Qdisc *q) { + struct Qdisc *root = qdisc_dev(q)->qdisc; + + WARN_ON_ONCE(root == &noop_qdisc); if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list); + list_add_tail(&q->list, &root->list); } +EXPORT_SYMBOL(qdisc_list_add); void qdisc_list_del(struct Qdisc *q) { diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 7a42c81..2f80d01 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1058,9 +1058,10 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ q->quanta[prio]; } - if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { - pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n", - cl->common.classid, cl->quantum); + if (cl->quantum <= 0 || + cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) { + pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n", + cl->common.classid, cl->quantum); cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; } } @@ -1782,8 +1783,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); if (err) { - if (rtab) - qdisc_put_rtab(rtab); + qdisc_put_rtab(rtab); return err; } } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 3886365..49d6ef3 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -47,7 +47,7 @@ struct dsmark_qdisc_data { static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) { - return (index <= p->indices && index > 0); + return index <= p->indices && index > 0; } /* ------------------------- Class/flow operations ------------------------- */ @@ -57,8 +57,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n", - sch, p, new, old); + pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n", + __func__, sch, p, new, old); if (new == NULL) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, @@ -85,8 +85,8 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) static unsigned long dsmark_get(struct Qdisc *sch, u32 classid) { - pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n", - sch, qdisc_priv(sch), classid); + pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", + __func__, sch, qdisc_priv(sch), classid); return TC_H_MIN(classid) + 1; } @@ -118,8 +118,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, int err = -EINVAL; u8 mask = 0; - pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," - "arg 0x%lx\n", sch, p, classid, parent, *arg); + pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", + __func__, sch, p, classid, parent, *arg); if (!dsmark_valid_index(p, *arg)) { err = -ENOENT; @@ -166,7 +166,8 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) struct dsmark_qdisc_data *p = qdisc_priv(sch); int i; - pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); + pr_debug("%s(sch %p,[qdisc %p],walker %p)\n", + __func__, sch, p, walker); if (walker->stop) return; @@ -199,7 +200,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct dsmark_qdisc_data *p = qdisc_priv(sch); int err; - pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { switch (skb->protocol) { @@ -275,7 +276,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) struct sk_buff *skb; u32 index; - pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); skb = p->q->ops->dequeue(p->q); if (skb == NULL) @@ -303,8 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) * and don't need yet another qdisc as a bypass. */ if (p->mask[index] != 0xff || p->value[index]) - pr_warning("dsmark_dequeue: unsupported protocol %d\n", - ntohs(skb->protocol)); + pr_warn("%s: unsupported protocol %d\n", + __func__, ntohs(skb->protocol)); break; } @@ -315,7 +316,7 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); return p->q->ops->peek(p->q); } @@ -325,7 +326,7 @@ static unsigned int dsmark_drop(struct Qdisc *sch) struct dsmark_qdisc_data *p = qdisc_priv(sch); unsigned int len; - pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); if (p->q->ops->drop == NULL) return 0; @@ -346,7 +347,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) u16 indices; u8 *mask; - pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); + pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); if (!opt) goto errout; @@ -384,7 +385,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (p->q == NULL) p->q = &noop_qdisc; - pr_debug("dsmark_init: qdisc %p\n", p->q); + pr_debug("%s: qdisc %p\n", __func__, p->q); err = 0; errout: @@ -395,7 +396,7 @@ static void dsmark_reset(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); qdisc_reset(p->q); sch->q.qlen = 0; } @@ -404,7 +405,7 @@ static void dsmark_destroy(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); @@ -417,7 +418,7 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, struct dsmark_qdisc_data *p = qdisc_priv(sch); struct nlattr *opts = NULL; - pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl); + pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl); if (!dsmark_valid_index(p, cl)) return -EINVAL; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 95d8439..08ef7a4 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -47,6 +47,7 @@ #include <linux/rbtree.h> #include <linux/hash.h> #include <linux/prefetch.h> +#include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/sock.h> @@ -225,7 +226,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) /* By forcing low order bit to 1, we make sure to not * collide with a local flow (socket pointers are word aligned) */ - sk = (struct sock *)(skb_get_rxhash(skb) | 1L); + sk = (struct sock *)(skb_get_hash(skb) | 1L); } root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; @@ -578,15 +579,36 @@ static void fq_rehash(struct fq_sched_data *q, q->stat_gc_flows += fcnt; } -static int fq_resize(struct fq_sched_data *q, u32 log) +static void *fq_alloc_node(size_t sz, int node) { + void *ptr; + + ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node); + if (!ptr) + ptr = vmalloc_node(sz, node); + return ptr; +} + +static void fq_free(void *addr) +{ + if (addr && is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + +static int fq_resize(struct Qdisc *sch, u32 log) +{ + struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *array; u32 idx; if (q->fq_root && log == q->fq_trees_log) return 0; - array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL); + /* If XPS was setup, we can allocate memory on right NUMA node */ + array = fq_alloc_node(sizeof(struct rb_root) << log, + netdev_queue_numa_node_read(sch->dev_queue)); if (!array) return -ENOMEM; @@ -595,7 +617,7 @@ static int fq_resize(struct fq_sched_data *q, u32 log) if (q->fq_root) { fq_rehash(q, q->fq_root, q->fq_trees_log, array, log); - kfree(q->fq_root); + fq_free(q->fq_root); } q->fq_root = array; q->fq_trees_log = log; @@ -676,7 +698,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) } if (!err) - err = fq_resize(q, fq_log); + err = fq_resize(sch, fq_log); while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); @@ -697,7 +719,7 @@ static void fq_destroy(struct Qdisc *sch) struct fq_sched_data *q = qdisc_priv(sch); fq_reset(sch); - kfree(q->fq_root); + fq_free(q->fq_root); qdisc_watchdog_cancel(&q->watchdog); } @@ -723,7 +745,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) if (opt) err = fq_change(sch, opt); else - err = fq_resize(q, q->fq_trees_log); + err = fq_resize(sch, q->fq_trees_log); return err; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 922a094..32bb942 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -338,13 +338,13 @@ EXPORT_SYMBOL(netif_carrier_off); cheaper. */ -static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc) +static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc) { kfree_skb(skb); return NET_XMIT_CN; } -static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) +static struct sk_buff *noop_dequeue(struct Qdisc *qdisc) { return NULL; } @@ -718,8 +718,8 @@ static void attach_default_qdiscs(struct net_device *dev) } else { qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); if (qdisc) { - qdisc->ops->attach(qdisc); dev->qdisc = qdisc; + qdisc->ops->attach(qdisc); } } } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index d42234c..12cbc09 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -370,8 +370,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) for (i = table->DPs; i < MAX_DPs; i++) { if (table->tab[i]) { - pr_warning("GRED: Warning: Destroying " - "shadowed VQ 0x%x\n", i); + pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n", + i); gred_destroy_vq(table->tab[i]); table->tab[i] = NULL; } diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c new file mode 100644 index 0000000..cf7f614 --- /dev/null +++ b/net/sched/sch_hhf.c @@ -0,0 +1,745 @@ +/* net/sched/sch_hhf.c Heavy-Hitter Filter (HHF) + * + * Copyright (C) 2013 Terry Lam <vtlam@google.com> + * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com> + */ + +#include <linux/jhash.h> +#include <linux/jiffies.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/vmalloc.h> +#include <net/flow_keys.h> +#include <net/pkt_sched.h> +#include <net/sock.h> + +/* Heavy-Hitter Filter (HHF) + * + * Principles : + * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter + * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified + * as heavy-hitter, it is immediately switched to the heavy-hitter bucket. + * The buckets are dequeued by a Weighted Deficit Round Robin (WDRR) scheduler, + * in which the heavy-hitter bucket is served with less weight. + * In other words, non-heavy-hitters (e.g., short bursts of critical traffic) + * are isolated from heavy-hitters (e.g., persistent bulk traffic) and also have + * higher share of bandwidth. + * + * To capture heavy-hitters, we use the "multi-stage filter" algorithm in the + * following paper: + * [EV02] C. Estan and G. Varghese, "New Directions in Traffic Measurement and + * Accounting", in ACM SIGCOMM, 2002. + * + * Conceptually, a multi-stage filter comprises k independent hash functions + * and k counter arrays. Packets are indexed into k counter arrays by k hash + * functions, respectively. The counters are then increased by the packet sizes. + * Therefore, + * - For a heavy-hitter flow: *all* of its k array counters must be large. + * - For a non-heavy-hitter flow: some of its k array counters can be large + * due to hash collision with other small flows; however, with high + * probability, not *all* k counters are large. + * + * By the design of the multi-stage filter algorithm, the false negative rate + * (heavy-hitters getting away uncaptured) is zero. However, the algorithm is + * susceptible to false positives (non-heavy-hitters mistakenly classified as + * heavy-hitters). + * Therefore, we also implement the following optimizations to reduce false + * positives by avoiding unnecessary increment of the counter values: + * - Optimization O1: once a heavy-hitter is identified, its bytes are not + * accounted in the array counters. This technique is called "shielding" + * in Section 3.3.1 of [EV02]. + * - Optimization O2: conservative update of counters + * (Section 3.3.2 of [EV02]), + * New counter value = max {old counter value, + * smallest counter value + packet bytes} + * + * Finally, we refresh the counters periodically since otherwise the counter + * values will keep accumulating. + * + * Once a flow is classified as heavy-hitter, we also save its per-flow state + * in an exact-matching flow table so that its subsequent packets can be + * dispatched to the heavy-hitter bucket accordingly. + * + * + * At a high level, this qdisc works as follows: + * Given a packet p: + * - If the flow-id of p (e.g., TCP 5-tuple) is already in the exact-matching + * heavy-hitter flow table, denoted table T, then send p to the heavy-hitter + * bucket. + * - Otherwise, forward p to the multi-stage filter, denoted filter F + * + If F decides that p belongs to a non-heavy-hitter flow, then send p + * to the non-heavy-hitter bucket. + * + Otherwise, if F decides that p belongs to a new heavy-hitter flow, + * then set up a new flow entry for the flow-id of p in the table T and + * send p to the heavy-hitter bucket. + * + * In this implementation: + * - T is a fixed-size hash-table with 1024 entries. Hash collision is + * resolved by linked-list chaining. + * - F has four counter arrays, each array containing 1024 32-bit counters. + * That means 4 * 1024 * 32 bits = 16KB of memory. + * - Since each array in F contains 1024 counters, 10 bits are sufficient to + * index into each array. + * Hence, instead of having four hash functions, we chop the 32-bit + * skb-hash into three 10-bit chunks, and the remaining 10-bit chunk is + * computed as XOR sum of those three chunks. + * - We need to clear the counter arrays periodically; however, directly + * memsetting 16KB of memory can lead to cache eviction and unwanted delay. + * So by representing each counter by a valid bit, we only need to reset + * 4K of 1 bit (i.e. 512 bytes) instead of 16KB of memory. + * - The Deficit Round Robin engine is taken from fq_codel implementation + * (net/sched/sch_fq_codel.c). Note that wdrr_bucket corresponds to + * fq_codel_flow in fq_codel implementation. + * + */ + +/* Non-configurable parameters */ +#define HH_FLOWS_CNT 1024 /* number of entries in exact-matching table T */ +#define HHF_ARRAYS_CNT 4 /* number of arrays in multi-stage filter F */ +#define HHF_ARRAYS_LEN 1024 /* number of counters in each array of F */ +#define HHF_BIT_MASK_LEN 10 /* masking 10 bits */ +#define HHF_BIT_MASK 0x3FF /* bitmask of 10 bits */ + +#define WDRR_BUCKET_CNT 2 /* two buckets for Weighted DRR */ +enum wdrr_bucket_idx { + WDRR_BUCKET_FOR_HH = 0, /* bucket id for heavy-hitters */ + WDRR_BUCKET_FOR_NON_HH = 1 /* bucket id for non-heavy-hitters */ +}; + +#define hhf_time_before(a, b) \ + (typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0)) + +/* Heavy-hitter per-flow state */ +struct hh_flow_state { + u32 hash_id; /* hash of flow-id (e.g. TCP 5-tuple) */ + u32 hit_timestamp; /* last time heavy-hitter was seen */ + struct list_head flowchain; /* chaining under hash collision */ +}; + +/* Weighted Deficit Round Robin (WDRR) scheduler */ +struct wdrr_bucket { + struct sk_buff *head; + struct sk_buff *tail; + struct list_head bucketchain; + int deficit; +}; + +struct hhf_sched_data { + struct wdrr_bucket buckets[WDRR_BUCKET_CNT]; + u32 perturbation; /* hash perturbation */ + u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ + u32 drop_overlimit; /* number of times max qdisc packet + * limit was hit + */ + struct list_head *hh_flows; /* table T (currently active HHs) */ + u32 hh_flows_limit; /* max active HH allocs */ + u32 hh_flows_overlimit; /* num of disallowed HH allocs */ + u32 hh_flows_total_cnt; /* total admitted HHs */ + u32 hh_flows_current_cnt; /* total current HHs */ + u32 *hhf_arrays[HHF_ARRAYS_CNT]; /* HH filter F */ + u32 hhf_arrays_reset_timestamp; /* last time hhf_arrays + * was reset + */ + unsigned long *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits + * of hhf_arrays + */ + /* Similar to the "new_flows" vs. "old_flows" concept in fq_codel DRR */ + struct list_head new_buckets; /* list of new buckets */ + struct list_head old_buckets; /* list of old buckets */ + + /* Configurable HHF parameters */ + u32 hhf_reset_timeout; /* interval to reset counter + * arrays in filter F + * (default 40ms) + */ + u32 hhf_admit_bytes; /* counter thresh to classify as + * HH (default 128KB). + * With these default values, + * 128KB / 40ms = 25 Mbps + * i.e., we expect to capture HHs + * sending > 25 Mbps. + */ + u32 hhf_evict_timeout; /* aging threshold to evict idle + * HHs out of table T. This should + * be large enough to avoid + * reordering during HH eviction. + * (default 1s) + */ + u32 hhf_non_hh_weight; /* WDRR weight for non-HHs + * (default 2, + * i.e., non-HH : HH = 2 : 1) + */ +}; + +static u32 hhf_time_stamp(void) +{ + return jiffies; +} + +static unsigned int skb_hash(const struct hhf_sched_data *q, + const struct sk_buff *skb) +{ + struct flow_keys keys; + unsigned int hash; + + if (skb->sk && skb->sk->sk_hash) + return skb->sk->sk_hash; + + skb_flow_dissect(skb, &keys); + hash = jhash_3words((__force u32)keys.dst, + (__force u32)keys.src ^ keys.ip_proto, + (__force u32)keys.ports, q->perturbation); + return hash; +} + +/* Looks up a heavy-hitter flow in a chaining list of table T. */ +static struct hh_flow_state *seek_list(const u32 hash, + struct list_head *head, + struct hhf_sched_data *q) +{ + struct hh_flow_state *flow, *next; + u32 now = hhf_time_stamp(); + + if (list_empty(head)) + return NULL; + + list_for_each_entry_safe(flow, next, head, flowchain) { + u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; + + if (hhf_time_before(prev, now)) { + /* Delete expired heavy-hitters, but preserve one entry + * to avoid kzalloc() when next time this slot is hit. + */ + if (list_is_last(&flow->flowchain, head)) + return NULL; + list_del(&flow->flowchain); + kfree(flow); + q->hh_flows_current_cnt--; + } else if (flow->hash_id == hash) { + return flow; + } + } + return NULL; +} + +/* Returns a flow state entry for a new heavy-hitter. Either reuses an expired + * entry or dynamically alloc a new entry. + */ +static struct hh_flow_state *alloc_new_hh(struct list_head *head, + struct hhf_sched_data *q) +{ + struct hh_flow_state *flow; + u32 now = hhf_time_stamp(); + + if (!list_empty(head)) { + /* Find an expired heavy-hitter flow entry. */ + list_for_each_entry(flow, head, flowchain) { + u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; + + if (hhf_time_before(prev, now)) + return flow; + } + } + + if (q->hh_flows_current_cnt >= q->hh_flows_limit) { + q->hh_flows_overlimit++; + return NULL; + } + /* Create new entry. */ + flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC); + if (!flow) + return NULL; + + q->hh_flows_current_cnt++; + INIT_LIST_HEAD(&flow->flowchain); + list_add_tail(&flow->flowchain, head); + + return flow; +} + +/* Assigns packets to WDRR buckets. Implements a multi-stage filter to + * classify heavy-hitters. + */ +static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + u32 tmp_hash, hash; + u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos; + struct hh_flow_state *flow; + u32 pkt_len, min_hhf_val; + int i; + u32 prev; + u32 now = hhf_time_stamp(); + + /* Reset the HHF counter arrays if this is the right time. */ + prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout; + if (hhf_time_before(prev, now)) { + for (i = 0; i < HHF_ARRAYS_CNT; i++) + bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN); + q->hhf_arrays_reset_timestamp = now; + } + + /* Get hashed flow-id of the skb. */ + hash = skb_hash(q, skb); + + /* Check if this packet belongs to an already established HH flow. */ + flow_pos = hash & HHF_BIT_MASK; + flow = seek_list(hash, &q->hh_flows[flow_pos], q); + if (flow) { /* found its HH flow */ + flow->hit_timestamp = now; + return WDRR_BUCKET_FOR_HH; + } + + /* Now pass the packet through the multi-stage filter. */ + tmp_hash = hash; + xorsum = 0; + for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) { + /* Split the skb_hash into three 10-bit chunks. */ + filter_pos[i] = tmp_hash & HHF_BIT_MASK; + xorsum ^= filter_pos[i]; + tmp_hash >>= HHF_BIT_MASK_LEN; + } + /* The last chunk is computed as XOR sum of other chunks. */ + filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash; + + pkt_len = qdisc_pkt_len(skb); + min_hhf_val = ~0U; + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + u32 val; + + if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) { + q->hhf_arrays[i][filter_pos[i]] = 0; + __set_bit(filter_pos[i], q->hhf_valid_bits[i]); + } + + val = q->hhf_arrays[i][filter_pos[i]] + pkt_len; + if (min_hhf_val > val) + min_hhf_val = val; + } + + /* Found a new HH iff all counter values > HH admit threshold. */ + if (min_hhf_val > q->hhf_admit_bytes) { + /* Just captured a new heavy-hitter. */ + flow = alloc_new_hh(&q->hh_flows[flow_pos], q); + if (!flow) /* memory alloc problem */ + return WDRR_BUCKET_FOR_NON_HH; + flow->hash_id = hash; + flow->hit_timestamp = now; + q->hh_flows_total_cnt++; + + /* By returning without updating counters in q->hhf_arrays, + * we implicitly implement "shielding" (see Optimization O1). + */ + return WDRR_BUCKET_FOR_HH; + } + + /* Conservative update of HHF arrays (see Optimization O2). */ + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val) + q->hhf_arrays[i][filter_pos[i]] = min_hhf_val; + } + return WDRR_BUCKET_FOR_NON_HH; +} + +/* Removes one skb from head of bucket. */ +static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket) +{ + struct sk_buff *skb = bucket->head; + + bucket->head = skb->next; + skb->next = NULL; + return skb; +} + +/* Tail-adds skb to bucket. */ +static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb) +{ + if (bucket->head == NULL) + bucket->head = skb; + else + bucket->tail->next = skb; + bucket->tail = skb; + skb->next = NULL; +} + +static unsigned int hhf_drop(struct Qdisc *sch) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct wdrr_bucket *bucket; + + /* Always try to drop from heavy-hitters first. */ + bucket = &q->buckets[WDRR_BUCKET_FOR_HH]; + if (!bucket->head) + bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH]; + + if (bucket->head) { + struct sk_buff *skb = dequeue_head(bucket); + + sch->q.qlen--; + sch->qstats.drops++; + sch->qstats.backlog -= qdisc_pkt_len(skb); + kfree_skb(skb); + } + + /* Return id of the bucket from which the packet was dropped. */ + return bucket - q->buckets; +} + +static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + enum wdrr_bucket_idx idx; + struct wdrr_bucket *bucket; + + idx = hhf_classify(skb, sch); + + bucket = &q->buckets[idx]; + bucket_add(bucket, skb); + sch->qstats.backlog += qdisc_pkt_len(skb); + + if (list_empty(&bucket->bucketchain)) { + unsigned int weight; + + /* The logic of new_buckets vs. old_buckets is the same as + * new_flows vs. old_flows in the implementation of fq_codel, + * i.e., short bursts of non-HHs should have strict priority. + */ + if (idx == WDRR_BUCKET_FOR_HH) { + /* Always move heavy-hitters to old bucket. */ + weight = 1; + list_add_tail(&bucket->bucketchain, &q->old_buckets); + } else { + weight = q->hhf_non_hh_weight; + list_add_tail(&bucket->bucketchain, &q->new_buckets); + } + bucket->deficit = weight * q->quantum; + } + if (++sch->q.qlen < sch->limit) + return NET_XMIT_SUCCESS; + + q->drop_overlimit++; + /* Return Congestion Notification only if we dropped a packet from this + * bucket. + */ + if (hhf_drop(sch) == idx) + return NET_XMIT_CN; + + /* As we dropped a packet, better let upper stack know this. */ + qdisc_tree_decrease_qlen(sch, 1); + return NET_XMIT_SUCCESS; +} + +static struct sk_buff *hhf_dequeue(struct Qdisc *sch) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb = NULL; + struct wdrr_bucket *bucket; + struct list_head *head; + +begin: + head = &q->new_buckets; + if (list_empty(head)) { + head = &q->old_buckets; + if (list_empty(head)) + return NULL; + } + bucket = list_first_entry(head, struct wdrr_bucket, bucketchain); + + if (bucket->deficit <= 0) { + int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ? + 1 : q->hhf_non_hh_weight; + + bucket->deficit += weight * q->quantum; + list_move_tail(&bucket->bucketchain, &q->old_buckets); + goto begin; + } + + if (bucket->head) { + skb = dequeue_head(bucket); + sch->q.qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + } + + if (!skb) { + /* Force a pass through old_buckets to prevent starvation. */ + if ((head == &q->new_buckets) && !list_empty(&q->old_buckets)) + list_move_tail(&bucket->bucketchain, &q->old_buckets); + else + list_del_init(&bucket->bucketchain); + goto begin; + } + qdisc_bstats_update(sch, skb); + bucket->deficit -= qdisc_pkt_len(skb); + + return skb; +} + +static void hhf_reset(struct Qdisc *sch) +{ + struct sk_buff *skb; + + while ((skb = hhf_dequeue(sch)) != NULL) + kfree_skb(skb); +} + +static void *hhf_zalloc(size_t sz) +{ + void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); + + if (!ptr) + ptr = vzalloc(sz); + + return ptr; +} + +static void hhf_free(void *addr) +{ + if (addr) { + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); + } +} + +static void hhf_destroy(struct Qdisc *sch) +{ + int i; + struct hhf_sched_data *q = qdisc_priv(sch); + + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + hhf_free(q->hhf_arrays[i]); + hhf_free(q->hhf_valid_bits[i]); + } + + for (i = 0; i < HH_FLOWS_CNT; i++) { + struct hh_flow_state *flow, *next; + struct list_head *head = &q->hh_flows[i]; + + if (list_empty(head)) + continue; + list_for_each_entry_safe(flow, next, head, flowchain) { + list_del(&flow->flowchain); + kfree(flow); + } + } + hhf_free(q->hh_flows); +} + +static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { + [TCA_HHF_BACKLOG_LIMIT] = { .type = NLA_U32 }, + [TCA_HHF_QUANTUM] = { .type = NLA_U32 }, + [TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 }, + [TCA_HHF_RESET_TIMEOUT] = { .type = NLA_U32 }, + [TCA_HHF_ADMIT_BYTES] = { .type = NLA_U32 }, + [TCA_HHF_EVICT_TIMEOUT] = { .type = NLA_U32 }, + [TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 }, +}; + +static int hhf_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_HHF_MAX + 1]; + unsigned int qlen; + int err; + u64 non_hh_quantum; + u32 new_quantum = q->quantum; + u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy); + if (err < 0) + return err; + + sch_tree_lock(sch); + + if (tb[TCA_HHF_BACKLOG_LIMIT]) + sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); + + if (tb[TCA_HHF_QUANTUM]) + new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]); + + if (tb[TCA_HHF_NON_HH_WEIGHT]) + new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]); + + non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; + if (non_hh_quantum > INT_MAX) + return -EINVAL; + q->quantum = new_quantum; + q->hhf_non_hh_weight = new_hhf_non_hh_weight; + + if (tb[TCA_HHF_HH_FLOWS_LIMIT]) + q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]); + + if (tb[TCA_HHF_RESET_TIMEOUT]) { + u32 ms = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]); + + q->hhf_reset_timeout = msecs_to_jiffies(ms); + } + + if (tb[TCA_HHF_ADMIT_BYTES]) + q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]); + + if (tb[TCA_HHF_EVICT_TIMEOUT]) { + u32 ms = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]); + + q->hhf_evict_timeout = msecs_to_jiffies(ms); + } + + qlen = sch->q.qlen; + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = hhf_dequeue(sch); + + kfree_skb(skb); + } + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + + sch_tree_unlock(sch); + return 0; +} + +static int hhf_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + int i; + + sch->limit = 1000; + q->quantum = psched_mtu(qdisc_dev(sch)); + q->perturbation = net_random(); + INIT_LIST_HEAD(&q->new_buckets); + INIT_LIST_HEAD(&q->old_buckets); + + /* Configurable HHF parameters */ + q->hhf_reset_timeout = HZ / 25; /* 40 ms */ + q->hhf_admit_bytes = 131072; /* 128 KB */ + q->hhf_evict_timeout = HZ; /* 1 sec */ + q->hhf_non_hh_weight = 2; + + if (opt) { + int err = hhf_change(sch, opt); + + if (err) + return err; + } + + if (!q->hh_flows) { + /* Initialize heavy-hitter flow table. */ + q->hh_flows = hhf_zalloc(HH_FLOWS_CNT * + sizeof(struct list_head)); + if (!q->hh_flows) + return -ENOMEM; + for (i = 0; i < HH_FLOWS_CNT; i++) + INIT_LIST_HEAD(&q->hh_flows[i]); + + /* Cap max active HHs at twice len of hh_flows table. */ + q->hh_flows_limit = 2 * HH_FLOWS_CNT; + q->hh_flows_overlimit = 0; + q->hh_flows_total_cnt = 0; + q->hh_flows_current_cnt = 0; + + /* Initialize heavy-hitter filter arrays. */ + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN * + sizeof(u32)); + if (!q->hhf_arrays[i]) { + hhf_destroy(sch); + return -ENOMEM; + } + } + q->hhf_arrays_reset_timestamp = hhf_time_stamp(); + + /* Initialize valid bits of heavy-hitter filter arrays. */ + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN / + BITS_PER_BYTE); + if (!q->hhf_valid_bits[i]) { + hhf_destroy(sch); + return -ENOMEM; + } + } + + /* Initialize Weighted DRR buckets. */ + for (i = 0; i < WDRR_BUCKET_CNT; i++) { + struct wdrr_bucket *bucket = q->buckets + i; + + INIT_LIST_HEAD(&bucket->bucketchain); + } + } + + return 0; +} + +static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) || + nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) || + nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) || + nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT, + jiffies_to_msecs(q->hhf_reset_timeout)) || + nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) || + nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT, + jiffies_to_msecs(q->hhf_evict_timeout)) || + nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight)) + goto nla_put_failure; + + nla_nest_end(skb, opts); + return skb->len; + +nla_put_failure: + return -1; +} + +static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct tc_hhf_xstats st = { + .drop_overlimit = q->drop_overlimit, + .hh_overlimit = q->hh_flows_overlimit, + .hh_tot_count = q->hh_flows_total_cnt, + .hh_cur_count = q->hh_flows_current_cnt, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static struct Qdisc_ops hhf_qdisc_ops __read_mostly = { + .id = "hhf", + .priv_size = sizeof(struct hhf_sched_data), + + .enqueue = hhf_enqueue, + .dequeue = hhf_dequeue, + .peek = qdisc_peek_dequeued, + .drop = hhf_drop, + .init = hhf_init, + .reset = hhf_reset, + .destroy = hhf_destroy, + .change = hhf_change, + .dump = hhf_dump, + .dump_stats = hhf_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init hhf_module_init(void) +{ + return register_qdisc(&hhf_qdisc_ops); +} + +static void __exit hhf_module_exit(void) +{ + unregister_qdisc(&hhf_qdisc_ops); +} + +module_init(hhf_module_init) +module_exit(hhf_module_exit) +MODULE_AUTHOR("Terry Lam"); +MODULE_AUTHOR("Nandita Dukkipati"); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0e1e38b..0db5a6e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -712,7 +712,7 @@ static s64 htb_do_events(struct htb_sched *q, const int level, /* too much load - let's continue after a break for scheduling */ if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { - pr_warning("htb: too many events!\n"); + pr_warn("htb: too many events!\n"); q->warned |= HTB_WARN_TOOMANYEVENTS; } @@ -1276,9 +1276,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) struct Qdisc *new_q = NULL; int last_child = 0; - // TODO: why don't allow to delete subtree ? references ? does - // tc subsys quarantee us that in htb_destroy it holds no class - // refs so that we can remove children safely there ? + /* TODO: why don't allow to delete subtree ? references ? does + * tc subsys guarantee us that in htb_destroy it holds no class + * refs so that we can remove children safely there ? + */ if (cl->children || cl->filter_cnt) return -EBUSY; @@ -1337,7 +1338,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; - struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; u64 rate64, ceil64; @@ -1361,16 +1361,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; /* Keeping backward compatible with rate_table based iproute2 tc */ - if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) { - rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); - if (rtab) - qdisc_put_rtab(rtab); - } - if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) { - ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); - if (ctab) - qdisc_put_rtab(ctab); - } + if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB])); + + if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB])); if (!cl) { /* new class */ struct Qdisc *new_q; @@ -1477,21 +1472,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, sch_tree_lock(sch); } + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + + psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); + /* it used to be a nasty bug here, we have to check that node * is really leaf before changing cl->un.leaf ! */ if (!cl->level) { - cl->quantum = hopt->rate.rate / q->rate2quantum; + u64 quantum = cl->rate.rate_bytes_ps; + + do_div(quantum, q->rate2quantum); + cl->quantum = min_t(u64, quantum, INT_MAX); + if (!hopt->quantum && cl->quantum < 1000) { - pr_warning( - "HTB: quantum of class %X is small. Consider r2q change.\n", - cl->common.classid); + pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n", + cl->common.classid); cl->quantum = 1000; } if (!hopt->quantum && cl->quantum > 200000) { - pr_warning( - "HTB: quantum of class %X is big. Consider r2q change.\n", - cl->common.classid); + pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n", + cl->common.classid); cl->quantum = 200000; } if (hopt->quantum) @@ -1500,13 +1504,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; - - ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; - - psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); - psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); - cl->buffer = PSCHED_TICKS2NS(hopt->buffer); cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 2e56185..a8b2864 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -78,14 +78,19 @@ static void mq_attach(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); - struct Qdisc *qdisc; + struct Qdisc *qdisc, *old; unsigned int ntx; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { qdisc = priv->qdiscs[ntx]; - qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (qdisc) - qdisc_destroy(qdisc); + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + if (old) + qdisc_destroy(old); +#ifdef CONFIG_NET_SCHED + if (ntx < dev->real_num_tx_queues) + qdisc_list_add(qdisc); +#endif + } kfree(priv->qdiscs); priv->qdiscs = NULL; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index d44c868..6749e2f 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -167,15 +167,17 @@ static void mqprio_attach(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); - struct Qdisc *qdisc; + struct Qdisc *qdisc, *old; unsigned int ntx; /* Attach underlying qdisc */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { qdisc = priv->qdiscs[ntx]; - qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (qdisc) - qdisc_destroy(qdisc); + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + if (old) + qdisc_destroy(old); + if (ntx < dev->real_num_tx_queues) + qdisc_list_add(qdisc); } kfree(priv->qdiscs); priv->qdiscs = NULL; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 2a2b096..afb050a 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see <http://www.gnu.org/licenses/>. * * Author: Alexander Duyck <alexander.h.duyck@intel.com> */ diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 75c94e5..090a4e3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -88,7 +88,7 @@ struct netem_sched_data { u32 duplicate; u32 reorder; u32 corrupt; - u32 rate; + u64 rate; s32 packet_overhead; u32 cell_size; u32 cell_size_reciprocal; @@ -215,10 +215,10 @@ static bool loss_4state(struct netem_sched_data *q) if (rnd < clg->a4) { clg->state = 4; return true; - } else if (clg->a4 < rnd && rnd < clg->a1) { + } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { clg->state = 3; return true; - } else if (clg->a1 < rnd) + } else if (clg->a1 + clg->a4 < rnd) clg->state = 1; break; @@ -268,10 +268,11 @@ static bool loss_gilb_ell(struct netem_sched_data *q) clg->state = 2; if (net_random() < clg->a4) return true; + break; case 2: if (net_random() < clg->a2) clg->state = 1; - if (clg->a3 > net_random()) + if (net_random() > clg->a3) return true; } @@ -494,7 +495,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) now = netem_skb_cb(last)->time_to_send; } - delay += packet_len_2_sched_time(skb->len, q); + delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); } cb->time_to_send = now + delay; @@ -728,7 +729,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) nla_for_each_nested(la, attr, rem) { u16 type = nla_type(la); - switch(type) { + switch (type) { case NETEM_LOSS_GI: { const struct tc_netem_gimodel *gi = nla_data(la); @@ -781,6 +782,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, [TCA_NETEM_ECN] = { .type = NLA_U32 }, + [TCA_NETEM_RATE64] = { .type = NLA_U64 }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -851,6 +853,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_NETEM_RATE]) get_rate(sch, tb[TCA_NETEM_RATE]); + if (tb[TCA_NETEM_RATE64]) + q->rate = max_t(u64, q->rate, + nla_get_u64(tb[TCA_NETEM_RATE64])); + if (tb[TCA_NETEM_ECN]) q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); @@ -973,7 +979,13 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt)) goto nla_put_failure; - rate.rate = q->rate; + if (q->rate >= (1ULL << 32)) { + if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate)) + goto nla_put_failure; + rate.rate = ~0U; + } else { + rate.rate = q->rate; + } rate.packet_overhead = q->packet_overhead; rate.cell_size = q->cell_size; rate.cell_overhead = q->cell_overhead; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c new file mode 100644 index 0000000..fe65340 --- /dev/null +++ b/net/sched/sch_pie.c @@ -0,0 +1,555 @@ +/* Copyright (C) 2013 Cisco Systems, Inc, 2013. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Vijay Subramanian <vijaynsu@cisco.com> + * Author: Mythili Prabhu <mysuryan@cisco.com> + * + * ECN support is added by Naeem Khademi <naeemk@ifi.uio.no> + * University of Oslo, Norway. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <net/pkt_sched.h> +#include <net/inet_ecn.h> + +#define QUEUE_THRESHOLD 10000 +#define DQCOUNT_INVALID -1 +#define MAX_PROB 0xffffffff +#define PIE_SCALE 8 + +/* parameters used */ +struct pie_params { + psched_time_t target; /* user specified target delay in pschedtime */ + u32 tupdate; /* timer frequency (in jiffies) */ + u32 limit; /* number of packets that can be enqueued */ + u32 alpha; /* alpha and beta are between -4 and 4 */ + u32 beta; /* and are used for shift relative to 1 */ + bool ecn; /* true if ecn is enabled */ + bool bytemode; /* to scale drop early prob based on pkt size */ +}; + +/* variables used */ +struct pie_vars { + u32 prob; /* probability but scaled by u32 limit. */ + psched_time_t burst_time; + psched_time_t qdelay; + psched_time_t qdelay_old; + u64 dq_count; /* measured in bytes */ + psched_time_t dq_tstamp; /* drain rate */ + u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */ + u32 qlen_old; /* in bytes */ +}; + +/* statistics gathering */ +struct pie_stats { + u32 packets_in; /* total number of packets enqueued */ + u32 dropped; /* packets dropped due to pie_action */ + u32 overlimit; /* dropped due to lack of space in queue */ + u32 maxq; /* maximum queue size */ + u32 ecn_mark; /* packets marked with ECN */ +}; + +/* private data for the Qdisc */ +struct pie_sched_data { + struct pie_params params; + struct pie_vars vars; + struct pie_stats stats; + struct timer_list adapt_timer; +}; + +static void pie_params_init(struct pie_params *params) +{ + params->alpha = 2; + params->beta = 20; + params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */ + params->limit = 1000; /* default of 1000 packets */ + params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */ + params->ecn = false; + params->bytemode = false; +} + +static void pie_vars_init(struct pie_vars *vars) +{ + vars->dq_count = DQCOUNT_INVALID; + vars->avg_dq_rate = 0; + /* default of 100 ms in pschedtime */ + vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC); +} + +static bool drop_early(struct Qdisc *sch, u32 packet_size) +{ + struct pie_sched_data *q = qdisc_priv(sch); + u32 rnd; + u32 local_prob = q->vars.prob; + u32 mtu = psched_mtu(qdisc_dev(sch)); + + /* If there is still burst allowance left skip random early drop */ + if (q->vars.burst_time > 0) + return false; + + /* If current delay is less than half of target, and + * if drop prob is low already, disable early_drop + */ + if ((q->vars.qdelay < q->params.target / 2) + && (q->vars.prob < MAX_PROB / 5)) + return false; + + /* If we have fewer than 2 mtu-sized packets, disable drop_early, + * similar to min_th in RED + */ + if (sch->qstats.backlog < 2 * mtu) + return false; + + /* If bytemode is turned on, use packet size to compute new + * probablity. Smaller packets will have lower drop prob in this case + */ + if (q->params.bytemode && packet_size <= mtu) + local_prob = (local_prob / mtu) * packet_size; + else + local_prob = q->vars.prob; + + rnd = net_random(); + if (rnd < local_prob) + return true; + + return false; +} + +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + bool enqueue = false; + + if (unlikely(qdisc_qlen(sch) >= sch->limit)) { + q->stats.overlimit++; + goto out; + } + + if (!drop_early(sch, skb->len)) { + enqueue = true; + } else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) && + INET_ECN_set_ce(skb)) { + /* If packet is ecn capable, mark it if drop probability + * is lower than 10%, else drop it. + */ + q->stats.ecn_mark++; + enqueue = true; + } + + /* we can enqueue the packet */ + if (enqueue) { + q->stats.packets_in++; + if (qdisc_qlen(sch) > q->stats.maxq) + q->stats.maxq = qdisc_qlen(sch); + + return qdisc_enqueue_tail(skb, sch); + } + +out: + q->stats.dropped++; + return qdisc_drop(skb, sch); +} + +static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { + [TCA_PIE_TARGET] = {.type = NLA_U32}, + [TCA_PIE_LIMIT] = {.type = NLA_U32}, + [TCA_PIE_TUPDATE] = {.type = NLA_U32}, + [TCA_PIE_ALPHA] = {.type = NLA_U32}, + [TCA_PIE_BETA] = {.type = NLA_U32}, + [TCA_PIE_ECN] = {.type = NLA_U32}, + [TCA_PIE_BYTEMODE] = {.type = NLA_U32}, +}; + +static int pie_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_PIE_MAX + 1]; + unsigned int qlen; + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy); + if (err < 0) + return err; + + sch_tree_lock(sch); + + /* convert from microseconds to pschedtime */ + if (tb[TCA_PIE_TARGET]) { + /* target is in us */ + u32 target = nla_get_u32(tb[TCA_PIE_TARGET]); + + /* convert to pschedtime */ + q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC); + } + + /* tupdate is in jiffies */ + if (tb[TCA_PIE_TUPDATE]) + q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])); + + if (tb[TCA_PIE_LIMIT]) { + u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]); + + q->params.limit = limit; + sch->limit = limit; + } + + if (tb[TCA_PIE_ALPHA]) + q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]); + + if (tb[TCA_PIE_BETA]) + q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]); + + if (tb[TCA_PIE_ECN]) + q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]); + + if (tb[TCA_PIE_BYTEMODE]) + q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]); + + /* Drop excess packets if new limit is lower */ + qlen = sch->q.qlen; + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = __skb_dequeue(&sch->q); + + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_drop(skb, sch); + } + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + + sch_tree_unlock(sch); + return 0; +} + +static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) +{ + + struct pie_sched_data *q = qdisc_priv(sch); + int qlen = sch->qstats.backlog; /* current queue size in bytes */ + + /* If current queue is about 10 packets or more and dq_count is unset + * we have enough packets to calculate the drain rate. Save + * current time as dq_tstamp and start measurement cycle. + */ + if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) { + q->vars.dq_tstamp = psched_get_time(); + q->vars.dq_count = 0; + } + + /* Calculate the average drain rate from this value. If queue length + * has receded to a small value viz., <= QUEUE_THRESHOLD bytes,reset + * the dq_count to -1 as we don't have enough packets to calculate the + * drain rate anymore The following if block is entered only when we + * have a substantial queue built up (QUEUE_THRESHOLD bytes or more) + * and we calculate the drain rate for the threshold here. dq_count is + * in bytes, time difference in psched_time, hence rate is in + * bytes/psched_time. + */ + if (q->vars.dq_count != DQCOUNT_INVALID) { + q->vars.dq_count += skb->len; + + if (q->vars.dq_count >= QUEUE_THRESHOLD) { + psched_time_t now = psched_get_time(); + u32 dtime = now - q->vars.dq_tstamp; + u32 count = q->vars.dq_count << PIE_SCALE; + + if (dtime == 0) + return; + + count = count / dtime; + + if (q->vars.avg_dq_rate == 0) + q->vars.avg_dq_rate = count; + else + q->vars.avg_dq_rate = + (q->vars.avg_dq_rate - + (q->vars.avg_dq_rate >> 3)) + (count >> 3); + + /* If the queue has receded below the threshold, we hold + * on to the last drain rate calculated, else we reset + * dq_count to 0 to re-enter the if block when the next + * packet is dequeued + */ + if (qlen < QUEUE_THRESHOLD) + q->vars.dq_count = DQCOUNT_INVALID; + else { + q->vars.dq_count = 0; + q->vars.dq_tstamp = psched_get_time(); + } + + if (q->vars.burst_time > 0) { + if (q->vars.burst_time > dtime) + q->vars.burst_time -= dtime; + else + q->vars.burst_time = 0; + } + } + } +} + +static void calculate_probability(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + u32 qlen = sch->qstats.backlog; /* queue size in bytes */ + psched_time_t qdelay = 0; /* in pschedtime */ + psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */ + s32 delta = 0; /* determines the change in probability */ + u32 oldprob; + u32 alpha, beta; + bool update_prob = true; + + q->vars.qdelay_old = q->vars.qdelay; + + if (q->vars.avg_dq_rate > 0) + qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate; + else + qdelay = 0; + + /* If qdelay is zero and qlen is not, it means qlen is very small, less + * than dequeue_rate, so we do not update probabilty in this round + */ + if (qdelay == 0 && qlen != 0) + update_prob = false; + + /* Add ranges for alpha and beta, more aggressive for high dropping + * mode and gentle steps for light dropping mode + * In light dropping mode, take gentle steps; in medium dropping mode, + * take medium steps; in high dropping mode, take big steps. + */ + if (q->vars.prob < MAX_PROB / 100) { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; + } else if (q->vars.prob < MAX_PROB / 10) { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; + } else { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + } + + /* alpha and beta should be between 0 and 32, in multiples of 1/16 */ + delta += alpha * ((qdelay - q->params.target)); + delta += beta * ((qdelay - qdelay_old)); + + oldprob = q->vars.prob; + + /* to ensure we increase probability in steps of no more than 2% */ + if (delta > (s32) (MAX_PROB / (100 / 2)) && + q->vars.prob >= MAX_PROB / 10) + delta = (MAX_PROB / 100) * 2; + + /* Non-linear drop: + * Tune drop probability to increase quickly for high delays(>= 250ms) + * 250ms is derived through experiments and provides error protection + */ + + if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC))) + delta += MAX_PROB / (100 / 2); + + q->vars.prob += delta; + + if (delta > 0) { + /* prevent overflow */ + if (q->vars.prob < oldprob) { + q->vars.prob = MAX_PROB; + /* Prevent normalization error. If probability is at + * maximum value already, we normalize it here, and + * skip the check to do a non-linear drop in the next + * section. + */ + update_prob = false; + } + } else { + /* prevent underflow */ + if (q->vars.prob > oldprob) + q->vars.prob = 0; + } + + /* Non-linear drop in probability: Reduce drop probability quickly if + * delay is 0 for 2 consecutive Tupdate periods. + */ + + if ((qdelay == 0) && (qdelay_old == 0) && update_prob) + q->vars.prob = (q->vars.prob * 98) / 100; + + q->vars.qdelay = qdelay; + q->vars.qlen_old = qlen; + + /* We restart the measurement cycle if the following conditions are met + * 1. If the delay has been low for 2 consecutive Tupdate periods + * 2. Calculated drop probability is zero + * 3. We have atleast one estimate for the avg_dq_rate ie., + * is a non-zero value + */ + if ((q->vars.qdelay < q->params.target / 2) && + (q->vars.qdelay_old < q->params.target / 2) && + (q->vars.prob == 0) && + (q->vars.avg_dq_rate > 0)) + pie_vars_init(&q->vars); +} + +static void pie_timer(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc *)arg; + struct pie_sched_data *q = qdisc_priv(sch); + spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + + spin_lock(root_lock); + calculate_probability(sch); + + /* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */ + if (q->params.tupdate) + mod_timer(&q->adapt_timer, jiffies + q->params.tupdate); + spin_unlock(root_lock); + +} + +static int pie_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct pie_sched_data *q = qdisc_priv(sch); + + pie_params_init(&q->params); + pie_vars_init(&q->vars); + sch->limit = q->params.limit; + + setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); + mod_timer(&q->adapt_timer, jiffies + HZ / 2); + + if (opt) { + int err = pie_change(sch, opt); + + if (err) + return err; + } + + return 0; +} + +static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + /* convert target from pschedtime to us */ + if (nla_put_u32(skb, TCA_PIE_TARGET, + ((u32) PSCHED_TICKS2NS(q->params.target)) / + NSEC_PER_USEC) || + nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) || + nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) || + nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) || + nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) || + nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) || + nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode)) + goto nla_put_failure; + + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -1; + +} + +static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct tc_pie_xstats st = { + .prob = q->vars.prob, + .delay = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) / + NSEC_PER_USEC, + /* unscale and return dq_rate in bytes per sec */ + .avg_dq_rate = q->vars.avg_dq_rate * + (PSCHED_TICKS_PER_SEC) >> PIE_SCALE, + .packets_in = q->stats.packets_in, + .overlimit = q->stats.overlimit, + .maxq = q->stats.maxq, + .dropped = q->stats.dropped, + .ecn_mark = q->stats.ecn_mark, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) +{ + struct sk_buff *skb; + skb = __qdisc_dequeue_head(sch, &sch->q); + + if (!skb) + return NULL; + + pie_process_dequeue(sch, skb); + return skb; +} + +static void pie_reset(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + qdisc_reset_queue(sch); + pie_vars_init(&q->vars); +} + +static void pie_destroy(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + q->params.tupdate = 0; + del_timer_sync(&q->adapt_timer); +} + +static struct Qdisc_ops pie_qdisc_ops __read_mostly = { + .id = "pie", + .priv_size = sizeof(struct pie_sched_data), + .enqueue = pie_qdisc_enqueue, + .dequeue = pie_qdisc_dequeue, + .peek = qdisc_peek_dequeued, + .init = pie_init, + .destroy = pie_destroy, + .reset = pie_reset, + .change = pie_change, + .dump = pie_dump, + .dump_stats = pie_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init pie_module_init(void) +{ + return register_qdisc(&pie_qdisc_ops); +} + +static void __exit pie_module_exit(void) +{ + unregister_qdisc(&pie_qdisc_ops); +} + +module_init(pie_module_init); +module_exit(pie_module_exit); + +MODULE_DESCRIPTION("Proportional Integral controller Enhanced (PIE) scheduler"); +MODULE_AUTHOR("Vijay Subramanian"); +MODULE_AUTHOR("Mythili Prabhu"); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index d3a1bc2..76f01e0 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -237,10 +237,12 @@ static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) } #define sfq_unlink(q, x, n, p) \ - n = q->slots[x].dep.next; \ - p = q->slots[x].dep.prev; \ - sfq_dep_head(q, p)->next = n; \ - sfq_dep_head(q, n)->prev = p + do { \ + n = q->slots[x].dep.next; \ + p = q->slots[x].dep.prev; \ + sfq_dep_head(q, p)->next = n; \ + sfq_dep_head(q, n)->prev = p; \ + } while (0) static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 68f9859..fbba5b0 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -21,6 +21,7 @@ #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> +#include <net/tcp.h> /* Simple Token Bucket Filter. @@ -117,6 +118,48 @@ struct tbf_sched_data { }; +/* Time to Length, convert time in ns to length in bytes + * to determinate how many bytes can be sent in given time. + */ +static u64 psched_ns_t2l(const struct psched_ratecfg *r, + u64 time_in_ns) +{ + /* The formula is : + * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC + */ + u64 len = time_in_ns * r->rate_bytes_ps; + + do_div(len, NSEC_PER_SEC); + + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { + do_div(len, 53); + len = len * 48; + } + + if (len > r->overhead) + len -= r->overhead; + else + len = 0; + + return len; +} + +/* + * Return length of individual segments of a gso packet, + * including all headers (MAC, IP, TCP/UDP) + */ +static unsigned int skb_gso_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + const struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); + return hdr_len + shinfo->gso_size; +} + /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ @@ -136,12 +179,8 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) while (segs) { nskb = segs->next; segs->next = NULL; - if (likely(segs->len <= q->max_size)) { - qdisc_skb_cb(segs)->pkt_len = segs->len; - ret = qdisc_enqueue(segs, q->qdisc); - } else { - ret = qdisc_reshape_fail(skb, sch); - } + qdisc_skb_cb(segs)->pkt_len = segs->len; + ret = qdisc_enqueue(segs, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) sch->qstats.drops++; @@ -163,7 +202,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb)) + if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); } @@ -268,6 +307,8 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_RATE64] = { .type = NLA_U64 }, [TCA_TBF_PRATE64] = { .type = NLA_U64 }, + [TCA_TBF_BURST] = { .type = NLA_U32 }, + [TCA_TBF_PBURST] = { .type = NLA_U32 }, }; static int tbf_change(struct Qdisc *sch, struct nlattr *opt) @@ -276,10 +317,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; - struct qdisc_rate_table *rtab = NULL; - struct qdisc_rate_table *ptab = NULL; struct Qdisc *child = NULL; - int max_size, n; + struct psched_ratecfg rate; + struct psched_ratecfg peak; + u64 max_size; + s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); @@ -291,33 +333,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) goto done; qopt = nla_data(tb[TCA_TBF_PARMS]); - rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]); - if (rtab == NULL) - goto done; + if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, + tb[TCA_TBF_RTAB])); - if (qopt->peakrate.rate) { - if (qopt->peakrate.rate > qopt->rate.rate) - ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]); - if (ptab == NULL) - goto done; - } - - for (n = 0; n < 256; n++) - if (rtab->data[n] > qopt->buffer) - break; - max_size = (n << qopt->rate.cell_log) - 1; - if (ptab) { - int size; - - for (n = 0; n < 256; n++) - if (ptab->data[n] > qopt->mtu) - break; - size = (n << qopt->peakrate.cell_log) - 1; - if (size < max_size) - max_size = size; - } - if (max_size < 0) - goto done; + if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, + tb[TCA_TBF_PTAB])); if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); @@ -331,6 +353,50 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) } } + buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); + mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); + + if (tb[TCA_TBF_RATE64]) + rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); + psched_ratecfg_precompute(&rate, &qopt->rate, rate64); + + if (tb[TCA_TBF_BURST]) { + max_size = nla_get_u32(tb[TCA_TBF_BURST]); + buffer = psched_l2t_ns(&rate, max_size); + } else { + max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); + } + + if (qopt->peakrate.rate) { + if (tb[TCA_TBF_PRATE64]) + prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); + psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); + if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { + pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", + peak.rate_bytes_ps, rate.rate_bytes_ps); + err = -EINVAL; + goto done; + } + + if (tb[TCA_TBF_PBURST]) { + u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]); + max_size = min_t(u32, max_size, pburst); + mtu = psched_l2t_ns(&peak, pburst); + } else { + max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); + } + } + + if (max_size < psched_mtu(qdisc_dev(sch))) + pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", + max_size, qdisc_dev(sch)->name, + psched_mtu(qdisc_dev(sch))); + + if (!max_size) { + err = -EINVAL; + goto done; + } + sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); @@ -338,19 +404,21 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc = child; } q->limit = qopt->limit; - q->mtu = PSCHED_TICKS2NS(qopt->mtu); + if (tb[TCA_TBF_PBURST]) + q->mtu = mtu; + else + q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; - q->buffer = PSCHED_TICKS2NS(qopt->buffer); + if (tb[TCA_TBF_BURST]) + q->buffer = buffer; + else + q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; - if (tb[TCA_TBF_RATE64]) - rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); - psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64); - if (ptab) { - if (tb[TCA_TBF_PRATE64]) - prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); - psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64); + memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); + if (qopt->peakrate.rate) { + memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); q->peak_present = true; } else { q->peak_present = false; @@ -359,10 +427,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_unlock(sch); err = 0; done: - if (rtab) - qdisc_put_rtab(rtab); - if (ptab) - qdisc_put_rtab(ptab); return err; } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 68a27f9..5ae6092 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -90,14 +89,12 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the object handling fields. */ atomic_set(&asoc->base.refcnt, 1); - asoc->base.dead = false; /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); asoc->state = SCTP_STATE_CLOSED; asoc->cookie_life = ms_to_ktime(sp->assocparams.sasoc_cookie_life); - asoc->frag_point = 0; asoc->user_frag = sp->user_frag; /* Set the association max_retrans and RTO values from the @@ -110,8 +107,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); asoc->rto_min = msecs_to_jiffies(sp->rtoinfo.srto_min); - asoc->overall_error_count = 0; - /* Initialize the association's heartbeat interval based on the * sock configured value. */ @@ -132,18 +127,15 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a */ asoc->param_flags = sp->param_flags; - /* Initialize the maximum mumber of new data packets that can be sent + /* Initialize the maximum number of new data packets that can be sent * in a burst. */ asoc->max_burst = sp->max_burst; /* initialize association timers */ - asoc->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = asoc->rto_initial; asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = asoc->rto_initial; asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = asoc->rto_initial; - asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] = 0; - asoc->timeouts[SCTP_EVENT_TIMEOUT_T4_RTO] = 0; /* sctpimpguide Section 2.12.2 * If the 'T5-shutdown-guard' timer is used, it SHOULD be set to the @@ -152,10 +144,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] = 5 * asoc->rto_max; - asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; - asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ; + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -173,11 +163,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->max_init_timeo = msecs_to_jiffies(sp->initmsg.sinit_max_init_timeo); - /* Allocate storage for the ssnmap after the inbound and outbound - * streams have been negotiated during Init. - */ - asoc->ssnmap = NULL; - /* Set the local window size for receive. * This is also the rcvbuf space per association. * RFC 6 - A SCTP receiver MUST be able to receive a minimum of @@ -190,25 +175,15 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->a_rwnd = asoc->rwnd; - asoc->rwnd_over = 0; - asoc->rwnd_press = 0; - /* Use my own max window until I learn something better. */ asoc->peer.rwnd = SCTP_DEFAULT_MAXWINDOW; - /* Set the sndbuf size for transmit. */ - asoc->sndbuf_used = 0; - /* Initialize the receive memory counter */ atomic_set(&asoc->rmem_alloc, 0); init_waitqueue_head(&asoc->wait); asoc->c.my_vtag = sctp_generate_tag(ep); - asoc->peer.i.init_tag = 0; /* INIT needs a vtag of 0. */ - asoc->c.peer_vtag = 0; - asoc->c.my_ttag = 0; - asoc->c.peer_ttag = 0; asoc->c.my_port = ep->base.bind_addr.port; asoc->c.initial_tsn = sctp_generate_tsn(ep); @@ -219,7 +194,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->adv_peer_ack_point = asoc->ctsn_ack_point; asoc->highest_sacked = asoc->ctsn_ack_point; asoc->last_cwr_tsn = asoc->ctsn_ack_point; - asoc->unack_data = 0; /* ADDIP Section 4.1 Asconf Chunk Procedures * @@ -238,7 +212,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Make an empty list of remote transport addresses. */ INIT_LIST_HEAD(&asoc->peer.transport_addr_list); - asoc->peer.transport_count = 0; /* RFC 2960 5.1 Normal Establishment of an Association * @@ -252,20 +225,15 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * already received one packet.] */ asoc->peer.sack_needed = 1; - asoc->peer.sack_cnt = 0; asoc->peer.sack_generation = 1; /* Assume that the peer will tell us if he recognizes ASCONF * as part of INIT exchange. - * The sctp_addip_noauth option is there for backward compatibilty + * The sctp_addip_noauth option is there for backward compatibility * and will revert old behavior. */ - asoc->peer.asconf_capable = 0; if (net->sctp.addip_noauth) asoc->peer.asconf_capable = 1; - asoc->asconf_addr_del_pending = NULL; - asoc->src_out_of_asoc_ok = 0; - asoc->new_transport = NULL; /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); @@ -277,12 +245,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - memset(&asoc->peer.tsn_map, 0, sizeof(struct sctp_tsnmap)); - - asoc->need_ecne = 0; - - asoc->assoc_id = 0; - /* Assume that peer would support both address types unless we are * told otherwise. */ @@ -291,8 +253,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.ipv6_address = 1; INIT_LIST_HEAD(&asoc->asocs); - asoc->autoclose = sp->autoclose; - asoc->default_stream = sp->default_stream; asoc->default_ppid = sp->default_ppid; asoc->default_flags = sp->default_flags; @@ -300,9 +260,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->default_timetolive = sp->default_timetolive; asoc->default_rcv_context = sp->default_rcv_context; - /* SCTP_GET_ASSOC_STATS COUNTERS */ - memset(&asoc->stats, 0, sizeof(struct sctp_priv_assoc_stats)); - /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp); @@ -310,9 +267,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a goto fail_init; asoc->active_key_id = ep->active_key_id; - asoc->asoc_shared_key = NULL; - asoc->default_hmac_id = 0; /* Save the hmacs and chunks list into this association */ if (ep->auth_hmacs_list) memcpy(asoc->c.auth_hmacs, ep->auth_hmacs_list, @@ -997,17 +952,13 @@ int sctp_cmp_addr_exact(const union sctp_addr *ss1, */ struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc) { - struct sctp_chunk *chunk; + if (!asoc->need_ecne) + return NULL; /* Send ECNE if needed. * Not being able to allocate a chunk here is not deadly. */ - if (asoc->need_ecne) - chunk = sctp_make_ecne(asoc, asoc->last_ecne_tsn); - else - chunk = NULL; - - return chunk; + return sctp_make_ecne(asoc, asoc->last_ecne_tsn); } /* @@ -1268,7 +1219,7 @@ void sctp_assoc_update(struct sctp_association *asoc, } } - /* SCTP-AUTH: Save the peer parameters from the new assocaitions + /* SCTP-AUTH: Save the peer parameters from the new associations * and also move the association shared keys over */ kfree(asoc->peer.peer_random); @@ -1396,7 +1347,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) } /* Should we send a SACK to update our peer? */ -static inline int sctp_peer_needs_update(struct sctp_association *asoc) +static inline bool sctp_peer_needs_update(struct sctp_association *asoc) { struct net *net = sock_net(asoc->base.sk); switch (asoc->state) { @@ -1408,12 +1359,12 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, (asoc->base.sk->sk_rcvbuf >> net->sctp.rwnd_upd_shift), asoc->pathmtu))) - return 1; + return true; break; default: break; } - return 0; + return false; } /* Increase asoc's rwnd by len and send any window update SACK if needed. */ @@ -1493,7 +1444,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) /* If we've reached or overflowed our receive buffer, announce * a 0 rwnd if rwnd would still be positive. Store the - * the pottential pressure overflow so that the window can be restored + * the potential pressure overflow so that the window can be restored * back to original value. */ if (rx_count >= asoc->base.sk->sk_rcvbuf) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 46b5977..683c7d1 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -16,9 +16,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -42,7 +41,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { }, { .hmac_id = SCTP_AUTH_HMAC_ID_SHA1, - .hmac_name="hmac(sha1)", + .hmac_name = "hmac(sha1)", .hmac_len = SCTP_SHA1_SIG_SIZE, }, { @@ -52,7 +51,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { #if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) { .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, - .hmac_name="hmac(sha256)", + .hmac_name = "hmac(sha256)", .hmac_len = SCTP_SHA256_SIG_SIZE, } #endif @@ -164,7 +163,7 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, * lead-zero padded. If it is not, it * is automatically larger numerically. */ - for (i = 0; i < abs(diff); i++ ) { + for (i = 0; i < abs(diff); i++) { if (longer[i] != 0) return diff; } @@ -227,9 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector( gfp_t gfp) { return sctp_auth_make_key_vector( - (sctp_random_param_t*)asoc->c.auth_random, - (sctp_chunks_param_t*)asoc->c.auth_chunks, - (sctp_hmac_algo_param_t*)asoc->c.auth_hmacs, + (sctp_random_param_t *)asoc->c.auth_random, + (sctp_chunks_param_t *)asoc->c.auth_chunks, + (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, gfp); } @@ -500,8 +499,7 @@ void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[]) if (!auth_hmacs) return; - for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) - { + for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) { if (auth_hmacs[i]) crypto_free_hash(auth_hmacs[i]); } @@ -648,15 +646,15 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) */ for (i = 0; !found && i < len; i++) { switch (param->chunks[i]) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: - case SCTP_CID_SHUTDOWN_COMPLETE: - case SCTP_CID_AUTH: + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + case SCTP_CID_SHUTDOWN_COMPLETE: + case SCTP_CID_AUTH: break; - default: + default: if (param->chunks[i] == chunk) - found = 1; + found = 1; break; } } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 077bb07..871cdf9 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index f2044fc..158701d 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -18,9 +18,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -255,7 +254,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); /* Create chunks for all the full sized DATA chunks. */ - for (i=0, len=first_len; i < whole; i++) { + for (i = 0, len = first_len; i < whole; i++) { frag = SCTP_DATA_MIDDLE_FRAG; if (0 == i) @@ -318,7 +317,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, goto errout; } - err = sctp_user_addto_chunk(chunk, offset, over,msgh->msg_iov); + err = sctp_user_addto_chunk(chunk, offset, over, msgh->msg_iov); /* Put the chunk->skb back into the form expected by send. */ __skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr diff --git a/net/sctp/command.c b/net/sctp/command.c index 3d9a9ff6..dd73758 100644 --- a/net/sctp/command.c +++ b/net/sctp/command.c @@ -19,9 +19,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/debug.c b/net/sctp/debug.c index e89015d..95d7b15 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 09b8daa..6ffb6c1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/input.c b/net/sctp/input.c index 98b69bb..1f4eeb4 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -120,7 +119,7 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_af *af; struct net *net = dev_net(skb->dev); - if (skb->pkt_type!=PACKET_HOST) + if (skb->pkt_type != PACKET_HOST) goto discard_it; SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); @@ -181,8 +180,7 @@ int sctp_rcv(struct sk_buff *skb) * If a frame arrives on an interface and the receiving socket is * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ - if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) - { + if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { if (asoc) { sctp_association_put(asoc); asoc = NULL; @@ -537,8 +535,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, return sk; out: - if (asoc) - sctp_association_put(asoc); + sctp_association_put(asoc); return NULL; } @@ -546,8 +543,7 @@ out: void sctp_err_finish(struct sock *sk, struct sctp_association *asoc) { sctp_bh_unlock_sock(sk); - if (asoc) - sctp_association_put(asoc); + sctp_association_put(asoc); } /* @@ -613,8 +609,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) if (ICMP_FRAG_NEEDED == code) { sctp_icmp_frag_needed(sk, asoc, transport, info); goto out_unlock; - } - else { + } else { if (ICMP_PROT_UNREACH == code) { sctp_icmp_proto_unreachable(sk, asoc, transport); @@ -1058,31 +1053,31 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (ch_end > skb_tail_pointer(skb)) break; - switch(ch->type) { - case SCTP_CID_AUTH: - have_auth = chunk_num; - break; - - case SCTP_CID_COOKIE_ECHO: - /* If a packet arrives containing an AUTH chunk as - * a first chunk, a COOKIE-ECHO chunk as the second - * chunk, and possibly more chunks after them, and - * the receiver does not have an STCB for that - * packet, then authentication is based on - * the contents of the COOKIE- ECHO chunk. - */ - if (have_auth == 1 && chunk_num == 2) - return NULL; - break; - - case SCTP_CID_ASCONF: - if (have_auth || net->sctp.addip_noauth) - asoc = __sctp_rcv_asconf_lookup( - net, ch, laddr, - sctp_hdr(skb)->source, - transportp); - default: - break; + switch (ch->type) { + case SCTP_CID_AUTH: + have_auth = chunk_num; + break; + + case SCTP_CID_COOKIE_ECHO: + /* If a packet arrives containing an AUTH chunk as + * a first chunk, a COOKIE-ECHO chunk as the second + * chunk, and possibly more chunks after them, and + * the receiver does not have an STCB for that + * packet, then authentication is based on + * the contents of the COOKIE- ECHO chunk. + */ + if (have_auth == 1 && chunk_num == 2) + return NULL; + break; + + case SCTP_CID_ASCONF: + if (have_auth || net->sctp.addip_noauth) + asoc = __sctp_rcv_asconf_lookup( + net, ch, laddr, + sctp_hdr(skb)->source, + transportp); + default: + break; } if (asoc) @@ -1119,19 +1114,10 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, return NULL; /* If this is INIT/INIT-ACK look inside the chunk too. */ - switch (ch->type) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: + if (ch->type == SCTP_CID_INIT || ch->type == SCTP_CID_INIT_ACK) return __sctp_rcv_init_lookup(net, skb, laddr, transportp); - break; - default: - return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); - break; - } - - - return NULL; + return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); } /* Lookup an association for an inbound skb. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 5856932..4de12af 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -24,9 +24,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7567e6f..0f6259a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -173,7 +172,8 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, switch (type) { case ICMPV6_PKT_TOOBIG: - sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info)); + if (ip6_sk_accept_pmtu(sk)) + sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info)); goto out_unlock; case ICMPV6_PARAMPROB: if (ICMPV6_UNK_NEXTHDR == code) { @@ -263,7 +263,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, } final_p = fl6_update_dst(fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (!asoc || saddr) goto out; @@ -322,7 +322,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, fl6->saddr = baddr->v6.sin6_addr; fl6->fl6_sport = baddr->v6.sin6_port; final_p = fl6_update_dst(fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); } out: @@ -402,7 +402,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } /* Initialize a sockaddr_storage from in incoming skb. */ -static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb, +static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb, int is_saddr) { __be16 *port; diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 647396b..40e7fac9 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -20,9 +20,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -98,7 +97,7 @@ static void sctp_objcnt_seq_stop(struct seq_file *seq, void *v) { } -static void * sctp_objcnt_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *sctp_objcnt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (*pos >= ARRAY_SIZE(sctp_dbg_objcnt)) ? NULL : (void *)pos; diff --git a/net/sctp/output.c b/net/sctp/output.c index e650978..0f4d15f 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -20,9 +20,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -281,7 +280,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, /* We believe that this chunk is OK to add to the packet */ switch (chunk->chunk_hdr->type) { - case SCTP_CID_DATA: + case SCTP_CID_DATA: /* Account for the data being in the packet */ sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. */ @@ -293,17 +292,17 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, /* timestamp the chunk for rtx purposes */ chunk->sent_at = jiffies; break; - case SCTP_CID_COOKIE_ECHO: + case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; break; - case SCTP_CID_SACK: + case SCTP_CID_SACK: packet->has_sack = 1; if (chunk->asoc) chunk->asoc->stats.osacks++; break; - case SCTP_CID_AUTH: + case SCTP_CID_AUTH: packet->has_auth = 1; packet->auth = chunk; break; @@ -388,7 +387,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) int err = 0; int padding; /* How much padding do we need? */ __u8 has_data = 0; - struct dst_entry *dst = tp->dst; + struct dst_entry *dst; unsigned char *auth = NULL; /* pointer to auth in skb data */ pr_debug("%s: packet:%p\n", __func__, packet); @@ -421,9 +420,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) } } dst = dst_clone(tp->dst); - skb_dst_set(nskb, dst); if (!dst) goto no_route; + skb_dst_set(nskb, dst); /* Build the SCTP header. */ sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); @@ -474,10 +473,11 @@ int sctp_packet_transmit(struct sctp_packet *packet) * for a given destination transport address. */ - if (!tp->rto_pending) { + if (!chunk->resent && !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } + has_data = 1; } @@ -540,8 +540,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } else { /* no need to seed pseudo checksum for SCTP */ nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum_start = (skb_transport_header(nskb) - - nskb->head); + nskb->csum_start = skb_transport_header(nskb) - nskb->head; nskb->csum_offset = offsetof(struct sctphdr, checksum); } } @@ -558,7 +557,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: The works for IPv6 layer checks this bit too later * in transmission. See IP6_ECN_flow_xmit(). */ - (*tp->af_specific->ecn_capable)(nskb->sk); + tp->af_specific->ecn_capable(nskb->sk); /* Set up the IP options. */ /* BUG: not implemented @@ -580,7 +579,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) unsigned long timeout; /* Restart the AUTOCLOSE timer when sending data. */ - if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { + if (sctp_state(asoc, ESTABLISHED) && + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; @@ -592,7 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); nskb->local_df = packet->ipfragok; - (*tp->af_specific->sctp_xmit)(nskb, tp); + tp->af_specific->sctp_xmit(nskb, tp); out: sctp_packet_reset(packet); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 94df758..9c77947 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -111,7 +110,7 @@ static inline int sctp_cacc_skip_3_1_d(struct sctp_transport *primary, struct sctp_transport *transport, int count_of_newacks) { - if (count_of_newacks >=2 && transport != primary) + if (count_of_newacks >= 2 && transport != primary) return 1; return 0; } @@ -208,8 +207,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); - - q->empty = 1; } /* Free the outqueue structure and any related pending chunks. @@ -332,7 +329,6 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); - q->empty = 0; break; } } else { @@ -446,6 +442,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, transport->rto_pending = 0; } + chunk->resent = 1; + /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ @@ -469,7 +467,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, struct net *net = sock_net(q->asoc->base.sk); int error = 0; - switch(reason) { + switch (reason) { case SCTP_RTXR_T3_RTX: SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); @@ -652,7 +650,6 @@ redo: if (chunk->fast_retransmit == SCTP_NEED_FRTX) chunk->fast_retransmit = SCTP_DONT_FRTX; - q->empty = 0; q->asoc->stats.rtxchunks++; break; } @@ -1063,8 +1060,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_transport_reset_timers(transport); - q->empty = 0; - /* Only let one DATA chunk get bundled with a * COOKIE-ECHO chunk. */ @@ -1087,7 +1082,7 @@ sctp_flush_out: * * --xguo */ - while ((ltransport = sctp_list_dequeue(&transport_list)) != NULL ) { + while ((ltransport = sctp_list_dequeue(&transport_list)) != NULL) { struct sctp_transport *t = list_entry(ltransport, struct sctp_transport, send_ready); @@ -1216,7 +1211,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) * destinations for which cacc_saw_newack is set. */ if (transport->cacc.cacc_saw_newack) - count_of_newacks ++; + count_of_newacks++; } /* Move the Cumulative TSN Ack Point if appropriate. */ @@ -1273,29 +1268,17 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) "advertised peer ack point:0x%x\n", __func__, asoc, ctsn, asoc->adv_peer_ack_point); - /* See if all chunks are acked. - * Make sure the empty queue handler will get run later. - */ - q->empty = (list_empty(&q->out_chunk_list) && - list_empty(&q->retransmit)); - if (!q->empty) - goto finish; - - list_for_each_entry(transport, transport_list, transports) { - q->empty = q->empty && list_empty(&transport->transmitted); - if (!q->empty) - goto finish; - } - - pr_debug("%s: sack queue is empty\n", __func__); -finish: - return q->empty; + return sctp_outq_is_empty(q); } -/* Is the outqueue empty? */ +/* Is the outqueue empty? + * The queue is empty when we have not pending data, no in-flight data + * and nothing pending retransmissions. + */ int sctp_outq_is_empty(const struct sctp_outq *q) { - return q->empty; + return q->out_qlen == 0 && q->outstanding_bytes == 0 && + list_empty(&q->retransmit); } /******************************************************************** @@ -1375,6 +1358,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && + !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; @@ -1391,7 +1375,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; - *highest_new_tsn_in_sack = tsn; + if (TSN_lt(*highest_new_tsn_in_sack, tsn)) + *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index ce1ffd8..ab8d9f9 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/probe.c b/net/sctp/probe.c index 53c452e..5e68b94 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -38,6 +38,7 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> +MODULE_SOFTDEP("pre: sctp"); MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>"); MODULE_DESCRIPTION("SCTP snooper"); MODULE_LICENSE("GPL"); @@ -182,6 +183,20 @@ static struct jprobe sctp_recv_probe = { .entry = jsctp_sf_eat_sack, }; +static __init int sctp_setup_jprobe(void) +{ + int ret = register_jprobe(&sctp_recv_probe); + + if (ret) { + if (request_module("sctp")) + goto out; + ret = register_jprobe(&sctp_recv_probe); + } + +out: + return ret; +} + static __init int sctpprobe_init(void) { int ret = -ENOMEM; @@ -202,7 +217,7 @@ static __init int sctpprobe_init(void) &sctpprobe_fops)) goto free_kfifo; - ret = register_jprobe(&sctp_recv_probe); + ret = sctp_setup_jprobe(); if (ret) goto remove_proc; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0c06421..63ba0bd 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -16,9 +16,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -178,7 +177,7 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa rcu_read_unlock(); } -static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) +static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos >= sctp_ep_hashsize) return NULL; @@ -197,7 +196,7 @@ static void sctp_eps_seq_stop(struct seq_file *seq, void *v) } -static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) { if (++*pos >= sctp_ep_hashsize) return NULL; @@ -283,7 +282,7 @@ void sctp_eps_proc_exit(struct net *net) } -static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) +static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos >= sctp_assoc_hashsize) return NULL; @@ -306,7 +305,7 @@ static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) } -static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) { if (++*pos >= sctp_assoc_hashsize) return NULL; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5e17092..34b7726 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -1066,8 +1065,8 @@ static struct sctp_af sctp_af_inet = { #endif }; -struct sctp_pf *sctp_get_pf_specific(sa_family_t family) { - +struct sctp_pf *sctp_get_pf_specific(sa_family_t family) +{ switch (family) { case PF_INET: return sctp_pf_inet_specific; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fe69032..e5f7cdb 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -1968,13 +1967,13 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { - case SCTP_CID_AUTH: - have_auth = 1; - break; - case SCTP_CID_ASCONF: - case SCTP_CID_ASCONF_ACK: - have_asconf = 1; - break; + case SCTP_CID_AUTH: + have_auth = 1; + break; + case SCTP_CID_ASCONF: + case SCTP_CID_ASCONF_ACK: + have_asconf = 1; + break; } } @@ -2001,25 +2000,24 @@ static void sctp_process_ext_param(struct sctp_association *asoc, for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { - case SCTP_CID_FWD_TSN: - if (net->sctp.prsctp_enable && - !asoc->peer.prsctp_capable) + case SCTP_CID_FWD_TSN: + if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable) asoc->peer.prsctp_capable = 1; - break; - case SCTP_CID_AUTH: - /* if the peer reports AUTH, assume that he - * supports AUTH. - */ - if (net->sctp.auth_enable) - asoc->peer.auth_capable = 1; - break; - case SCTP_CID_ASCONF: - case SCTP_CID_ASCONF_ACK: - if (net->sctp.addip_enable) - asoc->peer.asconf_capable = 1; - break; - default: - break; + break; + case SCTP_CID_AUTH: + /* if the peer reports AUTH, assume that he + * supports AUTH. + */ + if (net->sctp.auth_enable) + asoc->peer.auth_capable = 1; + break; + case SCTP_CID_ASCONF: + case SCTP_CID_ASCONF_ACK: + if (net->sctp.addip_enable) + asoc->peer.asconf_capable = 1; + break; + default: + break; } } } @@ -2252,7 +2250,7 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, * VIOLATION error. We build the ERROR chunk here and let the normal * error handling code build and send the packet. */ - if (param.v != (void*)chunk->chunk_end) + if (param.v != (void *)chunk->chunk_end) return sctp_process_inv_paramlength(asoc, param.p, chunk, errp); /* The only missing mandatory param possible today is @@ -2267,14 +2265,14 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, result = sctp_verify_param(net, asoc, param, cid, chunk, errp); switch (result) { - case SCTP_IERROR_ABORT: - case SCTP_IERROR_NOMEM: - return 0; - case SCTP_IERROR_ERROR: - return 1; - case SCTP_IERROR_NO_ERROR: - default: - break; + case SCTP_IERROR_ABORT: + case SCTP_IERROR_NOMEM: + return 0; + case SCTP_IERROR_ERROR: + return 1; + case SCTP_IERROR_NO_ERROR: + default: + break; } } /* for (loop through all parameters) */ @@ -2309,7 +2307,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * added as the primary transport. The source address seems to * be a a better choice than any of the embedded addresses. */ - if(!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) + if (!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) goto nomem; if (sctp_cmp_addr_exact(sctp_source(chunk), peer_addr)) @@ -3335,7 +3333,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, while (asconf_ack_len > 0) { if (asconf_ack_param->crr_id == asconf_param->crr_id) { - switch(asconf_ack_param->param_hdr.type) { + switch (asconf_ack_param->param_hdr.type) { case SCTP_PARAM_SUCCESS_REPORT: return SCTP_ERROR_NO_ERROR; case SCTP_PARAM_ERR_CAUSE: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1a6eef3..ded6db6 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -405,7 +404,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; struct net *net = sock_net(asoc->base.sk); - + sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { pr_debug("%s: sock is busy\n", __func__); @@ -544,7 +543,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands, { struct sctp_ulpevent *event; - event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC, + event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_CANT_STR_ASSOC, (__u16)error, 0, 0, NULL, GFP_ATOMIC); @@ -1116,7 +1115,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_init_cmd_seq(&commands); debug_pre_sfn(); - status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands); + status = state_fn->fn(net, ep, asoc, subtype, event_arg, &commands); debug_post_sfn(); error = sctp_side_effects(event_type, subtype, state, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index dfe3f36..483dcd7 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -820,7 +819,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (new_asoc->autoclose) + if (new_asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -908,7 +907,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -2946,7 +2945,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - error = sctp_eat_data(asoc, chunk, commands ); + error = sctp_eat_data(asoc, chunk, commands); switch (error) { case SCTP_IERROR_NO_ERROR: break; @@ -2970,7 +2969,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, if (chunk->chunk_hdr->flags & SCTP_DATA_SACK_IMM) force = SCTP_FORCE(); - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -3067,7 +3066,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - error = sctp_eat_data(asoc, chunk, commands ); + error = sctp_eat_data(asoc, chunk, commands); switch (error) { case SCTP_IERROR_NO_ERROR: case SCTP_IERROR_HIGH_TSN: @@ -3682,8 +3681,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, asconf_ack->dest = chunk->source; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); if (asoc->new_transport) { - sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, - commands); + sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, commands); ((struct sctp_association *)asoc)->new_transport = NULL; } @@ -3766,7 +3764,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, @@ -3800,7 +3798,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, /* We are going to ABORT, so we might as well stop * processing the rest of the chunks in the packet. */ - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, @@ -3878,7 +3876,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, SCTP_CHUNK(chunk)); /* Count this as receiving DATA. */ - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -4452,7 +4450,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( void *arg, sctp_cmd_seq_t *commands) { - static const char err_str[]="The following chunk had invalid length:"; + static const char err_str[] = "The following chunk had invalid length:"; return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); @@ -4515,7 +4513,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( void *arg, sctp_cmd_seq_t *commands) { - static const char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:"; + static const char err_str[] = "The cumulative tsn ack beyond the max tsn currently sent:"; return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); @@ -4535,7 +4533,7 @@ static sctp_disposition_t sctp_sf_violation_chunk( void *arg, sctp_cmd_seq_t *commands) { - static const char err_str[]="The following chunk violates protocol:"; + static const char err_str[] = "The following chunk violates protocol:"; if (!asoc) return sctp_sf_violation(net, ep, asoc, type, arg, commands); @@ -4611,7 +4609,7 @@ sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, sctp_cmd_seq_t *commands) { struct sctp_chunk *repl; - struct sctp_association* my_asoc; + struct sctp_association *my_asoc; /* The comment below says that we enter COOKIE-WAIT AFTER * sending the INIT, but that doesn't actually work in our @@ -5267,7 +5265,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -5346,7 +5344,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -6001,7 +5999,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, /* Special case the INIT-ACK as there is no peer's vtag * yet. */ - switch(chunk->chunk_hdr->type) { + switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT_ACK: { sctp_initack_chunk_t *initack; @@ -6018,7 +6016,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, /* Special case the INIT and stale COOKIE_ECHO as there is no * vtag yet. */ - switch(chunk->chunk_hdr->type) { + switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT: { sctp_init_chunk_t *init; @@ -6208,7 +6206,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, */ if (*sk->sk_prot_creator->memory_pressure) { if (sctp_tsnmap_has_gap(map) && - (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { + (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { pr_debug("%s: under pressure, reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; @@ -6232,7 +6230,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, /* We are going to ABORT, so we might as well stop * processing the rest of the chunks in the packet. */ - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index c5999b2..a987d54 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -70,7 +69,7 @@ static const sctp_sm_table_entry_t bug = { if ((event_subtype._type > (_max))) { \ pr_warn("table %p possible attack: event %d exceeds max %d\n", \ _table, event_subtype._type, _max); \ - rtn = &bug; \ + rtn = &bug; \ } else \ rtn = &_table[event_subtype._type][(int)state]; \ \ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 72046b9..d32dae7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -28,9 +28,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -83,7 +82,7 @@ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, size_t msg_len); -static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p); +static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); static void sctp_wait_for_close(struct sock *sk, long timeo); @@ -953,7 +952,7 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * * Returns 0 if ok, <0 errno code on error. */ -static int sctp_setsockopt_bindx(struct sock* sk, +static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr __user *addrs, int addrs_size, int op) { @@ -1040,7 +1039,7 @@ out: * Common routine for handling connect() and sctp_connectx(). * Connect will come in with just a single address. */ -static int __sctp_connect(struct sock* sk, +static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1300,7 +1299,7 @@ out_free: * * Returns >=0 if ok, <0 errno code on error. */ -static int __sctp_setsockopt_connectx(struct sock* sk, +static int __sctp_setsockopt_connectx(struct sock *sk, struct sockaddr __user *addrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1338,7 +1337,7 @@ static int __sctp_setsockopt_connectx(struct sock* sk, * This is an older interface. It's kept for backward compatibility * to the option that doesn't provide association id. */ -static int sctp_setsockopt_connectx_old(struct sock* sk, +static int sctp_setsockopt_connectx_old(struct sock *sk, struct sockaddr __user *addrs, int addrs_size) { @@ -1351,7 +1350,7 @@ static int sctp_setsockopt_connectx_old(struct sock* sk, * indication to the call. Error is always negative and association id is * always positive. */ -static int sctp_setsockopt_connectx(struct sock* sk, +static int sctp_setsockopt_connectx(struct sock *sk, struct sockaddr __user *addrs, int addrs_size) { @@ -1374,7 +1373,7 @@ static int sctp_setsockopt_connectx(struct sock* sk, * addrs_num structure member. That way we can re-use the existing * code. */ -static int sctp_getsockopt_connectx3(struct sock* sk, int len, +static int sctp_getsockopt_connectx3(struct sock *sk, int len, char __user *optval, int __user *optlen) { @@ -1569,7 +1568,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; - struct sctp_association *new_asoc=NULL, *asoc=NULL; + struct sctp_association *new_asoc = NULL, *asoc = NULL; struct sctp_transport *transport, *chunk_tp; struct sctp_chunk *chunk; union sctp_addr to; @@ -2196,6 +2195,7 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); + struct net *net = sock_net(sk); /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) @@ -2205,6 +2205,9 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; + if (sp->autoclose > net->sctp.max_autoclose) + sp->autoclose = net->sctp.max_autoclose; + return 0; } @@ -2459,7 +2462,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, int hb_change, pmtud_change, sackdelay_change; if (optlen != sizeof(struct sctp_paddrparams)) - return - EINVAL; + return -EINVAL; if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2480,7 +2483,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, (union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) @@ -2575,8 +2578,11 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay == 0 && params.sack_freq == 0) return 0; } else if (optlen == sizeof(struct sctp_assoc_value)) { - pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); - pr_warn("Use struct sctp_sack_info instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of struct sctp_assoc_value in delayed_ack socket option.\n" + "Use struct sctp_sack_info instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2585,7 +2591,7 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, else params.sack_freq = 0; } else - return - EINVAL; + return -EINVAL; /* Validate value parameter. */ if (params.sack_delay > 500) @@ -2811,6 +2817,8 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne { struct sctp_rtoinfo rtoinfo; struct sctp_association *asoc; + unsigned long rto_min, rto_max; + struct sctp_sock *sp = sctp_sk(sk); if (optlen != sizeof (struct sctp_rtoinfo)) return -EINVAL; @@ -2824,26 +2832,36 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) return -EINVAL; + rto_max = rtoinfo.srto_max; + rto_min = rtoinfo.srto_min; + + if (rto_max) + rto_max = asoc ? msecs_to_jiffies(rto_max) : rto_max; + else + rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max; + + if (rto_min) + rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min; + else + rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min; + + if (rto_min > rto_max) + return -EINVAL; + if (asoc) { if (rtoinfo.srto_initial != 0) asoc->rto_initial = msecs_to_jiffies(rtoinfo.srto_initial); - if (rtoinfo.srto_max != 0) - asoc->rto_max = msecs_to_jiffies(rtoinfo.srto_max); - if (rtoinfo.srto_min != 0) - asoc->rto_min = msecs_to_jiffies(rtoinfo.srto_min); + asoc->rto_max = rto_max; + asoc->rto_min = rto_min; } else { /* If there is no association or the association-id = 0 * set the values to the endpoint. */ - struct sctp_sock *sp = sctp_sk(sk); - if (rtoinfo.srto_initial != 0) sp->rtoinfo.srto_initial = rtoinfo.srto_initial; - if (rtoinfo.srto_max != 0) - sp->rtoinfo.srto_max = rtoinfo.srto_max; - if (rtoinfo.srto_min != 0) - sp->rtoinfo.srto_min = rtoinfo.srto_min; + sp->rtoinfo.srto_max = rto_max; + sp->rtoinfo.srto_min = rto_min; } return 0; @@ -2979,8 +2997,11 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int val; if (optlen == sizeof(int)) { - pr_warn("Use of int in maxseg socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in maxseg socket option.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; params.assoc_id = 0; @@ -3237,8 +3258,11 @@ static int sctp_setsockopt_maxburst(struct sock *sk, int assoc_id = 0; if (optlen == sizeof(int)) { - pr_warn("Use of int in max_burst socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in max_burst socket option deprecated.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; } else if (optlen == sizeof(struct sctp_assoc_value)) { @@ -3317,7 +3341,7 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, if (optlen < sizeof(struct sctp_hmacalgo)) return -EINVAL; - hmacs= memdup_user(optval, optlen); + hmacs = memdup_user(optval, optlen); if (IS_ERR(hmacs)) return PTR_ERR(hmacs); @@ -3355,7 +3379,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk, if (optlen <= sizeof(struct sctp_authkey)) return -EINVAL; - authkey= memdup_user(optval, optlen); + authkey = memdup_user(optval, optlen); if (IS_ERR(authkey)) return PTR_ERR(authkey); @@ -3909,7 +3933,7 @@ static int sctp_init_sock(struct sock *sk) */ sp->hbinterval = net->sctp.hb_interval; sp->pathmaxrxt = net->sctp.max_retrans_path; - sp->pathmtu = 0; // allow default discovery + sp->pathmtu = 0; /* allow default discovery */ sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | @@ -4452,7 +4476,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, (union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) { @@ -4558,12 +4582,15 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else if (len == sizeof(struct sctp_assoc_value)) { - pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); - pr_warn("Use struct sctp_sack_info instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of struct sctp_assoc_value in delayed_ack socket option.\n" + "Use struct sctp_sack_info instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else - return - EINVAL; + return -EINVAL; /* Get association, if sack_assoc_id != 0 and the socket is a one * to many style socket, and an association was not found, then @@ -4653,8 +4680,8 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, if (!asoc) return -EINVAL; - to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - offsetof(struct sctp_getaddrs,addrs); + to = optval + offsetof(struct sctp_getaddrs, addrs); + space_left = len - offsetof(struct sctp_getaddrs, addrs); list_for_each_entry(from, &asoc->peer.transport_addr_list, transports) { @@ -4714,7 +4741,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, memcpy(to, &temp, addrlen); to += addrlen; - cnt ++; + cnt++; space_left -= addrlen; *bytes_copied += addrlen; } @@ -4763,8 +4790,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, bp = &asoc->base.bind_addr; } - to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - offsetof(struct sctp_getaddrs,addrs); + to = optval + offsetof(struct sctp_getaddrs, addrs); + space_left = len - offsetof(struct sctp_getaddrs, addrs); addrs = kmalloc(space_left, GFP_KERNEL); if (!addrs) @@ -4803,7 +4830,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, memcpy(buf, &temp, addrlen); buf += addrlen; bytes_copied += addrlen; - cnt ++; + cnt++; space_left -= addrlen; } @@ -5075,7 +5102,7 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, assocparams.sasoc_cookie_life = ktime_to_ms(asoc->cookie_life); list_for_each(pos, &asoc->peer.transport_addr_list) { - cnt ++; + cnt++; } assocparams.sasoc_number_peer_destinations = cnt; @@ -5203,8 +5230,11 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - pr_warn("Use of int in maxseg socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in maxseg socket option.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5295,8 +5325,11 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - pr_warn("Use of int in max_burst socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in max_burst socket option.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5428,7 +5461,8 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, return -EFAULT; num: len = sizeof(struct sctp_authchunks) + num_chunks; - if (put_user(len, optlen)) return -EFAULT; + if (put_user(len, optlen)) + return -EFAULT; if (put_user(num_chunks, &p->gauth_number_of_chunks)) return -EFAULT; return 0; @@ -5460,7 +5494,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, return -EINVAL; if (asoc) - ch = (struct sctp_chunks_param*)asoc->c.auth_chunks; + ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; else ch = sctp_sk(sk)->ep->auth_chunk_list; @@ -6402,7 +6436,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) * Note: This function is the same function as in core/datagram.c * with a few modifications to make lksctp work. */ -static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) +static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; DEFINE_WAIT(wait); diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 6007124..b9c8521 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -18,9 +18,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 6b36561..7135e61 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -19,9 +19,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -56,11 +55,16 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; -static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, - int write, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, - loff_t *ppos); + static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", @@ -102,17 +106,17 @@ static struct ctl_table sctp_net_table[] = { .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_sctp_do_rto_min, .extra1 = &one, - .extra2 = &timer_max + .extra2 = &init_net.sctp.rto_max }, { .procname = "rto_max", .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .proc_handler = proc_sctp_do_rto_max, + .extra1 = &init_net.sctp.rto_min, .extra2 = &timer_max }, { @@ -294,8 +298,7 @@ static struct ctl_table sctp_net_table[] = { { /* sentinel */ } }; -static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, - int write, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -342,6 +345,60 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, return ret; } +static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + int new_value; + struct ctl_table tbl; + unsigned int min = *(unsigned int *) ctl->extra1; + unsigned int max = *(unsigned int *) ctl->extra2; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.rto_min; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write) { + if (ret || new_value > max || new_value < min) + return -EINVAL; + net->sctp.rto_min = new_value; + } + return ret; +} + +static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + int new_value; + struct ctl_table tbl; + unsigned int min = *(unsigned int *) ctl->extra1; + unsigned int max = *(unsigned int *) ctl->extra2; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.rto_max; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write) { + if (ret || new_value > max || new_value < min) + return -EINVAL; + net->sctp.rto_max = new_value; + } + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; @@ -367,7 +424,7 @@ void sctp_sysctl_net_unregister(struct net *net) kfree(table); } -static struct ctl_table_header * sctp_sysctl_header; +static struct ctl_table_header *sctp_sysctl_header; /* Sysctl registration. */ void sctp_sysctl_register(void) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e332efb..d0810dc 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -24,9 +24,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -573,7 +572,7 @@ void sctp_transport_burst_limited(struct sctp_transport *t) u32 old_cwnd = t->cwnd; u32 max_burst_bytes; - if (t->burst_limited) + if (t->burst_limited || asoc->max_burst == 0) return; max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index fbda200..7635f9f 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 81089ed..85c6465 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 1c1484e..5dc9411 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -44,9 +43,9 @@ #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ -static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq, +static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *); -static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, +static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *, struct sctp_ulpevent *); static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq); @@ -108,7 +107,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, event = sctp_ulpq_reasm(ulpq, event); /* Do ordering if needed. */ - if ((event) && (event->msg_flags & MSG_EOR)){ + if ((event) && (event->msg_flags & MSG_EOR)) { /* Create a temporary list to collect chunks on. */ skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); @@ -337,7 +336,8 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, pos = f_frag->next; /* Get the last skb in the f_frag's frag_list if present. */ - for (last = list; list; last = list, list = list->next); + for (last = list; list; last = list, list = list->next) + ; /* Add the list of remaining fragments to the first fragments * frag_list. @@ -727,7 +727,7 @@ static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq) while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) { /* Do ordering if needed. */ - if ((event) && (event->msg_flags & MSG_EOR)){ + if ((event) && (event->msg_flags & MSG_EOR)) { skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); diff --git a/net/socket.c b/net/socket.c index 0b18693..879933a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1445,48 +1445,61 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, err = fd1; goto out_release_both; } + fd2 = get_unused_fd_flags(flags); if (unlikely(fd2 < 0)) { err = fd2; - put_unused_fd(fd1); - goto out_release_both; + goto out_put_unused_1; } newfile1 = sock_alloc_file(sock1, flags, NULL); if (unlikely(IS_ERR(newfile1))) { err = PTR_ERR(newfile1); - put_unused_fd(fd1); - put_unused_fd(fd2); - goto out_release_both; + goto out_put_unused_both; } newfile2 = sock_alloc_file(sock2, flags, NULL); if (IS_ERR(newfile2)) { err = PTR_ERR(newfile2); - fput(newfile1); - put_unused_fd(fd1); - put_unused_fd(fd2); - sock_release(sock2); - goto out; + goto out_fput_1; } + err = put_user(fd1, &usockvec[0]); + if (err) + goto out_fput_both; + + err = put_user(fd2, &usockvec[1]); + if (err) + goto out_fput_both; + audit_fd_pair(fd1, fd2); + fd_install(fd1, newfile1); fd_install(fd2, newfile2); /* fd1 and fd2 may be already another descriptors. * Not kernel problem. */ - err = put_user(fd1, &usockvec[0]); - if (!err) - err = put_user(fd2, &usockvec[1]); - if (!err) - return 0; + return 0; - sys_close(fd2); - sys_close(fd1); - return err; +out_fput_both: + fput(newfile2); + fput(newfile1); + put_unused_fd(fd2); + put_unused_fd(fd1); + goto out; +out_fput_1: + fput(newfile1); + put_unused_fd(fd2); + put_unused_fd(fd1); + sock_release(sock2); + goto out; + +out_put_unused_both: + put_unused_fd(fd2); +out_put_unused_1: + put_unused_fd(fd1); out_release_both: sock_release(sock2); out_release_1: @@ -1973,7 +1986,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; } @@ -2968,11 +2981,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *ifr32) { struct ifreq kifr; - struct ifreq __user *uifr; mm_segment_t old_fs; int err; - u32 data; - void __user *datap; switch (cmd) { case SIOCBONDENSLAVE: @@ -2989,26 +2999,13 @@ static int bond_ioctl(struct net *net, unsigned int cmd, set_fs(old_fs); return err; - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) - return -EFAULT; - - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) - return -EFAULT; - - datap = compat_ptr(data); - if (put_user(datap, &uifr->ifr_ifru.ifru_data)) - return -EFAULT; - - return dev_ioctl(net, cmd, uifr); default: return -ENOIOCTLCMD; } } -static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, +/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ +static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { struct ifreq __user *u_ifreq64; @@ -3019,19 +3016,16 @@ static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), IFNAMSIZ)) return -EFAULT; - if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) + if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) return -EFAULT; data64 = compat_ptr(data32); u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); - /* Don't check these user accesses, just let that get trapped - * in the ioctl handler instead. - */ if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], IFNAMSIZ)) return -EFAULT; - if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) + if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) return -EFAULT; return dev_ioctl(net, cmd, u_ifreq64); @@ -3111,27 +3105,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return err; } -static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32) -{ - void __user *uptr; - compat_uptr_t uptr32; - struct ifreq __user *uifr; - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - if (get_user(uptr32, &uifr32->ifr_data)) - return -EFAULT; - - uptr = compat_ptr(uptr32); - - if (put_user(uptr, &uifr->ifr_data)) - return -EFAULT; - - return dev_ioctl(net, SIOCSHWTSTAMP, uifr); -} - struct rtentry32 { u32 rt_pad1; struct sockaddr rt_dst; /* target address */ @@ -3243,7 +3216,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, struct net *net = sock_net(sk); if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) - return siocdevprivate_ioctl(net, cmd, argp); + return compat_ifr_data_ioctl(net, cmd, argp); switch (cmd) { case SIOCSIFBR: @@ -3263,8 +3236,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: case SIOCBONDCHANGEACTIVE: return bond_ioctl(net, cmd, argp); case SIOCADDRT: @@ -3274,8 +3245,11 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, return do_siocgstamp(net, sock, cmd, argp); case SIOCGSTAMPNS: return do_siocgstampns(net, sock, cmd, argp); + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: case SIOCSHWTSTAMP: - return compat_siocshwtstamp(net, argp); + case SIOCGHWTSTAMP: + return compat_ifr_data_ioctl(net, cmd, argp); case FIOSETOWN: case SIOCSPGRP: diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 97912b4..42fdfc6 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1517,7 +1517,7 @@ out: static int gss_refresh_null(struct rpc_task *task) { - return -EACCES; + return 0; } static __be32 * diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 0d44025..4c2a80b 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -621,12 +621,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (!p) break; /* No more bearers to try */ - if (tipc_bearer_blocked(p)) { - if (!s || tipc_bearer_blocked(s)) - continue; /* Can't use either bearer */ - b = s; - } - tipc_nmap_diff(&bcbearer->remains, &b->nodes, &bcbearer->remains_new); if (bcbearer->remains_new.count == bcbearer->remains.count) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3f9707a..07ed5cc 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1,8 +1,8 @@ /* * net/tipc/bearer.c: TIPC bearer code * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2006, 2010-2011, Wind River Systems + * Copyright (c) 1996-2006, 2013, Ericsson AB + * Copyright (c) 2004-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,8 +41,13 @@ #define MAX_ADDR_STR 60 -static struct tipc_media *media_list[MAX_MEDIA]; -static u32 media_count; +static struct tipc_media * const media_info_array[] = { + ð_media_info, +#ifdef CONFIG_TIPC_MEDIA_IB + &ib_media_info, +#endif + NULL +}; struct tipc_bearer tipc_bearers[MAX_BEARERS]; @@ -55,11 +60,11 @@ struct tipc_media *tipc_media_find(const char *name) { u32 i; - for (i = 0; i < media_count; i++) { - if (!strcmp(media_list[i]->name, name)) - return media_list[i]; + for (i = 0; media_info_array[i] != NULL; i++) { + if (!strcmp(media_info_array[i]->name, name)) + break; } - return NULL; + return media_info_array[i]; } /** @@ -69,44 +74,11 @@ static struct tipc_media *media_find_id(u8 type) { u32 i; - for (i = 0; i < media_count; i++) { - if (media_list[i]->type_id == type) - return media_list[i]; + for (i = 0; media_info_array[i] != NULL; i++) { + if (media_info_array[i]->type_id == type) + break; } - return NULL; -} - -/** - * tipc_register_media - register a media type - * - * Bearers for this media type must be activated separately at a later stage. - */ -int tipc_register_media(struct tipc_media *m_ptr) -{ - int res = -EINVAL; - - write_lock_bh(&tipc_net_lock); - - if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) - goto exit; - if (m_ptr->priority > TIPC_MAX_LINK_PRI) - goto exit; - if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) || - (m_ptr->tolerance > TIPC_MAX_LINK_TOL)) - goto exit; - if (media_count >= MAX_MEDIA) - goto exit; - if (tipc_media_find(m_ptr->name) || media_find_id(m_ptr->type_id)) - goto exit; - - media_list[media_count] = m_ptr; - media_count++; - res = 0; -exit: - write_unlock_bh(&tipc_net_lock); - if (res) - pr_warn("Media <%s> registration error\n", m_ptr->name); - return res; + return media_info_array[i]; } /** @@ -144,13 +116,11 @@ struct sk_buff *tipc_media_get_names(void) if (!buf) return NULL; - read_lock_bh(&tipc_net_lock); - for (i = 0; i < media_count; i++) { + for (i = 0; media_info_array[i] != NULL; i++) { tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, - media_list[i]->name, - strlen(media_list[i]->name) + 1); + media_info_array[i]->name, + strlen(media_info_array[i]->name) + 1); } - read_unlock_bh(&tipc_net_lock); return buf; } @@ -215,31 +185,12 @@ struct tipc_bearer *tipc_bearer_find(const char *name) } /** - * tipc_bearer_find_interface - locates bearer object with matching interface name - */ -struct tipc_bearer *tipc_bearer_find_interface(const char *if_name) -{ - struct tipc_bearer *b_ptr; - char *b_if_name; - u32 i; - - for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { - if (!b_ptr->active) - continue; - b_if_name = strchr(b_ptr->name, ':') + 1; - if (!strcmp(b_if_name, if_name)) - return b_ptr; - } - return NULL; -} - -/** * tipc_bearer_get_names - record names of bearers in buffer */ struct sk_buff *tipc_bearer_get_names(void) { struct sk_buff *buf; - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; int i, j; buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); @@ -247,13 +198,13 @@ struct sk_buff *tipc_bearer_get_names(void) return NULL; read_lock_bh(&tipc_net_lock); - for (i = 0; i < media_count; i++) { + for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { - b_ptr = &tipc_bearers[j]; - if (b_ptr->active && (b_ptr->media == media_list[i])) { + b = &tipc_bearers[j]; + if (b->active && (b->media == media_info_array[i])) { tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, - b_ptr->name, - strlen(b_ptr->name) + 1); + b->name, + strlen(b->name) + 1); } } } @@ -275,31 +226,6 @@ void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) tipc_disc_remove_dest(b_ptr->link_req); } -/* - * Interrupt enabling new requests after bearer blocking: - * See bearer_send(). - */ -void tipc_continue(struct tipc_bearer *b) -{ - spin_lock_bh(&b->lock); - b->blocked = 0; - spin_unlock_bh(&b->lock); -} - -/* - * tipc_bearer_blocked - determines if bearer is currently blocked - */ -int tipc_bearer_blocked(struct tipc_bearer *b) -{ - int res; - - spin_lock_bh(&b->lock); - res = b->blocked; - spin_unlock_bh(&b->lock); - - return res; -} - /** * tipc_enable_bearer - enable bearer with the given name */ @@ -387,6 +313,7 @@ restart: b_ptr = &tipc_bearers[bearer_id]; strcpy(b_ptr->name, name); + b_ptr->media = m_ptr; res = m_ptr->enable_media(b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", @@ -395,7 +322,6 @@ restart: } b_ptr->identity = bearer_id; - b_ptr->media = m_ptr; b_ptr->tolerance = m_ptr->tolerance; b_ptr->window = m_ptr->window; b_ptr->net_plane = bearer_id + 'A'; @@ -420,17 +346,16 @@ exit: } /** - * tipc_block_bearer - Block the bearer, and reset all its links + * tipc_reset_bearer - Reset all links established over this bearer */ -int tipc_block_bearer(struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { struct tipc_link *l_ptr; struct tipc_link *temp_l_ptr; read_lock_bh(&tipc_net_lock); - pr_info("Blocking bearer <%s>\n", b_ptr->name); + pr_info("Resetting bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); - b_ptr->blocked = 1; list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { struct tipc_node *n_ptr = l_ptr->owner; @@ -456,7 +381,6 @@ static void bearer_disable(struct tipc_bearer *b_ptr) pr_info("Disabling bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); - b_ptr->blocked = 1; b_ptr->media->disable_media(b_ptr); list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { tipc_link_delete(l_ptr); @@ -490,6 +414,211 @@ int tipc_disable_bearer(const char *name) } +/* tipc_l2_media_addr_set - initialize Ethernet media address structure + * + * Media-dependent "value" field stores MAC address in first 6 bytes + * and zeroes out the remaining bytes. + */ +void tipc_l2_media_addr_set(const struct tipc_bearer *b, + struct tipc_media_addr *a, char *mac) +{ + int len = b->media->hwaddr_len; + + if (unlikely(sizeof(a->value) < len)) { + WARN_ONCE(1, "Media length invalid\n"); + return; + } + + memcpy(a->value, mac, len); + memset(a->value + len, 0, sizeof(a->value) - len); + a->media_id = b->media->type_id; + a->broadcast = !memcmp(mac, b->bcast_addr.value, len); +} + +int tipc_enable_l2_media(struct tipc_bearer *b) +{ + struct net_device *dev; + char *driver_name = strchr((const char *)b->name, ':') + 1; + + /* Find device with specified name */ + dev = dev_get_by_name(&init_net, driver_name); + if (!dev) + return -ENODEV; + + /* Associate TIPC bearer with Ethernet bearer */ + b->media_ptr = dev; + memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value)); + memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); + b->bcast_addr.media_id = b->media->type_id; + b->bcast_addr.broadcast = 1; + b->mtu = dev->mtu; + tipc_l2_media_addr_set(b, &b->addr, (char *)dev->dev_addr); + rcu_assign_pointer(dev->tipc_ptr, b); + return 0; +} + +/* tipc_disable_l2_media - detach TIPC bearer from an Ethernet interface + * + * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, + * then get worker thread to complete bearer cleanup. (Can't do cleanup + * here because cleanup code needs to sleep and caller holds spinlocks.) + */ +void tipc_disable_l2_media(struct tipc_bearer *b) +{ + struct net_device *dev = (struct net_device *)b->media_ptr; + RCU_INIT_POINTER(dev->tipc_ptr, NULL); + dev_put(dev); +} + +/** + * tipc_l2_send_msg - send a TIPC packet out over an Ethernet interface + * @buf: the packet to be sent + * @b_ptr: the bearer throught which the packet is to be sent + * @dest: peer destination address + */ +int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, + struct tipc_media_addr *dest) +{ + struct sk_buff *clone; + int delta; + struct net_device *dev = (struct net_device *)b->media_ptr; + + clone = skb_clone(buf, GFP_ATOMIC); + if (!clone) + return 0; + + delta = dev->hard_header_len - skb_headroom(buf); + if ((delta > 0) && + pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(clone); + return 0; + } + + skb_reset_network_header(clone); + clone->dev = dev; + clone->protocol = htons(ETH_P_TIPC); + dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, clone->len); + dev_queue_xmit(clone); + return 0; +} + +/* tipc_bearer_send- sends buffer to destination over bearer + * + * IMPORTANT: + * The media send routine must not alter the buffer being passed in + * as it may be needed for later retransmission! + */ +void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, + struct tipc_media_addr *dest) +{ + b->media->send_msg(buf, b, dest); +} + +/** + * tipc_l2_rcv_msg - handle incoming TIPC message from an interface + * @buf: the received packet + * @dev: the net device that the packet was received on + * @pt: the packet_type structure which was used to register this handler + * @orig_dev: the original receive net device in case the device is a bond + * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using interface multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). + */ +static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct tipc_bearer *b_ptr; + + if (!net_eq(dev_net(dev), &init_net)) { + kfree_skb(buf); + return NET_RX_DROP; + } + + rcu_read_lock(); + b_ptr = rcu_dereference(dev->tipc_ptr); + if (likely(b_ptr)) { + if (likely(buf->pkt_type <= PACKET_BROADCAST)) { + buf->next = NULL; + tipc_recv_msg(buf, b_ptr); + rcu_read_unlock(); + return NET_RX_SUCCESS; + } + } + rcu_read_unlock(); + + kfree_skb(buf); + return NET_RX_DROP; +} + +/** + * tipc_l2_device_event - handle device events from network device + * @nb: the context of the notification + * @evt: the type of event + * @ptr: the net device that the event was on + * + * This function is called by the Ethernet driver in case of link + * change event. + */ +static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, + void *ptr) +{ + struct tipc_bearer *b_ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + rcu_read_lock(); + b_ptr = rcu_dereference(dev->tipc_ptr); + if (!b_ptr) { + rcu_read_unlock(); + return NOTIFY_DONE; + } + + b_ptr->mtu = dev->mtu; + + switch (evt) { + case NETDEV_CHANGE: + if (netif_carrier_ok(dev)) + break; + case NETDEV_DOWN: + case NETDEV_CHANGEMTU: + case NETDEV_CHANGEADDR: + tipc_reset_bearer(b_ptr); + break; + case NETDEV_UNREGISTER: + case NETDEV_CHANGENAME: + tipc_disable_bearer(b_ptr->name); + break; + } + rcu_read_unlock(); + + return NOTIFY_OK; +} + +static struct packet_type tipc_packet_type __read_mostly = { + .type = __constant_htons(ETH_P_TIPC), + .func = tipc_l2_rcv_msg, +}; + +static struct notifier_block notifier = { + .notifier_call = tipc_l2_device_event, + .priority = 0, +}; + +int tipc_bearer_setup(void) +{ + dev_add_pack(&tipc_packet_type); + return register_netdevice_notifier(¬ifier); +} + +void tipc_bearer_cleanup(void) +{ + unregister_netdevice_notifier(¬ifier); + dev_remove_pack(&tipc_packet_type); +} void tipc_bearer_stop(void) { @@ -499,5 +628,4 @@ void tipc_bearer_stop(void) if (tipc_bearers[i].active) bearer_disable(&tipc_bearers[i]); } - media_count = 0; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index e5e04be..410efb1 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -1,7 +1,7 @@ /* * net/tipc/bearer.h: Include file for TIPC bearer code * - * Copyright (c) 1996-2006, Ericsson AB + * Copyright (c) 1996-2006, 2013, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -73,18 +73,18 @@ struct tipc_media_addr { struct tipc_bearer; /** - * struct tipc_media - TIPC media information available to internal users + * struct tipc_media - Media specific info exposed to generic bearer layer * @send_msg: routine which handles buffer transmission * @enable_media: routine which enables a media * @disable_media: routine which disables a media * @addr2str: routine which converts media address to string * @addr2msg: routine which converts media address to protocol message area * @msg2addr: routine which converts media address from protocol message area - * @bcast_addr: media address used in broadcasting * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion * @type_id: TIPC media identifier + * @hwaddr_len: TIPC media address len * @name: media name */ struct tipc_media { @@ -101,18 +101,20 @@ struct tipc_media { u32 tolerance; u32 window; u32 type_id; + u32 hwaddr_len; char name[TIPC_MAX_MEDIA_NAME]; }; /** - * struct tipc_bearer - TIPC bearer structure + * struct tipc_bearer - Generic TIPC bearer structure + * @dev: ptr to associated network device * @usr_handle: pointer to additional media-specific information about bearer * @mtu: max packet size bearer can support - * @blocked: non-zero if bearer is blocked * @lock: spinlock for controlling access to bearer * @addr: media-specific address associated with bearer * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer + * @bcast_addr: media address used in broadcasting * @priority: default link priority for bearer * @window: default window size for bearer * @tolerance: default link tolerance for bearer @@ -128,9 +130,8 @@ struct tipc_media { * care of initializing all other fields. */ struct tipc_bearer { - void *usr_handle; /* initalized by media */ + void *media_ptr; /* initalized by media */ u32 mtu; /* initalized by media */ - int blocked; /* initalized by media */ struct tipc_media_addr addr; /* initalized by media */ char name[TIPC_MAX_BEARER_NAME]; spinlock_t lock; @@ -159,55 +160,41 @@ extern struct tipc_bearer tipc_bearers[]; /* * TIPC routines available to supported media types */ -int tipc_register_media(struct tipc_media *m_ptr); void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); -int tipc_block_bearer(struct tipc_bearer *b_ptr); -void tipc_continue(struct tipc_bearer *tb_ptr); - int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); int tipc_disable_bearer(const char *name); /* * Routines made available to TIPC by supported media types */ -int tipc_eth_media_start(void); -void tipc_eth_media_stop(void); +extern struct tipc_media eth_media_info; #ifdef CONFIG_TIPC_MEDIA_IB -int tipc_ib_media_start(void); -void tipc_ib_media_stop(void); -#else -static inline int tipc_ib_media_start(void) { return 0; } -static inline void tipc_ib_media_stop(void) { return; } +extern struct tipc_media ib_media_info; #endif int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); +void tipc_l2_media_addr_set(const struct tipc_bearer *b, + struct tipc_media_addr *a, char *mac); +int tipc_enable_l2_media(struct tipc_bearer *b); +void tipc_disable_l2_media(struct tipc_bearer *b); +int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, + struct tipc_media_addr *dest); struct sk_buff *tipc_bearer_get_names(void); void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); struct tipc_bearer *tipc_bearer_find(const char *name); -struct tipc_bearer *tipc_bearer_find_interface(const char *if_name); struct tipc_media *tipc_media_find(const char *name); -int tipc_bearer_blocked(struct tipc_bearer *b_ptr); +int tipc_bearer_setup(void); +void tipc_bearer_cleanup(void); void tipc_bearer_stop(void); - -/** - * tipc_bearer_send- sends buffer to destination over bearer - * - * IMPORTANT: - * The media send routine must not alter the buffer being passed in - * as it may be needed for later retransmission! - */ -static inline void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, - struct tipc_media_addr *dest) -{ - b->media->send_msg(buf, b, dest); -} +void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, + struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/core.c b/net/tipc/core.c index fd4eeea..f9e88d8 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -82,8 +82,7 @@ struct sk_buff *tipc_buf_acquire(u32 size) static void tipc_core_stop_net(void) { tipc_net_stop(); - tipc_eth_media_stop(); - tipc_ib_media_stop(); + tipc_bearer_cleanup(); } /** @@ -94,10 +93,7 @@ int tipc_core_start_net(unsigned long addr) int res; tipc_net_start(addr); - res = tipc_eth_media_start(); - if (res < 0) - goto err; - res = tipc_ib_media_start(); + res = tipc_bearer_setup(); if (res < 0) goto err; return res; @@ -113,7 +109,6 @@ err: static void tipc_core_stop(void) { tipc_netlink_stop(); - tipc_handler_stop(); tipc_cfg_stop(); tipc_subscr_stop(); tipc_nametbl_stop(); @@ -146,9 +141,10 @@ static int tipc_core_start(void) res = tipc_subscr_start(); if (!res) res = tipc_cfg_init(); - if (res) + if (res) { + tipc_handler_stop(); tipc_core_stop(); - + } return res; } @@ -178,6 +174,7 @@ static int __init tipc_init(void) static void __exit tipc_exit(void) { + tipc_handler_stop(); tipc_core_stop_net(); tipc_core_stop(); pr_info("Deactivated\n"); diff --git a/net/tipc/core.h b/net/tipc/core.h index 94895d4..1ff477b 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -47,7 +47,7 @@ #include <linux/mm.h> #include <linux/timer.h> #include <linux/string.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/interrupt.h> #include <linux/atomic.h> #include <asm/hardirq.h> diff --git a/net/tipc/discover.c b/net/tipc/discover.c index ecc758c..bc849f1 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -239,7 +239,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) /* Accept discovery message & send response, if necessary */ link_fully_up = link_working_working(link); - if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) { + if ((type == DSC_REQ_MSG) && !link_fully_up) { rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); if (rbuf) { tipc_bearer_send(b_ptr, rbuf, &media_addr); @@ -288,16 +288,6 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) } /** - * disc_send_msg - send link setup request message - * @req: ptr to link request structure - */ -static void disc_send_msg(struct tipc_link_req *req) -{ - if (!req->bearer->blocked) - tipc_bearer_send(req->bearer, req->buf, &req->dest); -} - -/** * disc_timeout - send a periodic link setup request * @req: ptr to link request structure * @@ -322,7 +312,8 @@ static void disc_timeout(struct tipc_link_req *req) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - disc_send_msg(req); + tipc_bearer_send(req->bearer, req->buf, &req->dest); + req->timer_intv *= 2; if (req->num_nodes) @@ -368,7 +359,7 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); k_start_timer(&req->timer, req->timer_intv); b_ptr->link_req = req; - disc_send_msg(req); + tipc_bearer_send(req->bearer, req->buf, &req->dest); return 0; } diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index f80d59f..67cf3f9 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -1,7 +1,7 @@ /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * - * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2001-2007, 2013, Ericsson AB * Copyright (c) 2005-2008, 2011-2013, Wind River Systems * All rights reserved. * @@ -37,259 +37,11 @@ #include "core.h" #include "bearer.h" -#define MAX_ETH_MEDIA MAX_BEARERS - #define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */ -/** - * struct eth_media - Ethernet bearer data structure - * @bearer: ptr to associated "generic" bearer structure - * @dev: ptr to associated Ethernet network device - * @tipc_packet_type: used in binding TIPC to Ethernet driver - * @setup: work item used when enabling bearer - * @cleanup: work item used when disabling bearer - */ -struct eth_media { - struct tipc_bearer *bearer; - struct net_device *dev; - struct packet_type tipc_packet_type; - struct work_struct setup; - struct work_struct cleanup; -}; - -static struct tipc_media eth_media_info; -static struct eth_media eth_media_array[MAX_ETH_MEDIA]; -static int eth_started; - -static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv); -/* - * Network device notifier info - */ -static struct notifier_block notifier = { - .notifier_call = recv_notification, - .priority = 0 -}; - -/** - * eth_media_addr_set - initialize Ethernet media address structure - * - * Media-dependent "value" field stores MAC address in first 6 bytes - * and zeroes out the remaining bytes. - */ -static void eth_media_addr_set(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *mac) -{ - memcpy(a->value, mac, ETH_ALEN); - memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN); - a->media_id = TIPC_MEDIA_TYPE_ETH; - a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN); -} - -/** - * send_msg - send a TIPC message out over an Ethernet interface - */ -static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, - struct tipc_media_addr *dest) -{ - struct sk_buff *clone; - struct net_device *dev; - int delta; - - clone = skb_clone(buf, GFP_ATOMIC); - if (!clone) - return 0; - - dev = ((struct eth_media *)(tb_ptr->usr_handle))->dev; - delta = dev->hard_header_len - skb_headroom(buf); - - if ((delta > 0) && - pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(clone); - return 0; - } - - skb_reset_network_header(clone); - clone->dev = dev; - clone->protocol = htons(ETH_P_TIPC); - dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, - dev->dev_addr, clone->len); - dev_queue_xmit(clone); - return 0; -} - -/** - * recv_msg - handle incoming TIPC message from an Ethernet interface - * - * Accept only packets explicitly sent to this node, or broadcast packets; - * ignores packets sent using Ethernet multicast, and traffic sent to other - * nodes (which can happen if interface is running in promiscuous mode). - */ -static int recv_msg(struct sk_buff *buf, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct eth_media *eb_ptr = (struct eth_media *)pt->af_packet_priv; - - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return NET_RX_DROP; - } - - if (likely(eb_ptr->bearer)) { - if (likely(buf->pkt_type <= PACKET_BROADCAST)) { - buf->next = NULL; - tipc_recv_msg(buf, eb_ptr->bearer); - return NET_RX_SUCCESS; - } - } - kfree_skb(buf); - return NET_RX_DROP; -} - -/** - * setup_media - setup association between Ethernet bearer and interface - */ -static void setup_media(struct work_struct *work) -{ - struct eth_media *eb_ptr = - container_of(work, struct eth_media, setup); - - dev_add_pack(&eb_ptr->tipc_packet_type); -} - -/** - * enable_media - attach TIPC bearer to an Ethernet interface - */ -static int enable_media(struct tipc_bearer *tb_ptr) -{ - struct net_device *dev; - struct eth_media *eb_ptr = ð_media_array[0]; - struct eth_media *stop = ð_media_array[MAX_ETH_MEDIA]; - char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; - int pending_dev = 0; - - /* Find unused Ethernet bearer structure */ - while (eb_ptr->dev) { - if (!eb_ptr->bearer) - pending_dev++; - if (++eb_ptr == stop) - return pending_dev ? -EAGAIN : -EDQUOT; - } - - /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); - if (!dev) - return -ENODEV; - - /* Create Ethernet bearer for device */ - eb_ptr->dev = dev; - eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); - eb_ptr->tipc_packet_type.dev = dev; - eb_ptr->tipc_packet_type.func = recv_msg; - eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; - INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); - INIT_WORK(&eb_ptr->setup, setup_media); - schedule_work(&eb_ptr->setup); - - /* Associate TIPC bearer with Ethernet bearer */ - eb_ptr->bearer = tb_ptr; - tb_ptr->usr_handle = (void *)eb_ptr; - memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); - memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN); - tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH; - tb_ptr->bcast_addr.broadcast = 1; - tb_ptr->mtu = dev->mtu; - tb_ptr->blocked = 0; - eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); - return 0; -} - -/** - * cleanup_media - break association between Ethernet bearer and interface - * - * This routine must be invoked from a work queue because it can sleep. - */ -static void cleanup_media(struct work_struct *work) -{ - struct eth_media *eb_ptr = - container_of(work, struct eth_media, cleanup); - - dev_remove_pack(&eb_ptr->tipc_packet_type); - dev_put(eb_ptr->dev); - eb_ptr->dev = NULL; -} - -/** - * disable_media - detach TIPC bearer from an Ethernet interface - * - * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, - * then get worker thread to complete bearer cleanup. (Can't do cleanup - * here because cleanup code needs to sleep and caller holds spinlocks.) - */ -static void disable_media(struct tipc_bearer *tb_ptr) -{ - struct eth_media *eb_ptr = (struct eth_media *)tb_ptr->usr_handle; - - eb_ptr->bearer = NULL; - INIT_WORK(&eb_ptr->cleanup, cleanup_media); - schedule_work(&eb_ptr->cleanup); -} - -/** - * recv_notification - handle device updates from OS - * - * Change the state of the Ethernet bearer (if any) associated with the - * specified device. - */ -static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct eth_media *eb_ptr = ð_media_array[0]; - struct eth_media *stop = ð_media_array[MAX_ETH_MEDIA]; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - while ((eb_ptr->dev != dev)) { - if (++eb_ptr == stop) - return NOTIFY_DONE; /* couldn't find device */ - } - if (!eb_ptr->bearer) - return NOTIFY_DONE; /* bearer had been disabled */ - - eb_ptr->bearer->mtu = dev->mtu; - - switch (evt) { - case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) - tipc_continue(eb_ptr->bearer); - else - tipc_block_bearer(eb_ptr->bearer); - break; - case NETDEV_UP: - tipc_continue(eb_ptr->bearer); - break; - case NETDEV_DOWN: - tipc_block_bearer(eb_ptr->bearer); - break; - case NETDEV_CHANGEMTU: - case NETDEV_CHANGEADDR: - tipc_block_bearer(eb_ptr->bearer); - tipc_continue(eb_ptr->bearer); - break; - case NETDEV_UNREGISTER: - case NETDEV_CHANGENAME: - tipc_disable_bearer(eb_ptr->bearer->name); - break; - } - return NOTIFY_OK; -} - -/** - * eth_addr2str - convert Ethernet address to string - */ -static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +/* convert Ethernet address to string */ +static int tipc_eth_addr2str(struct tipc_media_addr *a, char *str_buf, + int str_size) { if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ return 1; @@ -298,10 +50,8 @@ static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) return 0; } -/** - * eth_str2addr - convert Ethernet address format to message header format - */ -static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* convert Ethernet address format to message header format */ +static int tipc_eth_addr2msg(struct tipc_media_addr *a, char *msg_area) { memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; @@ -309,68 +59,30 @@ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) return 0; } -/** - * eth_str2addr - convert message header address format to Ethernet format - */ -static int eth_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* convert message header address format to Ethernet format */ +static int tipc_eth_msg2addr(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *msg_area) { if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) return 1; - eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); + tipc_l2_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); return 0; } -/* - * Ethernet media registration info - */ -static struct tipc_media eth_media_info = { - .send_msg = send_msg, - .enable_media = enable_media, - .disable_media = disable_media, - .addr2str = eth_addr2str, - .addr2msg = eth_addr2msg, - .msg2addr = eth_msg2addr, +/* Ethernet media registration info */ +struct tipc_media eth_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = tipc_eth_addr2str, + .addr2msg = tipc_eth_addr2msg, + .msg2addr = tipc_eth_msg2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_ETH, + .hwaddr_len = ETH_ALEN, .name = "eth" }; -/** - * tipc_eth_media_start - activate Ethernet bearer support - * - * Register Ethernet media type with TIPC bearer code. Also register - * with OS for notifications about device state changes. - */ -int tipc_eth_media_start(void) -{ - int res; - - if (eth_started) - return -EINVAL; - - res = tipc_register_media(ð_media_info); - if (res) - return res; - - res = register_netdevice_notifier(¬ifier); - if (!res) - eth_started = 1; - return res; -} - -/** - * tipc_eth_media_stop - deactivate Ethernet bearer support - */ -void tipc_eth_media_stop(void) -{ - if (!eth_started) - return; - - flush_scheduled_work(); - unregister_netdevice_notifier(¬ifier); - eth_started = 0; -} diff --git a/net/tipc/handler.c b/net/tipc/handler.c index b36f0fc..e4bc8a2 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -56,12 +56,13 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument) { struct queue_item *item; + spin_lock_bh(&qitem_lock); if (!handler_enabled) { pr_err("Signal request ignored by handler\n"); + spin_unlock_bh(&qitem_lock); return -ENOPROTOOPT; } - spin_lock_bh(&qitem_lock); item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC); if (!item) { pr_err("Signal queue out of memory\n"); @@ -112,10 +113,14 @@ void tipc_handler_stop(void) struct list_head *l, *n; struct queue_item *item; - if (!handler_enabled) + spin_lock_bh(&qitem_lock); + if (!handler_enabled) { + spin_unlock_bh(&qitem_lock); return; - + } handler_enabled = 0; + spin_unlock_bh(&qitem_lock); + tasklet_kill(&tipc_tasklet); spin_lock_bh(&qitem_lock); diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index c139892..844a77e 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -42,252 +42,9 @@ #include "core.h" #include "bearer.h" -#define MAX_IB_MEDIA MAX_BEARERS - -/** - * struct ib_media - Infiniband media data structure - * @bearer: ptr to associated "generic" bearer structure - * @dev: ptr to associated Infiniband network device - * @tipc_packet_type: used in binding TIPC to Infiniband driver - * @cleanup: work item used when disabling bearer - */ - -struct ib_media { - struct tipc_bearer *bearer; - struct net_device *dev; - struct packet_type tipc_packet_type; - struct work_struct setup; - struct work_struct cleanup; -}; - -static struct tipc_media ib_media_info; -static struct ib_media ib_media_array[MAX_IB_MEDIA]; -static int ib_started; - -/** - * ib_media_addr_set - initialize Infiniband media address structure - * - * Media-dependent "value" field stores MAC address in first 6 bytes - * and zeroes out the remaining bytes. - */ -static void ib_media_addr_set(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *mac) -{ - BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN); - memcpy(a->value, mac, INFINIBAND_ALEN); - a->media_id = TIPC_MEDIA_TYPE_IB; - a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN); -} - -/** - * send_msg - send a TIPC message out over an InfiniBand interface - */ -static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, - struct tipc_media_addr *dest) -{ - struct sk_buff *clone; - struct net_device *dev; - int delta; - - clone = skb_clone(buf, GFP_ATOMIC); - if (!clone) - return 0; - - dev = ((struct ib_media *)(tb_ptr->usr_handle))->dev; - delta = dev->hard_header_len - skb_headroom(buf); - - if ((delta > 0) && - pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(clone); - return 0; - } - - skb_reset_network_header(clone); - clone->dev = dev; - clone->protocol = htons(ETH_P_TIPC); - dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, - dev->dev_addr, clone->len); - dev_queue_xmit(clone); - return 0; -} - -/** - * recv_msg - handle incoming TIPC message from an InfiniBand interface - * - * Accept only packets explicitly sent to this node, or broadcast packets; - * ignores packets sent using InfiniBand multicast, and traffic sent to other - * nodes (which can happen if interface is running in promiscuous mode). - */ -static int recv_msg(struct sk_buff *buf, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct ib_media *ib_ptr = (struct ib_media *)pt->af_packet_priv; - - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return NET_RX_DROP; - } - - if (likely(ib_ptr->bearer)) { - if (likely(buf->pkt_type <= PACKET_BROADCAST)) { - buf->next = NULL; - tipc_recv_msg(buf, ib_ptr->bearer); - return NET_RX_SUCCESS; - } - } - kfree_skb(buf); - return NET_RX_DROP; -} - -/** - * setup_bearer - setup association between InfiniBand bearer and interface - */ -static void setup_media(struct work_struct *work) -{ - struct ib_media *ib_ptr = - container_of(work, struct ib_media, setup); - - dev_add_pack(&ib_ptr->tipc_packet_type); -} - -/** - * enable_media - attach TIPC bearer to an InfiniBand interface - */ -static int enable_media(struct tipc_bearer *tb_ptr) -{ - struct net_device *dev; - struct ib_media *ib_ptr = &ib_media_array[0]; - struct ib_media *stop = &ib_media_array[MAX_IB_MEDIA]; - char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; - int pending_dev = 0; - - /* Find unused InfiniBand bearer structure */ - while (ib_ptr->dev) { - if (!ib_ptr->bearer) - pending_dev++; - if (++ib_ptr == stop) - return pending_dev ? -EAGAIN : -EDQUOT; - } - - /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); - if (!dev) - return -ENODEV; - - /* Create InfiniBand bearer for device */ - ib_ptr->dev = dev; - ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); - ib_ptr->tipc_packet_type.dev = dev; - ib_ptr->tipc_packet_type.func = recv_msg; - ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr; - INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list)); - INIT_WORK(&ib_ptr->setup, setup_media); - schedule_work(&ib_ptr->setup); - - /* Associate TIPC bearer with InfiniBand bearer */ - ib_ptr->bearer = tb_ptr; - tb_ptr->usr_handle = (void *)ib_ptr; - memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); - memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN); - tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB; - tb_ptr->bcast_addr.broadcast = 1; - tb_ptr->mtu = dev->mtu; - tb_ptr->blocked = 0; - ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); - return 0; -} - -/** - * cleanup_bearer - break association between InfiniBand bearer and interface - * - * This routine must be invoked from a work queue because it can sleep. - */ -static void cleanup_bearer(struct work_struct *work) -{ - struct ib_media *ib_ptr = - container_of(work, struct ib_media, cleanup); - - dev_remove_pack(&ib_ptr->tipc_packet_type); - dev_put(ib_ptr->dev); - ib_ptr->dev = NULL; -} - -/** - * disable_media - detach TIPC bearer from an InfiniBand interface - * - * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away, - * then get worker thread to complete bearer cleanup. (Can't do cleanup - * here because cleanup code needs to sleep and caller holds spinlocks.) - */ -static void disable_media(struct tipc_bearer *tb_ptr) -{ - struct ib_media *ib_ptr = (struct ib_media *)tb_ptr->usr_handle; - - ib_ptr->bearer = NULL; - INIT_WORK(&ib_ptr->cleanup, cleanup_bearer); - schedule_work(&ib_ptr->cleanup); -} - -/** - * recv_notification - handle device updates from OS - * - * Change the state of the InfiniBand bearer (if any) associated with the - * specified device. - */ -static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ib_media *ib_ptr = &ib_media_array[0]; - struct ib_media *stop = &ib_media_array[MAX_IB_MEDIA]; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - while ((ib_ptr->dev != dev)) { - if (++ib_ptr == stop) - return NOTIFY_DONE; /* couldn't find device */ - } - if (!ib_ptr->bearer) - return NOTIFY_DONE; /* bearer had been disabled */ - - ib_ptr->bearer->mtu = dev->mtu; - - switch (evt) { - case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) - tipc_continue(ib_ptr->bearer); - else - tipc_block_bearer(ib_ptr->bearer); - break; - case NETDEV_UP: - tipc_continue(ib_ptr->bearer); - break; - case NETDEV_DOWN: - tipc_block_bearer(ib_ptr->bearer); - break; - case NETDEV_CHANGEMTU: - case NETDEV_CHANGEADDR: - tipc_block_bearer(ib_ptr->bearer); - tipc_continue(ib_ptr->bearer); - break; - case NETDEV_UNREGISTER: - case NETDEV_CHANGENAME: - tipc_disable_bearer(ib_ptr->bearer->name); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block notifier = { - .notifier_call = recv_notification, - .priority = 0, -}; - -/** - * ib_addr2str - convert InfiniBand address to string - */ -static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +/* convert InfiniBand address to string */ +static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, + int str_size) { if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ return 1; @@ -297,10 +54,8 @@ static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) return 0; } -/** - * ib_addr2msg - convert InfiniBand address format to message header format - */ -static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* convert InfiniBand address format to message header format */ +static int tipc_ib_addr2msg(struct tipc_media_addr *a, char *msg_area) { memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB; @@ -308,65 +63,27 @@ static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area) return 0; } -/** - * ib_msg2addr - convert message header address format to InfiniBand format - */ -static int ib_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* convert message header address format to InfiniBand format */ +static int tipc_ib_msg2addr(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *msg_area) { - ib_media_addr_set(tb_ptr, a, msg_area); + tipc_l2_media_addr_set(tb_ptr, a, msg_area); return 0; } -/* - * InfiniBand media registration info - */ -static struct tipc_media ib_media_info = { - .send_msg = send_msg, - .enable_media = enable_media, - .disable_media = disable_media, - .addr2str = ib_addr2str, - .addr2msg = ib_addr2msg, - .msg2addr = ib_msg2addr, +/* InfiniBand media registration info */ +struct tipc_media ib_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = tipc_ib_addr2str, + .addr2msg = tipc_ib_addr2msg, + .msg2addr = tipc_ib_msg2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_IB, + .hwaddr_len = INFINIBAND_ALEN, .name = "ib" }; -/** - * tipc_ib_media_start - activate InfiniBand bearer support - * - * Register InfiniBand media type with TIPC bearer code. Also register - * with OS for notifications about device state changes. - */ -int tipc_ib_media_start(void) -{ - int res; - - if (ib_started) - return -EINVAL; - - res = tipc_register_media(&ib_media_info); - if (res) - return res; - - res = register_netdevice_notifier(¬ifier); - if (!res) - ib_started = 1; - return res; -} - -/** - * tipc_ib_media_stop - deactivate InfiniBand bearer support - */ -void tipc_ib_media_stop(void) -{ - if (!ib_started) - return; - - flush_scheduled_work(); - unregister_netdevice_notifier(¬ifier); - ib_started = 0; -} diff --git a/net/tipc/link.c b/net/tipc/link.c index 69cd9bf..131a32a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -386,14 +386,7 @@ exit: */ static void link_release_outqueue(struct tipc_link *l_ptr) { - struct sk_buff *buf = l_ptr->first_out; - struct sk_buff *next; - - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } + kfree_skb_list(l_ptr->first_out); l_ptr->first_out = NULL; l_ptr->out_queue_size = 0; } @@ -415,32 +408,15 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) */ void tipc_link_stop(struct tipc_link *l_ptr) { - struct sk_buff *buf; - struct sk_buff *next; - - buf = l_ptr->oldest_deferred_in; - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } - - buf = l_ptr->first_out; - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } - + kfree_skb_list(l_ptr->oldest_deferred_in); + kfree_skb_list(l_ptr->first_out); tipc_link_reset_fragments(l_ptr); - kfree_skb(l_ptr->proto_msg_queue); l_ptr->proto_msg_queue = NULL; } void tipc_link_reset(struct tipc_link *l_ptr) { - struct sk_buff *buf; u32 prev_state = l_ptr->state; u32 checkpoint = l_ptr->next_in_no; int was_active_link = tipc_link_is_active(l_ptr); @@ -471,12 +447,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_release_outqueue(l_ptr); kfree_skb(l_ptr->proto_msg_queue); l_ptr->proto_msg_queue = NULL; - buf = l_ptr->oldest_deferred_in; - while (buf) { - struct sk_buff *next = buf->next; - kfree_skb(buf); - buf = next; - } + kfree_skb_list(l_ptr->oldest_deferred_in); if (!list_empty(&l_ptr->waiting_ports)) tipc_link_wakeup_ports(l_ptr, 1); @@ -517,10 +488,11 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) if (!l_ptr->started && (event != STARTING_EVT)) return; /* Not yet. */ - if (link_blocked(l_ptr)) { + /* Check whether changeover is going on */ + if (l_ptr->exp_msg_count) { if (event == TIMEOUT_EVT) link_set_timer(l_ptr, cont_intv); - return; /* Changeover going on */ + return; } switch (l_ptr->state) { @@ -790,8 +762,7 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf) return link_send_long_buf(l_ptr, buf); /* Packet can be queued or sent. */ - if (likely(!tipc_bearer_blocked(l_ptr->b_ptr) && - !link_congested(l_ptr))) { + if (likely(!link_congested(l_ptr))) { link_add_to_outqueue(l_ptr, buf, msg); tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); @@ -957,14 +928,13 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, if (likely(!link_congested(l_ptr))) { if (likely(msg_size(msg) <= l_ptr->max_pkt)) { - if (likely(!tipc_bearer_blocked(l_ptr->b_ptr))) { - link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, - &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return res; - } - } else + link_add_to_outqueue(l_ptr, buf, msg); + tipc_bearer_send(l_ptr->b_ptr, buf, + &l_ptr->media_addr); + l_ptr->unacked_window = 0; + return res; + } + else *used_max_pkt = l_ptr->max_pkt; } return tipc_link_send_buf(l_ptr, buf); /* All other cases */ @@ -1013,8 +983,7 @@ exit: } /* Exit if link (or bearer) is congested */ - if (link_congested(l_ptr) || - tipc_bearer_blocked(l_ptr->b_ptr)) { + if (link_congested(l_ptr)) { res = link_schedule_port(l_ptr, sender->ref, res); goto exit; @@ -1127,10 +1096,7 @@ again: if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { res = -EFAULT; error: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } + kfree_skb_list(buf_chain); return res; } sect_crs += sz; @@ -1180,18 +1146,12 @@ error: if (l_ptr->max_pkt < max_pkt) { sender->max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } + kfree_skb_list(buf_chain); goto again; } } else { reject: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } + kfree_skb_list(buf_chain); return tipc_port_reject_sections(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE); } @@ -1209,7 +1169,7 @@ reject: /* * tipc_link_push_packet: Push one unsent packet to the media */ -u32 tipc_link_push_packet(struct tipc_link *l_ptr) +static u32 tipc_link_push_packet(struct tipc_link *l_ptr) { struct sk_buff *buf = l_ptr->first_out; u32 r_q_size = l_ptr->retransm_queue_size; @@ -1281,9 +1241,6 @@ void tipc_link_push_queue(struct tipc_link *l_ptr) { u32 res; - if (tipc_bearer_blocked(l_ptr->b_ptr)) - return; - do { res = tipc_link_push_packet(l_ptr); } while (!res); @@ -1370,26 +1327,15 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf, msg = buf_msg(buf); - if (tipc_bearer_blocked(l_ptr->b_ptr)) { - if (l_ptr->retransm_queue_size == 0) { - l_ptr->retransm_queue_head = msg_seqno(msg); - l_ptr->retransm_queue_size = retransmits; - } else { - pr_err("Unexpected retransmit on link %s (qsize=%d)\n", - l_ptr->name, l_ptr->retransm_queue_size); + /* Detect repeated retransmit failures */ + if (l_ptr->last_retransmitted == msg_seqno(msg)) { + if (++l_ptr->stale_count > 100) { + link_retransmit_failure(l_ptr, buf); + return; } - return; } else { - /* Detect repeated retransmit failures on unblocked bearer */ - if (l_ptr->last_retransmitted == msg_seqno(msg)) { - if (++l_ptr->stale_count > 100) { - link_retransmit_failure(l_ptr, buf); - return; - } - } else { - l_ptr->last_retransmitted = msg_seqno(msg); - l_ptr->stale_count = 1; - } + l_ptr->last_retransmitted = msg_seqno(msg); + l_ptr->stale_count = 1; } while (retransmits && (buf != l_ptr->next_out) && buf) { @@ -1786,7 +1732,8 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, l_ptr->proto_msg_queue = NULL; } - if (link_blocked(l_ptr)) + /* Don't send protocol message during link changeover */ + if (l_ptr->exp_msg_count) return; /* Abort non-RESET send if communication with node is prohibited */ @@ -1861,12 +1808,6 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - /* Defer message if bearer is already blocked */ - if (tipc_bearer_blocked(l_ptr->b_ptr)) { - l_ptr->proto_msg_queue = buf; - return; - } - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); @@ -1885,7 +1826,8 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf) u32 msg_tol; struct tipc_msg *msg = buf_msg(buf); - if (link_blocked(l_ptr)) + /* Discard protocol message during link changeover */ + if (l_ptr->exp_msg_count) goto exit; /* record unnumbered packet arrival (force mismatch on next timeout) */ @@ -2306,11 +2248,7 @@ static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf) fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE); if (fragm == NULL) { kfree_skb(buf); - while (buf_chain) { - buf = buf_chain; - buf_chain = buf_chain->next; - kfree_skb(buf); - } + kfree_skb_list(buf_chain); return -ENOMEM; } msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); diff --git a/net/tipc/link.h b/net/tipc/link.h index 8a6c102..0636ca9 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -112,7 +112,6 @@ struct tipc_stats { * @continuity_interval: link continuity testing interval [in ms] * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM - * @blocked: indicates if link has been administratively blocked * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field @@ -162,7 +161,6 @@ struct tipc_link { u32 continuity_interval; u32 abort_limit; int state; - int blocked; u32 fsm_msg_cnt; struct { unchar hdr[INT_H_SIZE]; @@ -223,7 +221,6 @@ void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); -u32 tipc_link_push_packet(struct tipc_link *l_ptr); void tipc_link_stop(struct tipc_link *l_ptr); struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd); struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space); @@ -312,11 +309,6 @@ static inline int link_reset_reset(struct tipc_link *l_ptr) return l_ptr->state == RESET_RESET; } -static inline int link_blocked(struct tipc_link *l_ptr) -{ - return l_ptr->exp_msg_count || l_ptr->blocked; -} - static inline int link_congested(struct tipc_link *l_ptr) { return l_ptr->out_queue_size >= l_ptr->queue_limit[0]; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 09dcd54..92a1533 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -148,8 +148,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper, */ static struct sub_seq *tipc_subseq_alloc(u32 cnt) { - struct sub_seq *sseq = kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC); - return sseq; + return kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC); } /** diff --git a/net/tipc/node.c b/net/tipc/node.c index 25100c0..e167d26 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -235,11 +235,6 @@ int tipc_node_active_links(struct tipc_node *n_ptr) return n_ptr->active_links[0] != NULL; } -int tipc_node_redundant_links(struct tipc_node *n_ptr) -{ - return n_ptr->working_links > 1; -} - int tipc_node_is_up(struct tipc_node *n_ptr) { return tipc_node_active_links(n_ptr); @@ -291,11 +286,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { - while (n_ptr->bclink.deferred_head) { - struct sk_buff *buf = n_ptr->bclink.deferred_head; - n_ptr->bclink.deferred_head = buf->next; - kfree_skb(buf); - } + kfree_skb_list(n_ptr->bclink.deferred_head); n_ptr->bclink.deferred_size = 0; if (n_ptr->bclink.reasm_head) { diff --git a/net/tipc/node.h b/net/tipc/node.h index e5e96c0..d4bb654 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -115,7 +115,6 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); -int tipc_node_redundant_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); diff --git a/net/tipc/port.c b/net/tipc/port.c index c081a76..b742b26 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -251,18 +251,15 @@ struct tipc_port *tipc_createport(struct sock *sk, return p_ptr; } -int tipc_deleteport(u32 ref) +int tipc_deleteport(struct tipc_port *p_ptr) { - struct tipc_port *p_ptr; struct sk_buff *buf = NULL; - tipc_withdraw(ref, 0, NULL); - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; + tipc_withdraw(p_ptr, 0, NULL); - tipc_ref_discard(ref); - tipc_port_unlock(p_ptr); + spin_lock_bh(p_ptr->lock); + tipc_ref_discard(p_ptr->ref); + spin_unlock_bh(p_ptr->lock); k_cancel_timer(&p_ptr->timer); if (p_ptr->connected) { @@ -704,47 +701,36 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) } -int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) +int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, + struct tipc_name_seq const *seq) { - struct tipc_port *p_ptr; struct publication *publ; u32 key; - int res = -EINVAL; - p_ptr = tipc_port_lock(ref); - if (!p_ptr) + if (p_ptr->connected) return -EINVAL; + key = p_ptr->ref + p_ptr->pub_count + 1; + if (key == p_ptr->ref) + return -EADDRINUSE; - if (p_ptr->connected) - goto exit; - key = ref + p_ptr->pub_count + 1; - if (key == ref) { - res = -EADDRINUSE; - goto exit; - } publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, scope, p_ptr->ref, key); if (publ) { list_add(&publ->pport_list, &p_ptr->publications); p_ptr->pub_count++; p_ptr->published = 1; - res = 0; + return 0; } -exit: - tipc_port_unlock(p_ptr); - return res; + return -EINVAL; } -int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) +int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, + struct tipc_name_seq const *seq) { - struct tipc_port *p_ptr; struct publication *publ; struct publication *tpubl; int res = -EINVAL; - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; if (!seq) { list_for_each_entry_safe(publ, tpubl, &p_ptr->publications, pport_list) { @@ -771,7 +757,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) } if (list_empty(&p_ptr->publications)) p_ptr->published = 0; - tipc_port_unlock(p_ptr); return res; } @@ -832,17 +817,14 @@ exit: */ int __tipc_disconnect(struct tipc_port *tp_ptr) { - int res; - if (tp_ptr->connected) { tp_ptr->connected = 0; /* let timer expire on it's own to avoid deadlock! */ tipc_nodesub_unsubscribe(&tp_ptr->subscription); - res = 0; - } else { - res = -ENOTCONN; + return 0; } - return res; + + return -ENOTCONN; } /* diff --git a/net/tipc/port.h b/net/tipc/port.h index 9122535..34f12bd 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -116,7 +116,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err); void tipc_acknowledge(u32 port_ref, u32 ack); -int tipc_deleteport(u32 portref); +int tipc_deleteport(struct tipc_port *p_ptr); int tipc_portimportance(u32 portref, unsigned int *importance); int tipc_set_portimportance(u32 portref, unsigned int importance); @@ -127,9 +127,9 @@ int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); -int tipc_publish(u32 portref, unsigned int scope, +int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); -int tipc_withdraw(u32 portref, unsigned int scope, +int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); int tipc_connect(u32 portref, struct tipc_portid const *port); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3b61851..c8341d1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -239,7 +239,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, int tipc_sock_create_local(int type, struct socket **res) { int rc; - struct sock *sk; rc = sock_create_lite(AF_TIPC, type, 0, res); if (rc < 0) { @@ -248,8 +247,6 @@ int tipc_sock_create_local(int type, struct socket **res) } tipc_sk_create(&init_net, *res, 0, 1); - sk = (*res)->sk; - return 0; } @@ -354,7 +351,7 @@ static int release(struct socket *sock) * Delete TIPC port; this ensures no more messages are queued * (also disconnects an active connection & sends a 'FIN-' to peer) */ - res = tipc_deleteport(tport->ref); + res = tipc_deleteport(tport); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -386,30 +383,46 @@ static int release(struct socket *sock) */ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) { + struct sock *sk = sock->sk; struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - u32 portref = tipc_sk_port(sock->sk)->ref; + struct tipc_port *tport = tipc_sk_port(sock->sk); + int res = -EINVAL; - if (unlikely(!uaddr_len)) - return tipc_withdraw(portref, 0, NULL); + lock_sock(sk); + if (unlikely(!uaddr_len)) { + res = tipc_withdraw(tport, 0, NULL); + goto exit; + } - if (uaddr_len < sizeof(struct sockaddr_tipc)) - return -EINVAL; - if (addr->family != AF_TIPC) - return -EAFNOSUPPORT; + if (uaddr_len < sizeof(struct sockaddr_tipc)) { + res = -EINVAL; + goto exit; + } + if (addr->family != AF_TIPC) { + res = -EAFNOSUPPORT; + goto exit; + } if (addr->addrtype == TIPC_ADDR_NAME) addr->addr.nameseq.upper = addr->addr.nameseq.lower; - else if (addr->addrtype != TIPC_ADDR_NAMESEQ) - return -EAFNOSUPPORT; + else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { + res = -EAFNOSUPPORT; + goto exit; + } if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && (addr->addr.nameseq.type != TIPC_TOP_SRV) && - (addr->addr.nameseq.type != TIPC_CFG_SRV)) - return -EACCES; + (addr->addr.nameseq.type != TIPC_CFG_SRV)) { + res = -EACCES; + goto exit; + } - return (addr->scope > 0) ? - tipc_publish(portref, addr->scope, &addr->addr.nameseq) : - tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq); + res = (addr->scope > 0) ? + tipc_publish(tport, addr->scope, &addr->addr.nameseq) : + tipc_withdraw(tport, -addr->scope, &addr->addr.nameseq); +exit: + release_sock(sk); + return res; } /** @@ -754,16 +767,11 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, /* Handle special cases where there is no connection */ if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_UNCONNECTED) { + if (sock->state == SS_UNCONNECTED) res = send_packet(NULL, sock, m, total_len); - goto exit; - } else if (sock->state == SS_DISCONNECTING) { - res = -EPIPE; - goto exit; - } else { - res = -ENOTCONN; - goto exit; - } + else + res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN; + goto exit; } if (unlikely(m->msg_name)) { @@ -1311,14 +1319,12 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - unsigned int limit; if (msg_connected(msg)) - limit = sysctl_tipc_rmem[2]; - else - limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << - msg_importance(msg); - return limit; + return sysctl_tipc_rmem[2]; + + return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << + msg_importance(msg); } /** @@ -1514,14 +1520,12 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, sock->state != SS_CONNECTING, timeout ? (long)msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT); - lock_sock(sk); if (res <= 0) { if (res == 0) res = -ETIMEDOUT; - else - ; /* leave "res" unchanged */ - goto exit; + return res; } + lock_sock(sk); } if (unlikely(sock->state == SS_DISCONNECTING)) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 01625ccc..800ca61 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -80,6 +80,8 @@ * with BSD names. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/signal.h> @@ -366,7 +368,7 @@ static void unix_sock_destructor(struct sock *sk) WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { - printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk); + pr_info("Attempt to release alive unix socket: %p\n", sk); return; } @@ -378,7 +380,7 @@ static void unix_sock_destructor(struct sock *sk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); #ifdef UNIX_REFCNT_DEBUG - printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk, + pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, atomic_long_read(&unix_nr_socks)); #endif } @@ -530,13 +532,17 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int); -static void unix_set_peek_off(struct sock *sk, int val) +static int unix_set_peek_off(struct sock *sk, int val) { struct unix_sock *u = unix_sk(sk); - mutex_lock(&u->readlock); + if (mutex_lock_interruptible(&u->readlock)) + return -EINTR; + sk->sk_peek_off = val; mutex_unlock(&u->readlock); + + return 0; } @@ -714,7 +720,9 @@ static int unix_autobind(struct socket *sock) int err; unsigned int retries = 0; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) + return err; err = 0; if (u->addr) @@ -873,7 +881,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) + goto out; err = -EINVAL; if (u->addr) @@ -2433,8 +2443,7 @@ static int __init af_unix_init(void) rc = proto_register(&unix_proto, 1); if (rc != 0) { - printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n", - __func__); + pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); goto out; } diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 9b8cc87..78559b5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -277,6 +277,32 @@ void cfg80211_set_dfs_state(struct wiphy *wiphy, width, dfs_state); } +static u32 cfg80211_get_start_freq(u32 center_freq, + u32 bandwidth) +{ + u32 start_freq; + + if (bandwidth <= 20) + start_freq = center_freq; + else + start_freq = center_freq - bandwidth/2 + 10; + + return start_freq; +} + +static u32 cfg80211_get_end_freq(u32 center_freq, + u32 bandwidth) +{ + u32 end_freq; + + if (bandwidth <= 20) + end_freq = center_freq; + else + end_freq = center_freq + bandwidth/2 - 10; + + return end_freq; +} + static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, u32 center_freq, u32 bandwidth) @@ -284,13 +310,8 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, struct ieee80211_channel *c; u32 freq, start_freq, end_freq; - if (bandwidth <= 20) { - start_freq = center_freq; - end_freq = center_freq; - } else { - start_freq = center_freq - bandwidth/2 + 10; - end_freq = center_freq + bandwidth/2 - 10; - } + start_freq = cfg80211_get_start_freq(center_freq, bandwidth); + end_freq = cfg80211_get_end_freq(center_freq, bandwidth); for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); @@ -330,33 +351,159 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_chandef_dfs_required); -static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, - u32 center_freq, u32 bandwidth, - u32 prohibited_flags) +static int cfg80211_get_chans_dfs_usable(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) { struct ieee80211_channel *c; u32 freq, start_freq, end_freq; + int count = 0; - if (bandwidth <= 20) { - start_freq = center_freq; - end_freq = center_freq; - } else { - start_freq = center_freq - bandwidth/2 + 10; - end_freq = center_freq + bandwidth/2 - 10; + start_freq = cfg80211_get_start_freq(center_freq, bandwidth); + end_freq = cfg80211_get_end_freq(center_freq, bandwidth); + + /* + * Check entire range of channels for the bandwidth. + * Check all channels are DFS channels (DFS_USABLE or + * DFS_AVAILABLE). Return number of usable channels + * (require CAC). Allow DFS and non-DFS channel mix. + */ + for (freq = start_freq; freq <= end_freq; freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_DISABLED) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_RADAR) { + if (c->dfs_state == NL80211_DFS_UNAVAILABLE) + return -EINVAL; + + if (c->dfs_state == NL80211_DFS_USABLE) + count++; + } } + return count; +} + +bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r1, r2 = 0; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return false; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return false; + + r1 = cfg80211_get_chans_dfs_usable(wiphy, chandef->center_freq1, + width); + + if (r1 < 0) + return false; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_80P80: + WARN_ON(!chandef->center_freq2); + r2 = cfg80211_get_chans_dfs_usable(wiphy, + chandef->center_freq2, + width); + if (r2 < 0) + return false; + break; + default: + WARN_ON(chandef->center_freq2); + break; + } + + return (r1 + r2 > 0); +} + + +static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) +{ + struct ieee80211_channel *c; + u32 freq, start_freq, end_freq; + + start_freq = cfg80211_get_start_freq(center_freq, bandwidth); + end_freq = cfg80211_get_end_freq(center_freq, bandwidth); + + /* + * Check entire range of channels for the bandwidth. + * If any channel in between is disabled or has not + * had gone through CAC return false + */ for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return false; - /* check for radar flags */ - if ((prohibited_flags & c->flags & IEEE80211_CHAN_RADAR) && + if (c->flags & IEEE80211_CHAN_DISABLED) + return false; + + if ((c->flags & IEEE80211_CHAN_RADAR) && (c->dfs_state != NL80211_DFS_AVAILABLE)) return false; + } + + return true; +} + +static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return false; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return false; + + r = cfg80211_get_chans_dfs_available(wiphy, chandef->center_freq1, + width); + + /* If any of channels unavailable for cf1 just return */ + if (!r) + return r; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_80P80: + WARN_ON(!chandef->center_freq2); + r = cfg80211_get_chans_dfs_available(wiphy, + chandef->center_freq2, + width); + default: + WARN_ON(chandef->center_freq2); + break; + } + + return r; +} - /* check for the other flags */ - if (c->flags & prohibited_flags & ~IEEE80211_CHAN_RADAR) + +static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, + u32 center_freq, u32 bandwidth, + u32 prohibited_flags) +{ + struct ieee80211_channel *c; + u32 freq, start_freq, end_freq; + + start_freq = cfg80211_get_start_freq(center_freq, bandwidth); + end_freq = cfg80211_get_end_freq(center_freq, bandwidth); + + for (freq = start_freq; freq <= end_freq; freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c || c->flags & prohibited_flags) return false; } @@ -462,14 +609,19 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { bool res; + u32 prohibited_flags = IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_NO_IR | + IEEE80211_CHAN_RADAR; trace_cfg80211_reg_can_beacon(wiphy, chandef); - res = cfg80211_chandef_usable(wiphy, chandef, - IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_PASSIVE_SCAN | - IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR); + if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 && + cfg80211_chandef_dfs_available(wiphy, chandef)) { + /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ + prohibited_flags = IEEE80211_CHAN_DISABLED; + } + + res = cfg80211_chandef_usable(wiphy, chandef, prohibited_flags); trace_cfg80211_return_bool(res); return res; @@ -510,6 +662,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, : CHAN_MODE_EXCLUSIVE; return; } + break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->current_bss) { diff --git a/net/wireless/core.c b/net/wireless/core.c index aff959e..d89dee2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -203,17 +203,8 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - /* - * If the scan request wasn't notified as done, set it - * to aborted and leak it after a warning. The driver - * should have notified us that it ended at the latest - * during rdev_stop_p2p_device(). - */ - if (WARN_ON(!rdev->scan_req->notified)) - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, !rdev->scan_req->notified); - } + WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev && + !rdev->scan_req->notified); } static int cfg80211_rfkill_set_block(void *data, bool blocked) @@ -357,8 +348,6 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; - rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH; - return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new); @@ -451,6 +440,15 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; + /* support for 5/10 MHz is broken due to nl80211 API mess - disable */ + wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ; + + /* + * There are major locking problems in nl80211/mac80211 for CSA, + * disable for all drivers until this has been reworked. + */ + wiphy->flags &= ~WIPHY_FLAG_HAS_CHANNEL_SWITCH; + #ifdef CONFIG_PM if (WARN_ON(wiphy->wowlan && (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && @@ -566,6 +564,8 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); + rdev->wiphy.features |= NL80211_FEATURE_SCAN_FLUSH; + rtnl_lock(); res = device_add(&rdev->wiphy.dev); if (res) { @@ -586,7 +586,7 @@ int wiphy_register(struct wiphy *wiphy) if (IS_ERR(rdev->wiphy.debugfsdir)) rdev->wiphy.debugfsdir = NULL; - if (wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { + if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { struct regulatory_request request; request.wiphy_idx = get_wiphy_idx(wiphy); @@ -756,13 +756,16 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, { struct net_device *dev = wdev->netdev; + ASSERT_RTNL(); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - __cfg80211_stop_sched_scan(rdev, false); + if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) + __cfg80211_stop_sched_scan(rdev, false); wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT @@ -856,11 +859,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - if (WARN_ON(!rdev->scan_req->notified)) - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } + WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev && + !rdev->scan_req->notified); if (WARN_ON(rdev->sched_scan_req && rdev->sched_scan_req->dev == wdev->netdev)) { diff --git a/net/wireless/core.h b/net/wireless/core.h index af10e59..37ec16d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -67,9 +67,7 @@ struct cfg80211_registered_device { struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; -#ifdef CONFIG_NL80211_TESTMODE - struct genl_info *testmode_info; -#endif + struct genl_info *cur_cmd_info; struct work_struct conn_work; struct work_struct event_work; @@ -317,9 +315,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, const u8 *buf, size_t len, - bool no_cck, bool dont_wait_for_ack, u64 *cookie); + struct cfg80211_mgmt_tx_params *params, + u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, @@ -364,7 +361,7 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); void __cfg80211_scan_done(struct work_struct *wk); -void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); +void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev); void __cfg80211_sched_scan_results(struct work_struct *wk); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, bool driver_initiated); @@ -382,6 +379,19 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode, u8 radar_detect); +/** + * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * + * Checks if chandef is usable and we can/need start CAC on such channel. + * + * Return: Return true if all channels available and at least + * one channel require CAC (NL80211_DFS_USABLE) + */ +bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef); + void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index 42ed274..9a8217d 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -33,15 +33,7 @@ BEGIN { regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n" } -/^[ \t]*#/ { - # Ignore -} - -!active && /^[ \t]*$/ { - # Ignore -} - -!active && /country/ { +function parse_country_head() { country=$2 sub(/:/, "", country) printf "static const struct ieee80211_regdomain regdom_%s = {\n", country @@ -57,7 +49,8 @@ BEGIN { regdb = regdb "\t®dom_" country ",\n" } -active && /^[ \t]*\(/ { +function parse_reg_rule() +{ start = $1 sub(/\(/, "", start) end = $3 @@ -107,17 +100,21 @@ active && /^[ \t]*\(/ { } else if (flagarray[arg] == "PTMP-ONLY") { flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | " } else if (flagarray[arg] == "PASSIVE-SCAN") { - flags = flags "\n\t\t\tNL80211_RRF_PASSIVE_SCAN | " + flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " } else if (flagarray[arg] == "NO-IBSS") { - flags = flags "\n\t\t\tNL80211_RRF_NO_IBSS | " + flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " + } else if (flagarray[arg] == "NO-IR") { + flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " } + } flags = flags "0" printf "\t\tREG_RULE(%d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, flags rules++ } -active && /^[ \t]*$/ { +function print_tail_country() +{ active = 0 printf "\t},\n" printf "\t.n_reg_rules = %d\n", rules @@ -125,7 +122,29 @@ active && /^[ \t]*$/ { rules = 0; } +/^[ \t]*#/ { + # Ignore +} + +!active && /^[ \t]*$/ { + # Ignore +} + +!active && /country/ { + parse_country_head() +} + +active && /^[ \t]*\(/ { + parse_reg_rule() +} + +active && /^[ \t]*$/ { + print_tail_country() +} + END { + if (active) + print_tail_country() print regdb "};" print "" print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);" diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 9d797df..730147e 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -262,7 +262,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, /* try to find an IBSS channel if none requested ... */ if (!wdev->wext.ibss.chandef.chan) { - wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + struct ieee80211_channel *new_chan = NULL; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; @@ -274,22 +274,23 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, for (i = 0; i < sband->n_channels; i++) { chan = &sband->channels[i]; - if (chan->flags & IEEE80211_CHAN_NO_IBSS) + if (chan->flags & IEEE80211_CHAN_NO_IR) continue; if (chan->flags & IEEE80211_CHAN_DISABLED) continue; - wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef.center_freq1 = - chan->center_freq; + new_chan = chan; break; } - if (wdev->wext.ibss.chandef.chan) + if (new_chan) break; } - if (!wdev->wext.ibss.chandef.chan) + if (!new_chan) return -EINVAL; + + cfg80211_chandef_create(&wdev->wext.ibss.chandef, new_chan, + NL80211_CHAN_NO_HT); } /* don't join -- SSID is not there */ @@ -345,7 +346,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, chan = ieee80211_get_channel(wdev->wiphy, freq); if (!chan) return -EINVAL; - if (chan->flags & IEEE80211_CHAN_NO_IBSS || + if (chan->flags & IEEE80211_CHAN_NO_IR || chan->flags & IEEE80211_CHAN_DISABLED) return -EINVAL; } @@ -363,9 +364,8 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, return err; if (chan) { - wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; - wdev->wext.ibss.chandef.center_freq1 = freq; + cfg80211_chandef_create(&wdev->wext.ibss.chandef, chan, + NL80211_CHAN_NO_HT); wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0553fd4..9c7a11a 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -99,6 +99,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; + u8 radar_detect_width = 0; int err; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -141,8 +142,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, for (i = 0; i < sband->n_channels; i++) { chan = &sband->channels[i]; - if (chan->flags & (IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_PASSIVE_SCAN | + if (chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_DISABLED | IEEE80211_CHAN_RADAR)) continue; @@ -178,8 +178,16 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) return -EINVAL; - err = cfg80211_can_use_chan(rdev, wdev, setup->chandef.chan, - CHAN_MODE_SHARED); + err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef); + if (err < 0) + return err; + if (err) + radar_detect_width = BIT(setup->chandef.width); + + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + setup->chandef.chan, + CHAN_MODE_SHARED, + radar_detect_width); if (err) return err; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 6a6b1c8..52cca05 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -520,9 +520,7 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, const u8 *buf, size_t len, - bool no_cck, bool dont_wait_for_ack, u64 *cookie) + struct cfg80211_mgmt_tx_params *params, u64 *cookie) { const struct ieee80211_mgmt *mgmt; u16 stype; @@ -533,10 +531,10 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - if (len < 24 + 1) + if (params->len < 24 + 1) return -EINVAL; - mgmt = (const struct ieee80211_mgmt *) buf; + mgmt = (const struct ieee80211_mgmt *)params->buf; if (!ieee80211_is_mgmt(mgmt->frame_control)) return -EINVAL; @@ -615,9 +613,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev_mgmt_tx(rdev, wdev, chan, offchan, - wait, buf, len, no_cck, dont_wait_for_ack, - cookie); + return rdev_mgmt_tx(rdev, wdev, params, cookie); } bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, @@ -763,12 +759,12 @@ void cfg80211_radar_event(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_radar_event); void cfg80211_cac_event(struct net_device *netdev, + const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp) { struct wireless_dev *wdev = netdev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - struct cfg80211_chan_def chandef; unsigned long timeout; trace_cfg80211_cac_event(netdev, event); @@ -779,14 +775,12 @@ void cfg80211_cac_event(struct net_device *netdev, if (WARN_ON(!wdev->channel)) return; - cfg80211_chandef_create(&chandef, wdev->channel, NL80211_CHAN_NO_HT); - switch (event) { case NL80211_RADAR_CAC_FINISHED: timeout = wdev->cac_start_time + msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); WARN_ON(!time_after_eq(jiffies, timeout)); - cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_AVAILABLE); + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); break; case NL80211_RADAR_CAC_ABORTED: break; @@ -796,6 +790,6 @@ void cfg80211_cac_event(struct net_device *netdev, } wdev->cac_started = false; - nl80211_radar_notify(rdev, &chandef, event, netdev, gfp); + nl80211_radar_notify(rdev, chandef, event, netdev, gfp); } EXPORT_SYMBOL(cfg80211_cac_event); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a1eb210..04681a4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -376,6 +376,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY }, [NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG }, + [NL80211_ATTR_OPMODE_NOTIF] = { .type = NLA_U8 }, + [NL80211_ATTR_VENDOR_ID] = { .type = NLA_U32 }, + [NL80211_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, + [NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, }; /* policy for the key attributes */ @@ -564,12 +568,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_DISABLED) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_PASSIVE_SCAN)) - goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) - goto nla_put_failure; + if (chan->flags & IEEE80211_CHAN_NO_IR) { + if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IR)) + goto nla_put_failure; + if (nla_put_flag(msg, __NL80211_FREQUENCY_ATTR_NO_IBSS)) + goto nla_put_failure; + } if (chan->flags & IEEE80211_CHAN_RADAR) { if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) goto nla_put_failure; @@ -1184,6 +1188,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, struct nlattr *nl_bands, *nl_band; struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_cmds; + struct nlattr *nl_vendor_cmds; enum ieee80211_band band; struct ieee80211_channel *chan; int i; @@ -1247,10 +1252,6 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && - nla_put_flag(msg, WIPHY_FLAG_SUPPORTS_5_10_MHZ)) - goto nla_put_failure; - state->split_start++; if (state->split) break; @@ -1579,6 +1580,24 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (nl80211_send_coalesce(msg, dev)) goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && + (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) || + nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ))) + goto nla_put_failure; + state->split_start++; + break; + case 11: + nl_vendor_cmds = nla_nest_start(msg, NL80211_ATTR_VENDOR_DATA); + if (!nl_vendor_cmds) + goto nla_put_failure; + + for (i = 0; i < dev->wiphy.n_vendor_commands; i++) + if (nla_put(msg, i + 1, + sizeof(struct nl80211_vendor_cmd_info), + &dev->wiphy.vendor_commands[i].info)) + goto nla_put_failure; + nla_nest_end(msg, nl_vendor_cmds); + /* done */ state->split_start = 0; break; @@ -2187,7 +2206,7 @@ static inline u64 wdev_id(struct wireless_dev *wdev) } static int nl80211_send_chandef(struct sk_buff *msg, - struct cfg80211_chan_def *chandef) + const struct cfg80211_chan_def *chandef) { WARN_ON(!cfg80211_chandef_valid(chandef)); @@ -2687,7 +2706,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (!hdr) - return -ENOBUFS; + goto nla_put_failure; cookie.msg = msg; cookie.idx = key_idx; @@ -3236,6 +3255,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(params.acl); } + wdev_lock(wdev); err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { wdev->preset_chandef = params.chandef; @@ -3244,6 +3264,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); } + wdev_unlock(wdev); kfree(params.acl); @@ -3272,7 +3293,11 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (err) return err; - return rdev_change_beacon(rdev, dev, ¶ms); + wdev_lock(wdev); + err = rdev_change_beacon(rdev, dev, ¶ms); + wdev_unlock(wdev); + + return err; } static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info) @@ -4144,6 +4169,12 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { + params.opmode_notif_used = true; + params.opmode_notif = + nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); + } + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); @@ -4478,7 +4509,9 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; struct bss_parameters params; + int err; memset(¶ms, 0, sizeof(params)); /* default to not changing parameters */ @@ -4544,7 +4577,11 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; - return rdev_change_bss(rdev, dev, ¶ms); + wdev_lock(wdev); + err = rdev_change_bss(rdev, dev, ¶ms); + wdev_unlock(wdev); + + return err; } static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { @@ -5098,7 +5135,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) char *alpha2 = NULL; int rem_reg_rules = 0, r = 0; u32 num_rules = 0, rule_idx = 0, size_of_regd; - u8 dfs_region = 0; + enum nl80211_dfs_regions dfs_region = NL80211_DFS_UNSET; struct ieee80211_regdomain *rd = NULL; if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) @@ -5119,6 +5156,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (!reg_is_valid_request(alpha2)) + return -EINVAL; + size_of_regd = sizeof(struct ieee80211_regdomain) + num_rules * sizeof(struct ieee80211_reg_rule); @@ -5349,6 +5389,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out_free; } + + if (!wiphy->bands[band]) + continue; + err = ieee80211_get_ratemask(wiphy->bands[band], nla_data(attr), nla_len(attr), @@ -5361,10 +5405,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if (((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || - ((request->flags & NL80211_SCAN_FLAG_FLUSH) && - !(wiphy->features & NL80211_FEATURE_SCAN_FLUSH))) { + if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { err = -EOPNOTSUPP; goto out_free; } @@ -5604,10 +5646,8 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if (((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || - ((request->flags & NL80211_SCAN_FLAG_FLUSH) && - !(wiphy->features & NL80211_FEATURE_SCAN_FLUSH))) { + if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { err = -EOPNOTSUPP; goto out_free; } @@ -5651,8 +5691,13 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_chan_def chandef; + enum nl80211_dfs_regions dfs_region; int err; + dfs_region = reg_get_dfs_region(wdev->wiphy); + if (dfs_region == NL80211_DFS_UNSET) + return -EINVAL; + err = nl80211_parse_chandef(rdev, info, &chandef); if (err) return err; @@ -5670,7 +5715,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (err == 0) return -EINVAL; - if (chandef.chan->dfs_state != NL80211_DFS_USABLE) + if (!cfg80211_chandef_dfs_usable(wdev->wiphy, &chandef)) return -EINVAL; if (!rdev->ops->start_radar_detection) @@ -5810,7 +5855,11 @@ skip_beacons: if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX]) params.block_tx = true; - return rdev_channel_switch(rdev, dev, ¶ms); + wdev_lock(wdev); + err = rdev_channel_switch(rdev, dev, ¶ms); + wdev_unlock(wdev); + + return err; } static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, @@ -6673,6 +6722,40 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) return err; } +static struct sk_buff * +__cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, + int approxlen, u32 portid, u32 seq, + enum nl80211_commands cmd, + enum nl80211_attrs attr, gfp_t gfp) +{ + struct sk_buff *skb; + void *hdr; + struct nlattr *data; + + skb = nlmsg_new(approxlen + 100, gfp); + if (!skb) + return NULL; + + hdr = nl80211hdr_put(skb, portid, seq, 0, cmd); + if (!hdr) { + kfree_skb(skb); + return NULL; + } + + if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + data = nla_nest_start(skb, attr); + + ((void **)skb->cb)[0] = rdev; + ((void **)skb->cb)[1] = hdr; + ((void **)skb->cb)[2] = data; + + return skb; + + nla_put_failure: + kfree_skb(skb); + return NULL; +} #ifdef CONFIG_NL80211_TESTMODE static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) @@ -6697,11 +6780,11 @@ static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_TESTDATA]) return -EINVAL; - rdev->testmode_info = info; + rdev->cur_cmd_info = info; err = rdev_testmode_cmd(rdev, wdev, nla_data(info->attrs[NL80211_ATTR_TESTDATA]), nla_len(info->attrs[NL80211_ATTR_TESTDATA])); - rdev->testmode_info = NULL; + rdev->cur_cmd_info = NULL; return err; } @@ -6801,77 +6884,14 @@ static int nl80211_testmode_dump(struct sk_buff *skb, return err; } -static struct sk_buff * -__cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, - int approxlen, u32 portid, u32 seq, gfp_t gfp) -{ - struct sk_buff *skb; - void *hdr; - struct nlattr *data; - - skb = nlmsg_new(approxlen + 100, gfp); - if (!skb) - return NULL; - - hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE); - if (!hdr) { - kfree_skb(skb); - return NULL; - } - - if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) - goto nla_put_failure; - data = nla_nest_start(skb, NL80211_ATTR_TESTDATA); - - ((void **)skb->cb)[0] = rdev; - ((void **)skb->cb)[1] = hdr; - ((void **)skb->cb)[2] = data; - - return skb; - - nla_put_failure: - kfree_skb(skb); - return NULL; -} - -struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, - int approxlen) -{ - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - if (WARN_ON(!rdev->testmode_info)) - return NULL; - - return __cfg80211_testmode_alloc_skb(rdev, approxlen, - rdev->testmode_info->snd_portid, - rdev->testmode_info->snd_seq, - GFP_KERNEL); -} -EXPORT_SYMBOL(cfg80211_testmode_alloc_reply_skb); - -int cfg80211_testmode_reply(struct sk_buff *skb) -{ - struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; - void *hdr = ((void **)skb->cb)[1]; - struct nlattr *data = ((void **)skb->cb)[2]; - - if (WARN_ON(!rdev->testmode_info)) { - kfree_skb(skb); - return -EINVAL; - } - - nla_nest_end(skb, data); - genlmsg_end(skb, hdr); - return genlmsg_reply(skb, rdev->testmode_info); -} -EXPORT_SYMBOL(cfg80211_testmode_reply); - struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, int approxlen, gfp_t gfp) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - return __cfg80211_testmode_alloc_skb(rdev, approxlen, 0, 0, gfp); + return __cfg80211_alloc_vendor_skb(rdev, approxlen, 0, 0, + NL80211_CMD_TESTMODE, + NL80211_ATTR_TESTDATA, gfp); } EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); @@ -7308,11 +7328,72 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, return true; } +static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) +{ + u16 mcs_mask = 0; + + switch (vht_mcs_map) { + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + break; + case IEEE80211_VHT_MCS_SUPPORT_0_7: + mcs_mask = 0x00FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_8: + mcs_mask = 0x01FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_9: + mcs_mask = 0x03FF; + break; + default: + break; + } + + return mcs_mask; +} + +static void vht_build_mcs_mask(u16 vht_mcs_map, + u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) +{ + u8 nss; + + for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { + vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); + vht_mcs_map >>= 2; + } +} + +static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, + struct nl80211_txrate_vht *txrate, + u16 mcs[NL80211_VHT_NSS_MAX]) +{ + u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; + u8 i; + + if (!sband->vht_cap.vht_supported) + return false; + + memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); + + /* Build vht_mcs_mask from VHT capabilities */ + vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) + mcs[i] = txrate->mcs[i]; + else + return false; + } + + return true; +} + static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, - [NL80211_TXRATE_MCS] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_HT] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, }; static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, @@ -7325,9 +7406,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct nlattr *tx_rates; struct ieee80211_supported_band *sband; - - if (info->attrs[NL80211_ATTR_TX_RATES] == NULL) - return -EINVAL; + u16 vht_tx_mcs_map; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; @@ -7336,17 +7415,26 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, /* Default to all rates enabled */ for (i = 0; i < IEEE80211_NUM_BANDS; i++) { sband = rdev->wiphy.bands[i]; - mask.control[i].legacy = - sband ? (1 << sband->n_bitrates) - 1 : 0; - if (sband) - memcpy(mask.control[i].mcs, - sband->ht_cap.mcs.rx_mask, - sizeof(mask.control[i].mcs)); - else - memset(mask.control[i].mcs, 0, - sizeof(mask.control[i].mcs)); + + if (!sband) + continue; + + mask.control[i].legacy = (1 << sband->n_bitrates) - 1; + memcpy(mask.control[i].ht_mcs, + sband->ht_cap.mcs.rx_mask, + sizeof(mask.control[i].ht_mcs)); + + if (!sband->vht_cap.vht_supported) + continue; + + vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + vht_build_mcs_mask(vht_tx_mcs_map, mask.control[i].vht_mcs); } + /* if no rates are given set it back to the defaults */ + if (!info->attrs[NL80211_ATTR_TX_RATES]) + goto out; + /* * The nested attribute uses enum nl80211_band as the index. This maps * directly to the enum ieee80211_band values used in cfg80211. @@ -7371,31 +7459,44 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, nla_len(tb[NL80211_TXRATE_LEGACY])) return -EINVAL; } - if (tb[NL80211_TXRATE_MCS]) { + if (tb[NL80211_TXRATE_HT]) { if (!ht_rateset_to_mask( sband, - nla_data(tb[NL80211_TXRATE_MCS]), - nla_len(tb[NL80211_TXRATE_MCS]), - mask.control[band].mcs)) + nla_data(tb[NL80211_TXRATE_HT]), + nla_len(tb[NL80211_TXRATE_HT]), + mask.control[band].ht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_VHT]) { + if (!vht_set_mcs_mask( + sband, + nla_data(tb[NL80211_TXRATE_VHT]), + mask.control[band].vht_mcs)) return -EINVAL; } if (mask.control[band].legacy == 0) { - /* don't allow empty legacy rates if HT - * is not even supported. */ - if (!rdev->wiphy.bands[band]->ht_cap.ht_supported) + /* don't allow empty legacy rates if HT or VHT + * are not even supported. + */ + if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || + rdev->wiphy.bands[band]->vht_cap.vht_supported)) return -EINVAL; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - if (mask.control[band].mcs[i]) - break; + if (mask.control[band].ht_mcs[i]) + goto out; + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) + if (mask.control[band].vht_mcs[i]) + goto out; /* legacy and mcs rates may not be both empty */ - if (i == IEEE80211_HT_MCS_MASK_LEN) - return -EINVAL; + return -EINVAL; } } +out: return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); } @@ -7443,10 +7544,10 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) void *hdr = NULL; u64 cookie; struct sk_buff *msg = NULL; - unsigned int wait = 0; - bool offchan, no_cck, dont_wait_for_ack; - - dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK]; + struct cfg80211_mgmt_tx_params params = { + .dont_wait_for_ack = + info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK], + }; if (!info->attrs[NL80211_ATTR_FRAME]) return -EINVAL; @@ -7473,24 +7574,24 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_DURATION]) { if (!(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) return -EINVAL; - wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); + params.wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); /* * We should wait on the channel for at least a minimum amount * of time (10ms) but no longer than the driver supports. */ - if (wait < NL80211_MIN_REMAIN_ON_CHANNEL_TIME || - wait > rdev->wiphy.max_remain_on_channel_duration) + if (params.wait < NL80211_MIN_REMAIN_ON_CHANNEL_TIME || + params.wait > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; } - offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; + params.offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; - if (offchan && !(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) + if (params.offchan && !(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) return -EINVAL; - no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); + params.no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); /* get the channel if any has been specified, otherwise pass NULL to * the driver. The latter will use the current one @@ -7502,10 +7603,10 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return err; } - if (!chandef.chan && offchan) + if (!chandef.chan && params.offchan) return -EINVAL; - if (!dont_wait_for_ack) { + if (!params.dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -7518,10 +7619,10 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - err = cfg80211_mlme_mgmt_tx(rdev, wdev, chandef.chan, offchan, wait, - nla_data(info->attrs[NL80211_ATTR_FRAME]), - nla_len(info->attrs[NL80211_ATTR_FRAME]), - no_cck, dont_wait_for_ack, &cookie); + params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); + params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); + params.chan = chandef.chan; + err = cfg80211_mlme_mgmt_tx(rdev, wdev, ¶ms, &cookie); if (err) goto free_msg; @@ -8855,6 +8956,111 @@ static int nl80211_crit_protocol_stop(struct sk_buff *skb, return 0; } +static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = + __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs); + int i, err; + u32 vid, subcmd; + + if (!rdev->wiphy.vendor_commands) + return -EOPNOTSUPP; + + if (IS_ERR(wdev)) { + err = PTR_ERR(wdev); + if (err != -EINVAL) + return err; + wdev = NULL; + } else if (wdev->wiphy != &rdev->wiphy) { + return -EINVAL; + } + + if (!info->attrs[NL80211_ATTR_VENDOR_ID] || + !info->attrs[NL80211_ATTR_VENDOR_SUBCMD]) + return -EINVAL; + + vid = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_ID]); + subcmd = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_SUBCMD]); + for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { + const struct wiphy_vendor_command *vcmd; + void *data = NULL; + int len = 0; + + vcmd = &rdev->wiphy.vendor_commands[i]; + + if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) + continue; + + if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | + WIPHY_VENDOR_CMD_NEED_NETDEV)) { + if (!wdev) + return -EINVAL; + if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && + !wdev->netdev) + return -EINVAL; + + if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { + if (wdev->netdev && + !netif_running(wdev->netdev)) + return -ENETDOWN; + if (!wdev->netdev && !wdev->p2p_started) + return -ENETDOWN; + } + } else { + wdev = NULL; + } + + if (info->attrs[NL80211_ATTR_VENDOR_DATA]) { + data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]); + len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]); + } + + rdev->cur_cmd_info = info; + err = rdev->wiphy.vendor_commands[i].doit(&rdev->wiphy, wdev, + data, len); + rdev->cur_cmd_info = NULL; + return err; + } + + return -EOPNOTSUPP; +} + +struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, + enum nl80211_commands cmd, + enum nl80211_attrs attr, + int approxlen) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + if (WARN_ON(!rdev->cur_cmd_info)) + return NULL; + + return __cfg80211_alloc_vendor_skb(rdev, approxlen, + rdev->cur_cmd_info->snd_portid, + rdev->cur_cmd_info->snd_seq, + cmd, attr, GFP_KERNEL); +} +EXPORT_SYMBOL(__cfg80211_alloc_reply_skb); + +int cfg80211_vendor_cmd_reply(struct sk_buff *skb) +{ + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; + void *hdr = ((void **)skb->cb)[1]; + struct nlattr *data = ((void **)skb->cb)[2]; + + if (WARN_ON(!rdev->cur_cmd_info)) { + kfree_skb(skb); + return -EINVAL; + } + + nla_nest_end(skb, data); + genlmsg_end(skb, hdr); + return genlmsg_reply(skb, rdev->cur_cmd_info); +} +EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_reply); + + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -9579,6 +9785,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_VENDOR, + .doit = nl80211_vendor_cmd, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, + }, }; /* notification functions */ @@ -9633,8 +9847,9 @@ static int nl80211_add_scan_req(struct sk_buff *msg, nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) goto nla_put_failure; - if (req->flags) - nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags); + if (req->flags && + nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags)) + goto nla_put_failure; return 0; nla_put_failure: @@ -10805,21 +11020,18 @@ void cfg80211_ch_switch_notify(struct net_device *dev, struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - trace_cfg80211_ch_switch_notify(dev, chandef); + ASSERT_WDEV_LOCK(wdev); - wdev_lock(wdev); + trace_cfg80211_ch_switch_notify(dev, chandef); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO && wdev->iftype != NL80211_IFTYPE_ADHOC && wdev->iftype != NL80211_IFTYPE_MESH_POINT)) - goto out; + return; wdev->channel = chandef->chan; nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); -out: - wdev_unlock(wdev); - return; } EXPORT_SYMBOL(cfg80211_ch_switch_notify); @@ -10878,7 +11090,7 @@ EXPORT_SYMBOL(cfg80211_cqm_txe_notify); void nl80211_radar_notify(struct cfg80211_registered_device *rdev, - struct cfg80211_chan_def *chandef, + const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, struct net_device *netdev, gfp_t gfp) { @@ -11093,6 +11305,8 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct nlattr *reasons; reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + if (!reasons) + goto free_msg; if (wakeup->disconnect && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) @@ -11118,16 +11332,18 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, wakeup->pattern_idx)) goto free_msg; - if (wakeup->tcp_match) - nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + if (wakeup->tcp_match && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH)) + goto free_msg; - if (wakeup->tcp_connlost) - nla_put_flag(msg, - NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + if (wakeup->tcp_connlost && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST)) + goto free_msg; - if (wakeup->tcp_nomoretokens) - nla_put_flag(msg, - NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + if (wakeup->tcp_nomoretokens && + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS)) + goto free_msg; if (wakeup->packet) { u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; @@ -11263,24 +11479,29 @@ void cfg80211_ft_event(struct net_device *netdev, return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); - if (!hdr) { - nlmsg_free(msg); - return; - } + if (!hdr) + goto out; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap)) + goto out; - nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap); - if (ft_event->ies) - nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies); - if (ft_event->ric_ies) - nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, - ft_event->ric_ies); + if (ft_event->ies && + nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies)) + goto out; + if (ft_event->ric_ies && + nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, + ft_event->ric_ies)) + goto out; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); + return; + out: + nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_ft_event); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2c0f2b3..b1b2313 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -70,7 +70,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, void nl80211_radar_notify(struct cfg80211_registered_device *rdev, - struct cfg80211_chan_def *chandef, + const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, struct net_device *netdev, gfp_t gfp); diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index a271c27..722da61 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -124,6 +124,10 @@ int ieee80211_radiotap_iterator_init( /* find payload start allowing for extended bitmap(s) */ if (iterator->_bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT)) { + if ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader + sizeof(uint32_t) > + (unsigned long)iterator->_max_length) + return -EINVAL; while (get_unaligned_le32(iterator->_arg) & (1 << IEEE80211_RADIOTAP_EXT)) { iterator->_arg += sizeof(uint32_t); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 37ce9fd..a6c03ab 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -624,16 +624,12 @@ rdev_cancel_remain_on_channel(struct cfg80211_registered_device *rdev, static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, const u8 *buf, size_t len, - bool no_cck, bool dont_wait_for_ack, u64 *cookie) + struct cfg80211_mgmt_tx_params *params, + u64 *cookie) { int ret; - trace_rdev_mgmt_tx(&rdev->wiphy, wdev, chan, offchan, - wait, no_cck, dont_wait_for_ack); - ret = rdev->ops->mgmt_tx(&rdev->wiphy, wdev, chan, offchan, - wait, buf, len, no_cck, - dont_wait_for_ack, cookie); + trace_rdev_mgmt_tx(&rdev->wiphy, wdev, params); + ret = rdev->ops->mgmt_tx(&rdev->wiphy, wdev, params, cookie); trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); return ret; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7da67fd..7d20d84 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -120,6 +120,48 @@ static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) return rtnl_dereference(wiphy->regd); } +static const char *reg_dfs_region_str(enum nl80211_dfs_regions dfs_region) +{ + switch (dfs_region) { + case NL80211_DFS_UNSET: + return "unset"; + case NL80211_DFS_FCC: + return "FCC"; + case NL80211_DFS_ETSI: + return "ETSI"; + case NL80211_DFS_JP: + return "JP"; + } + return "Unknown"; +} + +enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy) +{ + const struct ieee80211_regdomain *regd = NULL; + const struct ieee80211_regdomain *wiphy_regd = NULL; + + regd = get_cfg80211_regdom(); + if (!wiphy) + goto out; + + wiphy_regd = get_wiphy_regdom(wiphy); + if (!wiphy_regd) + goto out; + + if (wiphy_regd->dfs_region == regd->dfs_region) + goto out; + + REG_DBG_PRINT("%s: device specific dfs_region " + "(%s) disagrees with cfg80211's " + "central dfs_region (%s)\n", + dev_name(&wiphy->dev), + reg_dfs_region_str(wiphy_regd->dfs_region), + reg_dfs_region_str(regd->dfs_region)); + +out: + return regd->dfs_region; +} + static void rcu_free_regdom(const struct ieee80211_regdomain *r) { if (!r) @@ -163,35 +205,29 @@ static const struct ieee80211_regdomain world_regdom = { REG_RULE(2412-10, 2462+10, 40, 6, 20, 0), /* IEEE 802.11b/g, channels 12..13. */ REG_RULE(2467-10, 2472+10, 40, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS), + NL80211_RRF_NO_IR), /* IEEE 802.11 channel 14 - Only JP enables * this and for 802.11b only */ REG_RULE(2484-10, 2484+10, 20, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS | + NL80211_RRF_NO_IR | NL80211_RRF_NO_OFDM), /* IEEE 802.11a, channel 36..48 */ REG_RULE(5180-10, 5240+10, 160, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS), + NL80211_RRF_NO_IR), /* IEEE 802.11a, channel 52..64 - DFS required */ REG_RULE(5260-10, 5320+10, 160, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS | + NL80211_RRF_NO_IR | NL80211_RRF_DFS), /* IEEE 802.11a, channel 100..144 - DFS required */ REG_RULE(5500-10, 5720+10, 160, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS | + NL80211_RRF_NO_IR | NL80211_RRF_DFS), /* IEEE 802.11a, channel 149..165 */ REG_RULE(5745-10, 5825+10, 80, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS), + NL80211_RRF_NO_IR), /* IEEE 802.11ad (60gHz), channels 1..3 */ REG_RULE(56160+2160*1-1080, 56160+2160*3+1080, 2160, 0, 0, 0), @@ -208,11 +244,26 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); +static void reg_kfree_last_request(void) +{ + struct regulatory_request *lr; + + lr = get_last_request(); + + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); +} + +static void reg_update_last_request(struct regulatory_request *request) +{ + reg_kfree_last_request(); + rcu_assign_pointer(last_request, request); +} + static void reset_regdomains(bool full_reset, const struct ieee80211_regdomain *new_regdom) { const struct ieee80211_regdomain *r; - struct regulatory_request *lr; ASSERT_RTNL(); @@ -235,10 +286,7 @@ static void reset_regdomains(bool full_reset, if (!full_reset) return; - lr = get_last_request(); - if (lr != &core_request_world && lr) - kfree_rcu(lr, rcu_head); - rcu_assign_pointer(last_request, &core_request_world); + reg_update_last_request(&core_request_world); } /* @@ -456,7 +504,15 @@ static int call_crda(const char *alpha2) return kobject_uevent(®_pdev->dev.kobj, KOBJ_CHANGE); } -static bool reg_is_valid_request(const char *alpha2) +static enum reg_request_treatment +reg_call_crda(struct regulatory_request *request) +{ + if (call_crda(request->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_OK; +} + +bool reg_is_valid_request(const char *alpha2) { struct regulatory_request *lr = get_last_request(); @@ -557,6 +613,20 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, } /* + * Later on we can perhaps use the more restrictive DFS + * region but we don't have information for that yet so + * for now simply disallow conflicts. + */ +static enum nl80211_dfs_regions +reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1, + const enum nl80211_dfs_regions dfs_region2) +{ + if (dfs_region1 != dfs_region2) + return NL80211_DFS_UNSET; + return dfs_region1; +} + +/* * Helper for regdom_intersect(), this does the real * mathematical intersection fun */ @@ -687,6 +757,8 @@ regdom_intersect(const struct ieee80211_regdomain *rd1, rd->n_reg_rules = num_rules; rd->alpha2[0] = '9'; rd->alpha2[1] = '8'; + rd->dfs_region = reg_intersect_dfs_region(rd1->dfs_region, + rd2->dfs_region); return rd; } @@ -698,10 +770,8 @@ regdom_intersect(const struct ieee80211_regdomain *rd1, static u32 map_regdom_flags(u32 rd_flags) { u32 channel_flags = 0; - if (rd_flags & NL80211_RRF_PASSIVE_SCAN) - channel_flags |= IEEE80211_CHAN_PASSIVE_SCAN; - if (rd_flags & NL80211_RRF_NO_IBSS) - channel_flags |= IEEE80211_CHAN_NO_IBSS; + if (rd_flags & NL80211_RRF_NO_IR_ALL) + channel_flags |= IEEE80211_CHAN_NO_IR; if (rd_flags & NL80211_RRF_DFS) channel_flags |= IEEE80211_CHAN_RADAR; if (rd_flags & NL80211_RRF_NO_OFDM) @@ -854,8 +924,18 @@ static void handle_channel(struct wiphy *wiphy, PTR_ERR(reg_rule) == -ERANGE) return; - REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq); - chan->flags |= IEEE80211_CHAN_DISABLED; + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && + request_wiphy && request_wiphy == wiphy && + request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) { + REG_DBG_PRINT("Disabling freq %d MHz for good\n", + chan->center_freq); + chan->orig_flags |= IEEE80211_CHAN_DISABLED; + chan->flags = chan->orig_flags; + } else { + REG_DBG_PRINT("Disabling freq %d MHz\n", + chan->center_freq); + chan->flags |= IEEE80211_CHAN_DISABLED; + } return; } @@ -873,7 +953,7 @@ static void handle_channel(struct wiphy *wiphy, if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && - request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { + request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) { /* * This guarantees the driver's requested regulatory domain * will always be used as a base for further regulatory @@ -899,13 +979,11 @@ static void handle_channel(struct wiphy *wiphy, chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); if (chan->orig_mpwr) { /* - * Devices that have their own custom regulatory domain - * but also use WIPHY_FLAG_STRICT_REGULATORY will follow the - * passed country IE power settings. + * Devices that use REGULATORY_COUNTRY_IE_FOLLOW_POWER + * will always follow the passed country IE power settings. */ if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && - wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) + wiphy->regulatory_flags & REGULATORY_COUNTRY_IE_FOLLOW_POWER) chan->max_power = chan->max_reg_power; else chan->max_power = min(chan->orig_mpwr, @@ -975,8 +1053,8 @@ static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) static bool wiphy_strict_alpha2_regd(struct wiphy *wiphy) { - if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && - !(wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY)) + if (wiphy->regulatory_flags & REGULATORY_STRICT_REG && + !(wiphy->regulatory_flags & REGULATORY_CUSTOM_REG)) return true; return false; } @@ -994,7 +1072,7 @@ static bool ignore_reg_update(struct wiphy *wiphy, } if (initiator == NL80211_REGDOM_SET_BY_CORE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { + wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { REG_DBG_PRINT("Ignoring regulatory request set by %s " "since the driver uses its own custom " "regulatory domain\n", @@ -1032,7 +1110,7 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) return true; if (lr && lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) + wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) return true; return false; @@ -1060,19 +1138,14 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, if (!reg_is_world_roaming(wiphy)) return; - if (wiphy->flags & WIPHY_FLAG_DISABLE_BEACON_HINTS) + if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS) return; chan_before.center_freq = chan->center_freq; chan_before.flags = chan->flags; - if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) { - chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; - channel_changed = true; - } - - if (chan->flags & IEEE80211_CHAN_NO_IBSS) { - chan->flags &= ~IEEE80211_CHAN_NO_IBSS; + if (chan->flags & IEEE80211_CHAN_NO_IR) { + chan->flags &= ~IEEE80211_CHAN_NO_IR; channel_changed = true; } @@ -1205,14 +1278,30 @@ static void reg_process_ht_flags(struct wiphy *wiphy) reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } +static void reg_call_notifier(struct wiphy *wiphy, + struct regulatory_request *request) +{ + if (wiphy->reg_notifier) + wiphy->reg_notifier(wiphy, request); +} + static void wiphy_update_regulatory(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { enum ieee80211_band band; struct regulatory_request *lr = get_last_request(); - if (ignore_reg_update(wiphy, initiator)) + if (ignore_reg_update(wiphy, initiator)) { + /* + * Regulatory updates set by CORE are ignored for custom + * regulatory cards. Let us notify the changes to the driver, + * as some drivers used this to restore its orig_* reg domain. + */ + if (initiator == NL80211_REGDOM_SET_BY_CORE && + wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) + reg_call_notifier(wiphy, lr); return; + } lr->dfs_region = get_cfg80211_regdom()->dfs_region; @@ -1221,9 +1310,7 @@ static void wiphy_update_regulatory(struct wiphy *wiphy, reg_process_beacons(wiphy); reg_process_ht_flags(wiphy); - - if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, lr); + reg_call_notifier(wiphy, lr); } static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) @@ -1236,15 +1323,6 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; wiphy_update_regulatory(wiphy, initiator); - /* - * Regulatory updates set by CORE are ignored for custom - * regulatory cards. Let us notify the changes to the driver, - * as some drivers used this to restore its orig_* reg domain. - */ - if (initiator == NL80211_REGDOM_SET_BY_CORE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && - wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, get_last_request()); } } @@ -1263,7 +1341,8 @@ static void handle_channel_custom(struct wiphy *wiphy, if (IS_ERR(reg_rule)) { REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); - chan->flags = IEEE80211_CHAN_DISABLED; + chan->orig_flags |= IEEE80211_CHAN_DISABLED; + chan->flags = chan->orig_flags; return; } @@ -1305,6 +1384,10 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, enum ieee80211_band band; unsigned int bands_set = 0; + WARN(!(wiphy->regulatory_flags & REGULATORY_CUSTOM_REG), + "wiphy should have REGULATORY_CUSTOM_REG\n"); + wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!wiphy->bands[band]) continue; @@ -1320,225 +1403,285 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); -/* This has the logic which determines when a new request - * should be ignored. */ -static enum reg_request_treatment -get_reg_request_treatment(struct wiphy *wiphy, - struct regulatory_request *pending_request) +static void reg_set_request_processed(void) { - struct wiphy *last_wiphy = NULL; + bool need_more_processing = false; struct regulatory_request *lr = get_last_request(); - /* All initial requests are respected */ - if (!lr) - return REG_REQ_OK; + lr->processed = true; - switch (pending_request->initiator) { - case NL80211_REGDOM_SET_BY_CORE: - return REG_REQ_OK; - case NL80211_REGDOM_SET_BY_COUNTRY_IE: - if (reg_request_cell_base(lr)) { - /* Trust a Cell base station over the AP's country IE */ - if (regdom_changes(pending_request->alpha2)) - return REG_REQ_IGNORE; - return REG_REQ_ALREADY_SET; - } + spin_lock(®_requests_lock); + if (!list_empty(®_requests_list)) + need_more_processing = true; + spin_unlock(®_requests_lock); - last_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) + cancel_delayed_work(®_timeout); - if (unlikely(!is_an_alpha2(pending_request->alpha2))) - return -EINVAL; - if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { - if (last_wiphy != wiphy) { - /* - * Two cards with two APs claiming different - * Country IE alpha2s. We could - * intersect them, but that seems unlikely - * to be correct. Reject second one for now. - */ - if (regdom_changes(pending_request->alpha2)) - return REG_REQ_IGNORE; - return REG_REQ_ALREADY_SET; - } - /* - * Two consecutive Country IE hints on the same wiphy. - * This should be picked up early by the driver/stack - */ - if (WARN_ON(regdom_changes(pending_request->alpha2))) - return REG_REQ_OK; - return REG_REQ_ALREADY_SET; - } - return REG_REQ_OK; - case NL80211_REGDOM_SET_BY_DRIVER: - if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { - if (regdom_changes(pending_request->alpha2)) - return REG_REQ_OK; - return REG_REQ_ALREADY_SET; - } + if (need_more_processing) + schedule_work(®_work); +} - /* - * This would happen if you unplug and plug your card - * back in or if you add a new device for which the previously - * loaded card also agrees on the regulatory domain. - */ - if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && - !regdom_changes(pending_request->alpha2)) - return REG_REQ_ALREADY_SET; +/** + * reg_process_hint_core - process core regulatory requests + * @pending_request: a pending core regulatory request + * + * The wireless subsystem can use this function to process + * a regulatory request issued by the regulatory core. + * + * Returns one of the different reg request treatment values. + */ +static enum reg_request_treatment +reg_process_hint_core(struct regulatory_request *core_request) +{ + + core_request->intersect = false; + core_request->processed = false; + + reg_update_last_request(core_request); + + return reg_call_crda(core_request); +} + +static enum reg_request_treatment +__reg_process_hint_user(struct regulatory_request *user_request) +{ + struct regulatory_request *lr = get_last_request(); + + if (reg_request_cell_base(user_request)) + return reg_ignore_cell_hint(user_request); + if (reg_request_cell_base(lr)) + return REG_REQ_IGNORE; + + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) return REG_REQ_INTERSECT; - case NL80211_REGDOM_SET_BY_USER: - if (reg_request_cell_base(pending_request)) - return reg_ignore_cell_hint(pending_request); + /* + * If the user knows better the user should set the regdom + * to their country before the IE is picked up + */ + if (lr->initiator == NL80211_REGDOM_SET_BY_USER && + lr->intersect) + return REG_REQ_IGNORE; + /* + * Process user requests only after previous user/driver/core + * requests have been processed + */ + if ((lr->initiator == NL80211_REGDOM_SET_BY_CORE || + lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == NL80211_REGDOM_SET_BY_USER) && + regdom_changes(lr->alpha2)) + return REG_REQ_IGNORE; - if (reg_request_cell_base(lr)) - return REG_REQ_IGNORE; + if (!regdom_changes(user_request->alpha2)) + return REG_REQ_ALREADY_SET; - if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) - return REG_REQ_INTERSECT; - /* - * If the user knows better the user should set the regdom - * to their country before the IE is picked up - */ - if (lr->initiator == NL80211_REGDOM_SET_BY_USER && - lr->intersect) - return REG_REQ_IGNORE; - /* - * Process user requests only after previous user/driver/core - * requests have been processed - */ - if ((lr->initiator == NL80211_REGDOM_SET_BY_CORE || - lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || - lr->initiator == NL80211_REGDOM_SET_BY_USER) && - regdom_changes(lr->alpha2)) - return REG_REQ_IGNORE; + return REG_REQ_OK; +} - if (!regdom_changes(pending_request->alpha2)) - return REG_REQ_ALREADY_SET; +/** + * reg_process_hint_user - process user regulatory requests + * @user_request: a pending user regulatory request + * + * The wireless subsystem can use this function to process + * a regulatory request initiated by userspace. + * + * Returns one of the different reg request treatment values. + */ +static enum reg_request_treatment +reg_process_hint_user(struct regulatory_request *user_request) +{ + enum reg_request_treatment treatment; - return REG_REQ_OK; + treatment = __reg_process_hint_user(user_request); + if (treatment == REG_REQ_IGNORE || + treatment == REG_REQ_ALREADY_SET) { + kfree(user_request); + return treatment; } - return REG_REQ_IGNORE; + user_request->intersect = treatment == REG_REQ_INTERSECT; + user_request->processed = false; + + reg_update_last_request(user_request); + + user_alpha2[0] = user_request->alpha2[0]; + user_alpha2[1] = user_request->alpha2[1]; + + return reg_call_crda(user_request); } -static void reg_set_request_processed(void) +static enum reg_request_treatment +__reg_process_hint_driver(struct regulatory_request *driver_request) { - bool need_more_processing = false; struct regulatory_request *lr = get_last_request(); - lr->processed = true; - - spin_lock(®_requests_lock); - if (!list_empty(®_requests_list)) - need_more_processing = true; - spin_unlock(®_requests_lock); + if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { + if (regdom_changes(driver_request->alpha2)) + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; + } - if (lr->initiator == NL80211_REGDOM_SET_BY_USER) - cancel_delayed_work(®_timeout); + /* + * This would happen if you unplug and plug your card + * back in or if you add a new device for which the previously + * loaded card also agrees on the regulatory domain. + */ + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && + !regdom_changes(driver_request->alpha2)) + return REG_REQ_ALREADY_SET; - if (need_more_processing) - schedule_work(®_work); + return REG_REQ_INTERSECT; } /** - * __regulatory_hint - hint to the wireless core a regulatory domain - * @wiphy: if the hint comes from country information from an AP, this - * is required to be set to the wiphy that received the information - * @pending_request: the regulatory request currently being processed + * reg_process_hint_driver - process driver regulatory requests + * @driver_request: a pending driver regulatory request * - * The Wireless subsystem can use this function to hint to the wireless core - * what it believes should be the current regulatory domain. + * The wireless subsystem can use this function to process + * a regulatory request issued by an 802.11 driver. * * Returns one of the different reg request treatment values. */ static enum reg_request_treatment -__regulatory_hint(struct wiphy *wiphy, - struct regulatory_request *pending_request) +reg_process_hint_driver(struct wiphy *wiphy, + struct regulatory_request *driver_request) { const struct ieee80211_regdomain *regd; - bool intersect = false; enum reg_request_treatment treatment; - struct regulatory_request *lr; - treatment = get_reg_request_treatment(wiphy, pending_request); + treatment = __reg_process_hint_driver(driver_request); switch (treatment) { - case REG_REQ_INTERSECT: - if (pending_request->initiator == - NL80211_REGDOM_SET_BY_DRIVER) { - regd = reg_copy_regd(get_cfg80211_regdom()); - if (IS_ERR(regd)) { - kfree(pending_request); - return PTR_ERR(regd); - } - rcu_assign_pointer(wiphy->regd, regd); - } - intersect = true; - break; case REG_REQ_OK: break; - default: - /* - * If the regulatory domain being requested by the - * driver has already been set just copy it to the - * wiphy - */ - if (treatment == REG_REQ_ALREADY_SET && - pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { - regd = reg_copy_regd(get_cfg80211_regdom()); - if (IS_ERR(regd)) { - kfree(pending_request); - return REG_REQ_IGNORE; - } - treatment = REG_REQ_ALREADY_SET; - rcu_assign_pointer(wiphy->regd, regd); - goto new_request; - } - kfree(pending_request); + case REG_REQ_IGNORE: + kfree(driver_request); return treatment; + case REG_REQ_INTERSECT: + /* fall through */ + case REG_REQ_ALREADY_SET: + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { + kfree(driver_request); + return REG_REQ_IGNORE; + } + rcu_assign_pointer(wiphy->regd, regd); } -new_request: - lr = get_last_request(); - if (lr != &core_request_world && lr) - kfree_rcu(lr, rcu_head); - pending_request->intersect = intersect; - pending_request->processed = false; - rcu_assign_pointer(last_request, pending_request); - lr = pending_request; + driver_request->intersect = treatment == REG_REQ_INTERSECT; + driver_request->processed = false; + + reg_update_last_request(driver_request); + + /* + * Since CRDA will not be called in this case as we already + * have applied the requested regulatory domain before we just + * inform userspace we have processed the request + */ + if (treatment == REG_REQ_ALREADY_SET) { + nl80211_send_reg_change_event(driver_request); + reg_set_request_processed(); + return treatment; + } + + return reg_call_crda(driver_request); +} - pending_request = NULL; +static enum reg_request_treatment +__reg_process_hint_country_ie(struct wiphy *wiphy, + struct regulatory_request *country_ie_request) +{ + struct wiphy *last_wiphy = NULL; + struct regulatory_request *lr = get_last_request(); - if (lr->initiator == NL80211_REGDOM_SET_BY_USER) { - user_alpha2[0] = lr->alpha2[0]; - user_alpha2[1] = lr->alpha2[1]; + if (reg_request_cell_base(lr)) { + /* Trust a Cell base station over the AP's country IE */ + if (regdom_changes(country_ie_request->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; + } else { + if (wiphy->regulatory_flags & REGULATORY_COUNTRY_IE_IGNORE) + return REG_REQ_IGNORE; } - /* When r == REG_REQ_INTERSECT we do need to call CRDA */ - if (treatment != REG_REQ_OK && treatment != REG_REQ_INTERSECT) { + if (unlikely(!is_an_alpha2(country_ie_request->alpha2))) + return -EINVAL; + + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) + return REG_REQ_OK; + + last_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); + + if (last_wiphy != wiphy) { /* - * Since CRDA will not be called in this case as we already - * have applied the requested regulatory domain before we just - * inform userspace we have processed the request + * Two cards with two APs claiming different + * Country IE alpha2s. We could + * intersect them, but that seems unlikely + * to be correct. Reject second one for now. */ - if (treatment == REG_REQ_ALREADY_SET) { - nl80211_send_reg_change_event(lr); - reg_set_request_processed(); - } - return treatment; + if (regdom_changes(country_ie_request->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } + /* + * Two consecutive Country IE hints on the same wiphy. + * This should be picked up early by the driver/stack + */ + if (WARN_ON(regdom_changes(country_ie_request->alpha2))) + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; +} + +/** + * reg_process_hint_country_ie - process regulatory requests from country IEs + * @country_ie_request: a regulatory request from a country IE + * + * The wireless subsystem can use this function to process + * a regulatory request issued by a country Information Element. + * + * Returns one of the different reg request treatment values. + */ +static enum reg_request_treatment +reg_process_hint_country_ie(struct wiphy *wiphy, + struct regulatory_request *country_ie_request) +{ + enum reg_request_treatment treatment; + + treatment = __reg_process_hint_country_ie(wiphy, country_ie_request); - if (call_crda(lr->alpha2)) + switch (treatment) { + case REG_REQ_OK: + break; + case REG_REQ_IGNORE: + /* fall through */ + case REG_REQ_ALREADY_SET: + kfree(country_ie_request); + return treatment; + case REG_REQ_INTERSECT: + kfree(country_ie_request); + /* + * This doesn't happen yet, not sure we + * ever want to support it for this case. + */ + WARN_ONCE(1, "Unexpected intersection for country IEs"); return REG_REQ_IGNORE; - return REG_REQ_OK; + } + + country_ie_request->intersect = false; + country_ie_request->processed = false; + + reg_update_last_request(country_ie_request); + + return reg_call_crda(country_ie_request); } /* This processes *all* regulatory hints */ -static void reg_process_hint(struct regulatory_request *reg_request, - enum nl80211_reg_initiator reg_initiator) +static void reg_process_hint(struct regulatory_request *reg_request) { struct wiphy *wiphy = NULL; + enum reg_request_treatment treatment; if (WARN_ON(!reg_request->alpha2)) return; @@ -1546,23 +1689,37 @@ static void reg_process_hint(struct regulatory_request *reg_request, if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { + if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); return; } - switch (__regulatory_hint(wiphy, reg_request)) { - case REG_REQ_ALREADY_SET: - /* This is required so that the orig_* parameters are saved */ - if (wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) - wiphy_update_regulatory(wiphy, reg_initiator); + switch (reg_request->initiator) { + case NL80211_REGDOM_SET_BY_CORE: + reg_process_hint_core(reg_request); + return; + case NL80211_REGDOM_SET_BY_USER: + treatment = reg_process_hint_user(reg_request); + if (treatment == REG_REQ_OK || + treatment == REG_REQ_ALREADY_SET) + return; + schedule_delayed_work(®_timeout, msecs_to_jiffies(3142)); + return; + case NL80211_REGDOM_SET_BY_DRIVER: + treatment = reg_process_hint_driver(wiphy, reg_request); break; - default: - if (reg_initiator == NL80211_REGDOM_SET_BY_USER) - schedule_delayed_work(®_timeout, - msecs_to_jiffies(3142)); + case NL80211_REGDOM_SET_BY_COUNTRY_IE: + treatment = reg_process_hint_country_ie(wiphy, reg_request); break; + default: + WARN(1, "invalid initiator %d\n", reg_request->initiator); + return; } + + /* This is required so that the orig_* parameters are saved */ + if (treatment == REG_REQ_ALREADY_SET && wiphy && + wiphy->regulatory_flags & REGULATORY_STRICT_REG) + wiphy_update_regulatory(wiphy, reg_request->initiator); } /* @@ -1596,7 +1753,7 @@ static void reg_process_pending_hints(void) spin_unlock(®_requests_lock); - reg_process_hint(reg_request, reg_request->initiator); + reg_process_hint(reg_request); } /* Processes beacon hints -- this has nothing to do with country IEs */ @@ -1888,7 +2045,7 @@ static void restore_regulatory_settings(bool reset_user) world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - if (rdev->wiphy.flags & WIPHY_FLAG_CUSTOM_REGULATORY) + if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG) restore_custom_reg_settings(&rdev->wiphy); } @@ -2016,7 +2173,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) } } -bool reg_supported_dfs_region(u8 dfs_region) +bool reg_supported_dfs_region(enum nl80211_dfs_regions dfs_region) { switch (dfs_region) { case NL80211_DFS_UNSET: @@ -2031,27 +2188,6 @@ bool reg_supported_dfs_region(u8 dfs_region) } } -static void print_dfs_region(u8 dfs_region) -{ - if (!dfs_region) - return; - - switch (dfs_region) { - case NL80211_DFS_FCC: - pr_info(" DFS Master region FCC"); - break; - case NL80211_DFS_ETSI: - pr_info(" DFS Master region ETSI"); - break; - case NL80211_DFS_JP: - pr_info(" DFS Master region JP"); - break; - default: - pr_info(" DFS Master region Unknown"); - break; - } -} - static void print_regdomain(const struct ieee80211_regdomain *rd) { struct regulatory_request *lr = get_last_request(); @@ -2083,7 +2219,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) } } - print_dfs_region(rd->dfs_region); + pr_info(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region)); print_rd_rules(rd); } @@ -2093,48 +2229,60 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) print_rd_rules(rd); } -/* Takes ownership of rd only if it doesn't fail */ -static int __set_regdom(const struct ieee80211_regdomain *rd) +static int reg_set_rd_core(const struct ieee80211_regdomain *rd) +{ + if (!is_world_regdom(rd->alpha2)) + return -EINVAL; + update_world_regdomain(rd); + return 0; +} + +static int reg_set_rd_user(const struct ieee80211_regdomain *rd, + struct regulatory_request *user_request) { - const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *intersected_rd = NULL; - struct wiphy *request_wiphy; - struct regulatory_request *lr = get_last_request(); - /* Some basic sanity checks first */ + if (is_world_regdom(rd->alpha2)) + return -EINVAL; + + if (!regdom_changes(rd->alpha2)) + return -EALREADY; - if (!reg_is_valid_request(rd->alpha2)) + if (!is_valid_rd(rd)) { + pr_err("Invalid regulatory domain detected:\n"); + print_regdomain_info(rd); return -EINVAL; + } - if (is_world_regdom(rd->alpha2)) { - update_world_regdomain(rd); + if (!user_request->intersect) { + reset_regdomains(false, rd); return 0; } - if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && - !is_unknown_alpha2(rd->alpha2)) + intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); + if (!intersected_rd) return -EINVAL; - /* - * Lets only bother proceeding on the same alpha2 if the current - * rd is non static (it means CRDA was present and was used last) - * and the pending request came in from a country IE - */ - if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { - /* - * If someone else asked us to change the rd lets only bother - * checking if the alpha2 changes if CRDA was already called - */ - if (!regdom_changes(rd->alpha2)) - return -EALREADY; - } + kfree(rd); + rd = NULL; + reset_regdomains(false, intersected_rd); - /* - * Now lets set the regulatory domain, update all driver channels - * and finally inform them of what we have done, in case they want - * to review or adjust their own settings based on their own - * internal EEPROM data - */ + return 0; +} + +static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, + struct regulatory_request *driver_request) +{ + const struct ieee80211_regdomain *regd; + const struct ieee80211_regdomain *intersected_rd = NULL; + const struct ieee80211_regdomain *tmp; + struct wiphy *request_wiphy; + + if (is_world_regdom(rd->alpha2)) + return -EINVAL; + + if (!regdom_changes(rd->alpha2)) + return -EALREADY; if (!is_valid_rd(rd)) { pr_err("Invalid regulatory domain detected:\n"); @@ -2142,29 +2290,13 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return -EINVAL; } - request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); - if (!request_wiphy && - (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || - lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { + request_wiphy = wiphy_idx_to_wiphy(driver_request->wiphy_idx); + if (!request_wiphy) { schedule_delayed_work(®_timeout, 0); return -ENODEV; } - if (!lr->intersect) { - if (lr->initiator != NL80211_REGDOM_SET_BY_DRIVER) { - reset_regdomains(false, rd); - return 0; - } - - /* - * For a driver hint, lets copy the regulatory domain the - * driver wanted to the wiphy to deal with conflicts - */ - - /* - * Userspace could have sent two replies with only - * one kernel request. - */ + if (!driver_request->intersect) { if (request_wiphy->regd) return -EALREADY; @@ -2177,38 +2309,59 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return 0; } - /* Intersection requires a bit more work */ + intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); + if (!intersected_rd) + return -EINVAL; - if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { - intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); - if (!intersected_rd) - return -EINVAL; + /* + * We can trash what CRDA provided now. + * However if a driver requested this specific regulatory + * domain we keep it for its private use + */ + tmp = get_wiphy_regdom(request_wiphy); + rcu_assign_pointer(request_wiphy->regd, rd); + rcu_free_regdom(tmp); - /* - * We can trash what CRDA provided now. - * However if a driver requested this specific regulatory - * domain we keep it for its private use - */ - if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER) { - const struct ieee80211_regdomain *tmp; + rd = NULL; - tmp = get_wiphy_regdom(request_wiphy); - rcu_assign_pointer(request_wiphy->regd, rd); - rcu_free_regdom(tmp); - } else { - kfree(rd); - } + reset_regdomains(false, intersected_rd); + + return 0; +} - rd = NULL; +static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd, + struct regulatory_request *country_ie_request) +{ + struct wiphy *request_wiphy; - reset_regdomains(false, intersected_rd); + if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && + !is_unknown_alpha2(rd->alpha2)) + return -EINVAL; - return 0; + /* + * Lets only bother proceeding on the same alpha2 if the current + * rd is non static (it means CRDA was present and was used last) + * and the pending request came in from a country IE + */ + + if (!is_valid_rd(rd)) { + pr_err("Invalid regulatory domain detected:\n"); + print_regdomain_info(rd); + return -EINVAL; } - return -EINVAL; -} + request_wiphy = wiphy_idx_to_wiphy(country_ie_request->wiphy_idx); + if (!request_wiphy) { + schedule_delayed_work(®_timeout, 0); + return -ENODEV; + } + + if (country_ie_request->intersect) + return -EINVAL; + reset_regdomains(false, rd); + return 0; +} /* * Use this call to set the current regulatory domain. Conflicts with @@ -2220,10 +2373,32 @@ int set_regdom(const struct ieee80211_regdomain *rd) struct regulatory_request *lr; int r; + if (!reg_is_valid_request(rd->alpha2)) { + kfree(rd); + return -EINVAL; + } + lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ - r = __set_regdom(rd); + switch (lr->initiator) { + case NL80211_REGDOM_SET_BY_CORE: + r = reg_set_rd_core(rd); + break; + case NL80211_REGDOM_SET_BY_USER: + r = reg_set_rd_user(rd, lr); + break; + case NL80211_REGDOM_SET_BY_DRIVER: + r = reg_set_rd_driver(rd, lr); + break; + case NL80211_REGDOM_SET_BY_COUNTRY_IE: + r = reg_set_rd_country_ie(rd, lr); + break; + default: + WARN(1, "invalid initiator %d\n", lr->initiator); + return -EINVAL; + } + if (r) { if (r == -EALREADY) reg_set_request_processed(); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 9677e3c..02bd8f4 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -18,8 +18,10 @@ extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain; +bool reg_is_valid_request(const char *alpha2); bool is_world_regdom(const char *alpha2); -bool reg_supported_dfs_region(u8 dfs_region); +bool reg_supported_dfs_region(enum nl80211_dfs_regions dfs_region); +enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy); int regulatory_hint_user(const char *alpha2, enum nl80211_user_reg_hint_type user_reg_hint_type); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d4397eb..a32d52a 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -161,7 +161,7 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, dev->bss_generation++; } -void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) +void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev) { struct cfg80211_scan_request *request; struct wireless_dev *wdev; @@ -210,17 +210,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) dev_put(wdev->netdev); rdev->scan_req = NULL; - - /* - * OK. If this is invoked with "leak" then we can't - * free this ... but we've cleaned it up anyway. The - * driver failed to call the scan_done callback, so - * all bets are off, it might still be trying to use - * the scan request or not ... if it accesses the dev - * in there (it shouldn't anyway) then it may crash. - */ - if (!leak) - kfree(request); + kfree(request); } void __cfg80211_scan_done(struct work_struct *wk) @@ -231,7 +221,7 @@ void __cfg80211_scan_done(struct work_struct *wk) scan_done_wk); rtnl_lock(); - ___cfg80211_scan_done(rdev, false); + ___cfg80211_scan_done(rdev); rtnl_unlock(); } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 65f8008..d3c5bd7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -632,6 +632,16 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, } #endif + if (!bss && (status == WLAN_STATUS_SUCCESS)) { + WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, + WLAN_CAPABILITY_ESS); + if (bss) + cfg80211_hold_bss(bss_from_pub(bss)); + } + if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); @@ -649,16 +659,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, return; } - if (!bss) { - WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); - bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, - wdev->ssid, wdev->ssid_len, - WLAN_CAPABILITY_ESS, - WLAN_CAPABILITY_ESS); - if (WARN_ON(!bss)) - return; - cfg80211_hold_bss(bss_from_pub(bss)); - } + if (WARN_ON(!bss)) + return; wdev->current_bss = bss_from_pub(bss); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ba5f0d6..f7aa7a7 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1653,9 +1653,8 @@ TRACE_EVENT(rdev_cancel_remain_on_channel, TRACE_EVENT(rdev_mgmt_tx, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, bool no_cck, bool dont_wait_for_ack), - TP_ARGS(wiphy, wdev, chan, offchan, wait, no_cck, dont_wait_for_ack), + struct cfg80211_mgmt_tx_params *params), + TP_ARGS(wiphy, wdev, params), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY @@ -1668,11 +1667,11 @@ TRACE_EVENT(rdev_mgmt_tx, TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; - CHAN_ASSIGN(chan); - __entry->offchan = offchan; - __entry->wait = wait; - __entry->no_cck = no_cck; - __entry->dont_wait_for_ack = dont_wait_for_ack; + CHAN_ASSIGN(params->chan); + __entry->offchan = params->offchan; + __entry->wait = params->wait; + __entry->no_cck = params->no_cck; + __entry->dont_wait_for_ack = params->dont_wait_for_ack; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", " CHAN_PR_FMT ", offchan: %s," " wait: %u, no cck: %s, dont wait for ack: %s", diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 7622789..c8a8297 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -35,6 +35,8 @@ * response */ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> @@ -1809,7 +1811,7 @@ static int __init x25_init(void) if (rc != 0) goto out_sock; - printk(KERN_INFO "X.25 for Linux Version 0.2\n"); + pr_info("Linux Version 0.2\n"); x25_register_sysctl(); rc = x25_proc_init(); diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index a8a2363..3923123 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -17,6 +17,8 @@ * 2000-09-04 Henner Eisen Prevent freeing a dangling skb. */ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> @@ -89,7 +91,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) */ if (frametype != X25_CLEAR_CONFIRMATION) - printk(KERN_DEBUG "x25_receive_data(): unknown frame type %2x\n",frametype); + pr_debug("x25_receive_data(): unknown frame type %2x\n",frametype); return 0; } @@ -114,7 +116,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, */ nb = x25_get_neigh(dev); if (!nb) { - printk(KERN_DEBUG "X.25: unknown neighbour - %s\n", dev->name); + pr_debug("unknown neighbour - %s\n", dev->name); goto drop; } @@ -154,7 +156,7 @@ void x25_establish_link(struct x25_neigh *nb) switch (nb->dev->type) { case ARPHRD_X25: if ((skb = alloc_skb(1, GFP_ATOMIC)) == NULL) { - printk(KERN_ERR "x25_dev: out of memory\n"); + pr_err("x25_dev: out of memory\n"); return; } ptr = skb_put(skb, 1); @@ -189,7 +191,7 @@ void x25_terminate_link(struct x25_neigh *nb) skb = alloc_skb(1, GFP_ATOMIC); if (!skb) { - printk(KERN_ERR "x25_dev: out of memory\n"); + pr_err("x25_dev: out of memory\n"); return; } diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index b825325..7ecd04c 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -21,6 +21,8 @@ * on response. */ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/kernel.h> #include <linux/string.h> #include <linux/skbuff.h> @@ -109,7 +111,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, case X25_MARKER: break; default: - printk(KERN_DEBUG "X.25: unknown facility " + pr_debug("unknown facility " "%02X, value %02X\n", p[0], p[1]); break; @@ -132,7 +134,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, *vc_fac_mask |= X25_MASK_WINDOW_SIZE; break; default: - printk(KERN_DEBUG "X.25: unknown facility " + pr_debug("unknown facility " "%02X, values %02X, %02X\n", p[0], p[1], p[2]); break; @@ -143,7 +145,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, case X25_FAC_CLASS_C: if (len < 4) return -1; - printk(KERN_DEBUG "X.25: unknown facility %02X, " + pr_debug("unknown facility %02X, " "values %02X, %02X, %02X\n", p[0], p[1], p[2], p[3]); p += 4; @@ -172,7 +174,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, *vc_fac_mask |= X25_MASK_CALLED_AE; break; default: - printk(KERN_DEBUG "X.25: unknown facility %02X," + pr_debug("unknown facility %02X," "length %d\n", p[0], p[1]); break; } @@ -341,12 +343,12 @@ void x25_limit_facilities(struct x25_facilities *facilities, if (!nb->extended) { if (facilities->winsize_in > 7) { - printk(KERN_DEBUG "X.25: incoming winsize limited to 7\n"); + pr_debug("incoming winsize limited to 7\n"); facilities->winsize_in = 7; } if (facilities->winsize_out > 7) { facilities->winsize_out = 7; - printk( KERN_DEBUG "X.25: outgoing winsize limited to 7\n"); + pr_debug("outgoing winsize limited to 7\n"); } } } diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c index c541b62..cf561f1 100644 --- a/net/x25/x25_forward.c +++ b/net/x25/x25_forward.c @@ -8,6 +8,9 @@ * History * 03-01-2007 Added forwarding for x.25 Andrew Hendry */ + +#define pr_fmt(fmt) "X25: " fmt + #include <linux/if_arp.h> #include <linux/init.h> #include <linux/slab.h> @@ -51,7 +54,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, list_for_each(entry, &x25_forward_list) { x25_frwd = list_entry(entry, struct x25_forward, node); if (x25_frwd->lci == lci) { - printk(KERN_WARNING "X.25: call request for lci which is already registered!, transmitting but not registering new pair\n"); + pr_warn("call request for lci which is already registered!, transmitting but not registering new pair\n"); same_lci = 1; } } diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index a49cd4e..d1b0dc7 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -23,6 +23,8 @@ * i-frames. */ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/slab.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -317,7 +319,7 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp break; default: - printk(KERN_WARNING "x25: unknown %02X in state 3\n", frametype); + pr_warn("unknown %02X in state 3\n", frametype); break; } diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 4acacf3..fd5ffb2 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -21,6 +21,8 @@ * 2000-09-04 Henner Eisen dev_hold() / dev_put() for x25_neigh. */ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/timer.h> @@ -93,13 +95,13 @@ void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb, if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 4)) break; - printk(KERN_WARNING "x25: diagnostic #%d - %02X %02X %02X\n", + pr_warn("diagnostic #%d - %02X %02X %02X\n", skb->data[3], skb->data[4], skb->data[5], skb->data[6]); break; default: - printk(KERN_WARNING "x25: received unknown %02X with LCI 000\n", + pr_warn("received unknown %02X with LCI 000\n", frametype); break; } diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 5170d52..6b5af65 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -23,6 +23,8 @@ * restriction on response. */ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/slab.h> #include <linux/kernel.h> #include <linux/string.h> @@ -148,7 +150,7 @@ void x25_write_internal(struct sock *sk, int frametype) case X25_RESET_CONFIRMATION: break; default: - printk(KERN_ERR "X.25: invalid frame type %02X\n", frametype); + pr_err("invalid frame type %02X\n", frametype); return; } @@ -338,7 +340,7 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, } } - printk(KERN_DEBUG "X.25: invalid PLP frame %02X %02X %02X\n", + pr_debug("invalid PLP frame %02X %02X %02X\n", frame[0], frame[1], frame[2]); return X25_ILLEGAL; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9a91f74..a7487f3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,12 +39,7 @@ #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) #define XFRM_MAX_QUEUE_LEN 100 -DEFINE_MUTEX(xfrm_cfg_mutex); -EXPORT_SYMBOL(xfrm_cfg_mutex); - -static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); static struct dst_entry *xfrm_policy_sk_bundles; -static DEFINE_RWLOCK(xfrm_policy_lock); static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] @@ -438,7 +433,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) if (!ndst) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); @@ -446,7 +441,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) net->xfrm.policy_bydst[dir].table = ndst; net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } @@ -463,7 +458,7 @@ static void xfrm_byidx_resize(struct net *net, int total) if (!nidx) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); @@ -471,7 +466,7 @@ static void xfrm_byidx_resize(struct net *net, int total) net->xfrm.policy_byidx = nidx; net->xfrm.policy_idx_hmask = nhashmask; - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } @@ -504,7 +499,7 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; @@ -513,7 +508,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); @@ -538,7 +533,7 @@ static void xfrm_hash_resize(struct work_struct *work) /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(struct net *net, int dir) +static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { static u32 idx_generator; @@ -548,8 +543,14 @@ static u32 xfrm_gen_index(struct net *net, int dir) u32 idx; int found; - idx = (idx_generator | dir); - idx_generator += 8; + if (!index) { + idx = (idx_generator | dir); + idx_generator += 8; + } else { + idx = index; + index = 0; + } + if (idx == 0) idx = 8; list = net->xfrm.policy_byidx + idx_hash(net, idx); @@ -630,7 +631,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct hlist_head *chain; struct hlist_node *newpos; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; @@ -641,7 +642,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return -EEXIST; } delpol = pol; @@ -672,14 +673,14 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_policy_requeue(delpol, policy); __xfrm_policy_unlink(delpol, dir); } - policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); + policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); list_add(&policy->walk.all, &net->xfrm.policy_all); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); @@ -699,7 +700,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, struct hlist_head *chain; *err = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -712,7 +713,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -721,7 +722,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -740,7 +741,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, return NULL; *err = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, chain, byidx) { @@ -751,7 +752,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -760,7 +761,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -823,7 +824,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0, cnt = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_policy_flush_secctx_check(net, type, audit_info); if (err) @@ -839,7 +840,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, @@ -848,7 +849,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) xfrm_policy_kill(pol); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } @@ -860,7 +861,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, @@ -869,7 +870,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) audit_info->secid); xfrm_policy_kill(pol); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); goto again2; } } @@ -878,7 +879,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) if (!cnt) err = -ESRCH; out: - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -898,7 +899,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); if (list_empty(&walk->walk.all)) x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else @@ -924,7 +925,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, } list_del_init(&walk->walk.all); out: - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); @@ -938,14 +939,14 @@ void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) } EXPORT_SYMBOL(xfrm_policy_walk_init); -void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) { if (list_empty(&walk->walk.all)) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ list_del(&walk->walk.all); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_walk_done); @@ -990,7 +991,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -1026,7 +1027,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (ret) xfrm_pol_hold(ret); fail: - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } @@ -1103,8 +1104,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, const struct flowi *fl) { struct xfrm_policy *pol; + struct net *net = sock_net(sk); - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); if ((pol = sk->sk_policy[dir]) != NULL) { bool match = xfrm_selector_match(&pol->selector, fl, sk->sk_family); @@ -1128,7 +1130,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, pol = NULL; } out: - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } @@ -1166,9 +1168,11 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { - write_lock_bh(&xfrm_policy_lock); + struct net *net = xp_net(pol); + + write_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { xfrm_policy_kill(pol); return 0; @@ -1187,12 +1191,12 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return -EINVAL; #endif - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = get_seconds(); - pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) { @@ -1204,7 +1208,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); @@ -1215,6 +1219,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) { struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); + struct net *net = xp_net(old); if (newp) { newp->selector = old->selector; @@ -1233,9 +1238,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; @@ -1896,8 +1901,7 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, if (IS_ERR(xdst)) return xdst; - if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 || - (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP)) + if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) return xdst; dst1 = &xdst->u.dst; @@ -2072,7 +2076,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); int i, err, num_pols, num_xfrms = 0, drop_pols = 0; -restart: dst = NULL; xdst = NULL; route = NULL; @@ -2106,10 +2109,10 @@ restart: dst_hold(&xdst->u.dst); - spin_lock_bh(&xfrm_policy_sk_bundle_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); xdst->u.dst.next = xfrm_policy_sk_bundles; xfrm_policy_sk_bundles = &xdst->u.dst; - spin_unlock_bh(&xfrm_policy_sk_bundle_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); route = xdst->route; } @@ -2152,23 +2155,8 @@ restart: return make_blackhole(net, family, dst_orig); } - if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) { - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&net->xfrm.km_waitq, &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - set_current_state(TASK_RUNNING); - remove_wait_queue(&net->xfrm.km_waitq, &wait); - if (!signal_pending(current)) { - dst_release(dst); - goto restart; - } - - err = -ERESTART; - } else - err = -EAGAIN; + err = -EAGAIN; XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); goto error; @@ -2434,7 +2422,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } xfrm_nr = ti; if (npols > 1) { - xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net); tpp = stp; } @@ -2563,10 +2551,10 @@ static void __xfrm_garbage_collect(struct net *net) { struct dst_entry *head, *next; - spin_lock_bh(&xfrm_policy_sk_bundle_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); head = xfrm_policy_sk_bundles; xfrm_policy_sk_bundles = NULL; - spin_unlock_bh(&xfrm_policy_sk_bundle_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); while (head) { next = head->next; @@ -2950,6 +2938,13 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; + + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + rwlock_init(&net->xfrm.xfrm_policy_lock); + spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + return 0; out_sysctl: @@ -3070,14 +3065,14 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, } static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel, - u8 dir, u8 type) + u8 dir, u8 type, struct net *net) { struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; u32 priority = ~0U; - read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); + read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ + chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { @@ -3086,7 +3081,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector break; } } - chain = &init_net.xfrm.policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && @@ -3099,7 +3094,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector if (ret) xfrm_pol_hold(ret); - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } @@ -3210,7 +3205,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, - struct xfrm_kmaddress *k) + struct xfrm_kmaddress *k, struct net *net) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -3223,14 +3218,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, goto out; /* Stage 1 - find policy */ - if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) { + if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { err = -ENOENT; goto out; } /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { - if ((x = xfrm_migrate_state_find(mp))) { + if ((x = xfrm_migrate_state_find(mp, net))) { x_cur[nx_cur] = x; nx_cur++; if ((xc = xfrm_state_migrate(x, mp))) { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 68c2f357..a62c25e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -35,8 +35,6 @@ destination/tunnel endpoint. (output) */ -static DEFINE_SPINLOCK(xfrm_state_lock); - static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static inline unsigned int xfrm_dst_hash(struct net *net, @@ -127,7 +125,7 @@ static void xfrm_hash_resize(struct work_struct *work) goto out_unlock; } - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; for (i = net->xfrm.state_hmask; i >= 0; i--) @@ -144,7 +142,7 @@ static void xfrm_hash_resize(struct work_struct *work) net->xfrm.state_byspi = nspi; net->xfrm.state_hmask = nhashmask; - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); xfrm_hash_free(odst, osize); @@ -374,8 +372,6 @@ static void xfrm_state_gc_task(struct work_struct *work) hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - - wake_up(&net->xfrm.km_waitq); } static inline unsigned long make_jiffies(long secs) @@ -390,7 +386,6 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) { struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); - struct net *net = xs_net(x); unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -460,12 +455,8 @@ resched: goto out; expired: - if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { + if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) x->km.state = XFRM_STATE_EXPIRED; - wake_up(&net->xfrm.km_waitq); - next = 2; - goto resched; - } err = __xfrm_state_delete(x); if (!err) @@ -535,14 +526,14 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; - spin_lock(&xfrm_state_lock); + spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) hlist_del(&x->byspi); net->xfrm.state_num--; - spin_unlock(&xfrm_state_lock); + spin_unlock(&net->xfrm.xfrm_state_lock); /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that @@ -603,7 +594,7 @@ int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) { int i, err = 0, cnt = 0; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); err = xfrm_state_flush_secctx_check(net, proto, audit_info); if (err) goto out; @@ -616,7 +607,7 @@ restart: if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = xfrm_state_delete(x); xfrm_audit_state_delete(x, err ? 0 : 1, @@ -627,7 +618,7 @@ restart: if (!err) cnt++; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); goto restart; } } @@ -636,19 +627,18 @@ restart: err = 0; out: - spin_unlock_bh(&xfrm_state_lock); - wake_up(&net->xfrm.km_waitq); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_flush); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); si->sadcnt = net->xfrm.state_num; si->sadhcnt = net->xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } EXPORT_SYMBOL(xfrm_sad_getinfo); @@ -801,7 +791,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, to_put = NULL; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == encap_family && @@ -886,7 +876,7 @@ out: xfrm_state_hold(x); else *err = acquire_in_progress ? -EAGAIN : error; - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); return x; @@ -900,7 +890,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, unsigned int h; struct xfrm_state *rx = NULL, *x = NULL; - spin_lock(&xfrm_state_lock); + spin_lock(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && @@ -918,7 +908,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, if (rx) xfrm_state_hold(rx); - spin_unlock(&xfrm_state_lock); + spin_unlock(&net->xfrm.xfrm_state_lock); return rx; @@ -950,14 +940,12 @@ static void __xfrm_state_insert(struct xfrm_state *x) if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); - wake_up(&net->xfrm.km_waitq); - net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); } -/* xfrm_state_lock is held */ +/* net->xfrm.xfrm_state_lock is held */ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) { struct net *net = xs_net(xnew); @@ -980,14 +968,16 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) void xfrm_state_insert(struct xfrm_state *x) { - spin_lock_bh(&xfrm_state_lock); + struct net *net = xs_net(x); + + spin_lock_bh(&net->xfrm.xfrm_state_lock); __xfrm_state_bump_genids(x); __xfrm_state_insert(x); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_insert); -/* xfrm_state_lock is held */ +/* net->xfrm.xfrm_state_lock is held */ static struct xfrm_state *__find_acq_core(struct net *net, const struct xfrm_mark *m, unsigned short family, u8 mode, @@ -1079,7 +1069,7 @@ int xfrm_state_add(struct xfrm_state *x) to_put = NULL; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { @@ -1108,7 +1098,7 @@ int xfrm_state_add(struct xfrm_state *x) err = 0; out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (x1) { xfrm_state_delete(x1); @@ -1203,16 +1193,16 @@ out: return NULL; } -/* xfrm_state_lock is held */ -struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) +/* net->xfrm.xfrm_state_lock is held */ +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) { unsigned int h; struct xfrm_state *x; if (m->reqid) { - h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr, + h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, init_net.xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1227,9 +1217,9 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) return x; } } else { - h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr, + h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, init_net.xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1283,10 +1273,11 @@ int xfrm_state_update(struct xfrm_state *x) struct xfrm_state *x1, *to_put; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); + struct net *net = xs_net(x); to_put = NULL; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x1 = __xfrm_state_locate(x, use_spi, x->props.family); err = -ESRCH; @@ -1306,7 +1297,7 @@ int xfrm_state_update(struct xfrm_state *x) err = 0; out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); @@ -1377,9 +1368,9 @@ xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); @@ -1391,9 +1382,9 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark, { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); @@ -1405,9 +1396,9 @@ xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } @@ -1416,17 +1407,17 @@ EXPORT_SYMBOL(xfrm_find_acq); #ifdef CONFIG_XFRM_SUB_POLICY int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, - unsigned short family) + unsigned short family, struct net *net) { int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/ if (afinfo->tmpl_sort) err = afinfo->tmpl_sort(dst, src, n); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_put_afinfo(afinfo); return err; } @@ -1438,13 +1429,15 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, { int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + struct net *net = xs_net(*dst); + if (!afinfo) return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); if (afinfo->state_sort) err = afinfo->state_sort(dst, src, n); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_put_afinfo(afinfo); return err; } @@ -1476,9 +1469,9 @@ struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x = __xfrm_find_acq_byseq(net, mark, seq); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_find_acq_byseq); @@ -1496,6 +1489,30 @@ u32 xfrm_get_acqseq(void) } EXPORT_SYMBOL(xfrm_get_acqseq); +int verify_spi_info(u8 proto, u32 min, u32 max) +{ + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + break; + + case IPPROTO_COMP: + /* IPCOMP spi is 16-bits. */ + if (max >= 0x10000) + return -EINVAL; + break; + + default: + return -EINVAL; + } + + if (min > max) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(verify_spi_info); + int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { struct net *net = xs_net(x); @@ -1536,10 +1553,10 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) } } if (x->id.spi) { - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; } @@ -1562,7 +1579,7 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, if (walk->seq != 0 && list_empty(&walk->all)) return 0; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); if (list_empty(&walk->all)) x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all); else @@ -1586,7 +1603,7 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, } list_del_init(&walk->all); out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_walk); @@ -1600,14 +1617,14 @@ void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) } EXPORT_SYMBOL(xfrm_state_walk_init); -void xfrm_state_walk_done(struct xfrm_state_walk *walk) +void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net) { if (list_empty(&walk->all)) return; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); list_del(&walk->all); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_walk_done); @@ -1655,16 +1672,12 @@ EXPORT_SYMBOL(km_state_notify); void km_state_expired(struct xfrm_state *x, int hard, u32 portid) { - struct net *net = xs_net(x); struct km_event c; c.data.hard = hard; c.portid = portid; c.event = XFRM_MSG_EXPIRE; km_state_notify(x, &c); - - if (hard) - wake_up(&net->xfrm.km_waitq); } EXPORT_SYMBOL(km_state_expired); @@ -1707,16 +1720,12 @@ EXPORT_SYMBOL(km_new_mapping); void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) { - struct net *net = xp_net(pol); struct km_event c; c.data.hard = hard; c.portid = portid; c.event = XFRM_MSG_POLEXPIRE; km_policy_notify(pol, dir, &c); - - if (hard) - wake_up(&net->xfrm.km_waitq); } EXPORT_SYMBOL(km_policy_expired); @@ -2025,7 +2034,7 @@ int __net_init xfrm_state_init(struct net *net) INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); INIT_HLIST_HEAD(&net->xfrm.state_gc_list); INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); - init_waitqueue_head(&net->xfrm.km_waitq); + spin_lock_init(&net->xfrm.xfrm_state_lock); return 0; out_byspi: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f964d4c..97681a3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -181,7 +181,9 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ALG_COMP] || - attrs[XFRMA_TFCPAD]) + attrs[XFRMA_TFCPAD] || + (ntohl(p->id.spi) >= 0x10000)) + goto out; break; @@ -877,7 +879,10 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) static int xfrm_dump_sa_done(struct netlink_callback *cb) { struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; - xfrm_state_walk_done(walk); + struct sock *sk = cb->skb->sk; + struct net *net = sock_net(sk); + + xfrm_state_walk_done(walk, net); return 0; } @@ -1074,29 +1079,6 @@ out_noput: return err; } -static int verify_userspi_info(struct xfrm_userspi_info *p) -{ - switch (p->info.id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - break; - - case IPPROTO_COMP: - /* IPCOMP spi is 16-bits. */ - if (p->max >= 0x10000) - return -EINVAL; - break; - - default: - return -EINVAL; - } - - if (p->min > p->max) - return -EINVAL; - - return 0; -} - static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { @@ -1111,7 +1093,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_mark m; p = nlmsg_data(nlh); - err = verify_userspi_info(p); + err = verify_spi_info(p->info.id.proto, p->min, p->max); if (err) goto out_noput; @@ -1189,6 +1171,8 @@ static int verify_policy_type(u8 type) static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) { + int ret; + switch (p->share) { case XFRM_SHARE_ANY: case XFRM_SHARE_SESSION: @@ -1224,7 +1208,13 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) return -EINVAL; } - return verify_policy_dir(p->dir); + ret = verify_policy_dir(p->dir); + if (ret) + return ret; + if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + return -EINVAL; + + return 0; } static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs) @@ -1547,8 +1537,9 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr static int xfrm_dump_policy_done(struct netlink_callback *cb) { struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct net *net = sock_net(cb->skb->sk); - xfrm_policy_walk_done(walk); + xfrm_policy_walk_done(walk, net); return 0; } @@ -2129,6 +2120,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, u8 type; int err; int n = 0; + struct net *net = sock_net(skb->sk); if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; @@ -2146,7 +2138,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (!n) return 0; - xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp); + xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net); return 0; } @@ -2394,9 +2386,11 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) static void xfrm_netlink_rcv(struct sk_buff *skb) { - mutex_lock(&xfrm_cfg_mutex); + struct net *net = sock_net(skb->sk); + + mutex_lock(&net->xfrm.xfrm_cfg_mutex); netlink_rcv_skb(skb, &xfrm_user_rcv_msg); - mutex_unlock(&xfrm_cfg_mutex); + mutex_unlock(&net->xfrm.xfrm_cfg_mutex); } static inline size_t xfrm_expire_msgsize(void) |